diff options
515 files changed, 20291 insertions, 3064 deletions
diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt b/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt index f16b99571af1..b5eadee4a9a7 100644 --- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt +++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt @@ -30,6 +30,12 @@ Optional properties: - "lpo": external low power 32.768 kHz clock - vbat-supply: phandle to regulator supply for VBAT - vddio-supply: phandle to regulator supply for VDDIO + - brcm,bt-pcm-int-params: configure PCM parameters via a 5-byte array + - sco-routing: 0 = PCM, 1 = Transport, 2 = Codec, 3 = I2S + - pcm-interface-rate: 128KBps, 256KBps, 512KBps, 1024KBps, 2048KBps + - pcm-frame-type: short, long + - pcm-sync-mode: slave, master + - pcm-clock-mode: slave, master Example: @@ -41,5 +47,6 @@ Example: bluetooth { compatible = "brcm,bcm43438-bt"; max-speed = <921600>; + brcm,bt-pcm-int-params = [01 02 00 01 01]; }; }; diff --git a/Documentation/devicetree/bindings/net/dsa/ar9331.txt b/Documentation/devicetree/bindings/net/dsa/ar9331.txt new file mode 100644 index 000000000000..320607cbbb17 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/ar9331.txt @@ -0,0 +1,148 @@ +Atheros AR9331 built-in switch +============================= + +It is a switch built-in to Atheros AR9331 WiSoC and addressable over internal +MDIO bus. All PHYs are built-in as well. + +Required properties: + + - compatible: should be: "qca,ar9331-switch" + - reg: Address on the MII bus for the switch. + - resets : Must contain an entry for each entry in reset-names. + - reset-names : Must include the following entries: "switch" + - interrupt-parent: Phandle to the parent interrupt controller + - interrupts: IRQ line for the switch + - interrupt-controller: Indicates the switch is itself an interrupt + controller. This is used for the PHY interrupts. + - #interrupt-cells: must be 1 + - mdio: Container of PHY and devices on the switches MDIO bus. + +See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional +required and optional properties. +Examples: + +eth0: ethernet@19000000 { + compatible = "qca,ar9330-eth"; + reg = <0x19000000 0x200>; + interrupts = <4>; + + resets = <&rst 9>, <&rst 22>; + reset-names = "mac", "mdio"; + clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; + clock-names = "eth", "mdio"; + + phy-mode = "mii"; + phy-handle = <&phy_port4>; +}; + +eth1: ethernet@1a000000 { + compatible = "qca,ar9330-eth"; + reg = <0x1a000000 0x200>; + interrupts = <5>; + resets = <&rst 13>, <&rst 23>; + reset-names = "mac", "mdio"; + clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; + clock-names = "eth", "mdio"; + + phy-mode = "gmii"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switch10: switch@10 { + #address-cells = <1>; + #size-cells = <0>; + + compatible = "qca,ar9331-switch"; + reg = <0x10>; + resets = <&rst 8>; + reset-names = "switch"; + + interrupt-parent = <&miscintc>; + interrupts = <12>; + + interrupt-controller; + #interrupt-cells = <1>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + switch_port0: port@0 { + reg = <0x0>; + label = "cpu"; + ethernet = <ð1>; + + phy-mode = "gmii"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + switch_port1: port@1 { + reg = <0x1>; + phy-handle = <&phy_port0>; + phy-mode = "internal"; + }; + + switch_port2: port@2 { + reg = <0x2>; + phy-handle = <&phy_port1>; + phy-mode = "internal"; + }; + + switch_port3: port@3 { + reg = <0x3>; + phy-handle = <&phy_port2>; + phy-mode = "internal"; + }; + + switch_port4: port@4 { + reg = <0x4>; + phy-handle = <&phy_port3>; + phy-mode = "internal"; + }; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + interrupt-parent = <&switch10>; + + phy_port0: phy@0 { + reg = <0x0>; + interrupts = <0>; + }; + + phy_port1: phy@1 { + reg = <0x1>; + interrupts = <0>; + }; + + phy_port2: phy@2 { + reg = <0x2>; + interrupts = <0>; + }; + + phy_port3: phy@3 { + reg = <0x3>; + interrupts = <0>; + }; + + phy_port4: phy@4 { + reg = <0x4>; + interrupts = <0>; + }; + }; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/net/mediatek-dwmac.txt b/Documentation/devicetree/bindings/net/mediatek-dwmac.txt index 8a08621a5b54..afbcaebf062e 100644 --- a/Documentation/devicetree/bindings/net/mediatek-dwmac.txt +++ b/Documentation/devicetree/bindings/net/mediatek-dwmac.txt @@ -14,7 +14,7 @@ Required properties: Should be "macirq" for the main MAC IRQ - clocks: Must contain a phandle for each entry in clock-names. - clock-names: The name of the clock listed in the clocks property. These are - "axi", "apb", "mac_main", "ptp_ref" for MT2712 SoC + "axi", "apb", "mac_main", "ptp_ref", "rmii_internal" for MT2712 SoC. - mac-address: See ethernet.txt in the same directory - phy-mode: See ethernet.txt in the same directory - mediatek,pericfg: A phandle to the syscon node that control ethernet @@ -23,8 +23,10 @@ Required properties: Optional properties: - mediatek,tx-delay-ps: TX clock delay macro value. Default is 0. It should be defined for RGMII/MII interface. + It should be defined for RMII interface when the reference clock is from MT2712 SoC. - mediatek,rx-delay-ps: RX clock delay macro value. Default is 0. - It should be defined for RGMII/MII/RMII interface. + It should be defined for RGMII/MII interface. + It should be defined for RMII interface. Both delay properties need to be a multiple of 170 for RGMII interface, or will round down. Range 0~31*170. Both delay properties need to be a multiple of 550 for MII/RMII interface, @@ -34,13 +36,20 @@ or will round down. Range 0~31*550. reference clock, which is from external PHYs, is connected to RXC pin on MT2712 SoC. Otherwise, is connected to TXC pin. +- mediatek,rmii-clk-from-mac: boolean property, if present indicates that + MT2712 SoC provides the RMII reference clock, which outputs to TXC pin only. - mediatek,txc-inverse: boolean property, if present indicates that 1. tx clock will be inversed in MII/RGMII case, 2. tx clock inside MAC will be inversed relative to reference clock which is from external PHYs in RMII case, and it rarely happen. + 3. the reference clock, which outputs to TXC pin will be inversed in RMII case + when the reference clock is from MT2712 SoC. - mediatek,rxc-inverse: boolean property, if present indicates that 1. rx clock will be inversed in MII/RGMII case. - 2. reference clock will be inversed when arrived at MAC in RMII case. + 2. reference clock will be inversed when arrived at MAC in RMII case, when + the reference clock is from external PHYs. + 3. the inside clock, which be sent to MAC, will be inversed in RMII case when + the reference clock is from MT2712 SoC. - assigned-clocks: mac_main and ptp_ref clocks - assigned-clock-parents: parent clocks of the assigned clocks @@ -50,29 +59,33 @@ Example: reg = <0 0x1101c000 0 0x1300>; interrupts = <GIC_SPI 237 IRQ_TYPE_LEVEL_LOW>; interrupt-names = "macirq"; - phy-mode ="rgmii"; + phy-mode ="rgmii-rxid"; mac-address = [00 55 7b b5 7d f7]; clock-names = "axi", "apb", "mac_main", "ptp_ref", - "ptp_top"; + "rmii_internal"; clocks = <&pericfg CLK_PERI_GMAC>, <&pericfg CLK_PERI_GMAC_PCLK>, <&topckgen CLK_TOP_ETHER_125M_SEL>, - <&topckgen CLK_TOP_ETHER_50M_SEL>; + <&topckgen CLK_TOP_ETHER_50M_SEL>, + <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>; assigned-clocks = <&topckgen CLK_TOP_ETHER_125M_SEL>, - <&topckgen CLK_TOP_ETHER_50M_SEL>; + <&topckgen CLK_TOP_ETHER_50M_SEL>, + <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>; assigned-clock-parents = <&topckgen CLK_TOP_ETHERPLL_125M>, - <&topckgen CLK_TOP_APLL1_D3>; + <&topckgen CLK_TOP_APLL1_D3>, + <&topckgen CLK_TOP_ETHERPLL_50M>; + power-domains = <&scpsys MT2712_POWER_DOMAIN_AUDIO>; mediatek,pericfg = <&pericfg>; mediatek,tx-delay-ps = <1530>; mediatek,rx-delay-ps = <1530>; mediatek,rmii-rxc; mediatek,txc-inverse; mediatek,rxc-inverse; - snps,txpbl = <32>; - snps,rxpbl = <32>; + snps,txpbl = <1>; + snps,rxpbl = <1>; snps,reset-gpio = <&pio 87 GPIO_ACTIVE_LOW>; snps,reset-active-low; }; diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt index 388ff48f53ae..44e2a4fab29e 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83867.txt +++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt @@ -8,8 +8,6 @@ Required properties: - ti,tx-internal-delay - RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h for applicable values. Required only if interface type is PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_TXID - - ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h - for applicable values Note: If the interface type is PHY_INTERFACE_MODE_RGMII the TX/RX clock delays will be left at their default values, as set by the PHY's pin strapping. @@ -42,6 +40,14 @@ Optional property: Some MACs work with differential SGMII clock. See data manual for details. + - ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h + for applicable values (deprecated) + + -tx-fifo-depth - As defined in the ethernet-controller.yaml. Values for + the depth can be found in dt-bindings/net/ti-dp83867.h + -rx-fifo-depth - As defined in the ethernet-controller.yaml. Values for + the depth can be found in dt-bindings/net/ti-dp83867.h + Note: ti,min-output-impedance and ti,max-output-impedance are mutually exclusive. When both properties are present ti,max-output-impedance takes precedence. @@ -55,7 +61,7 @@ Example: reg = <0>; ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>; ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_75_NS>; - ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>; + tx-fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>; }; Datasheet can be found: diff --git a/Documentation/networking/device_drivers/netronome/nfp.rst b/Documentation/networking/device_drivers/netronome/nfp.rst index 6c08ac8b5147..ada611fb427c 100644 --- a/Documentation/networking/device_drivers/netronome/nfp.rst +++ b/Documentation/networking/device_drivers/netronome/nfp.rst @@ -131,3 +131,119 @@ abi_drv_reset abi_drv_load_ifc Defines a list of PF devices allowed to load FW on the device. This variable is not currently user configurable. + +Statistics +========== + +Following device statistics are available through the ``ethtool -S`` interface: + +.. flat-table:: NFP device statistics + :header-rows: 1 + :widths: 3 1 11 + + * - Name + - ID + - Meaning + + * - dev_rx_discards + - 1 + - Packet can be discarded on the RX path for one of the following reasons: + + * The NIC is not in promisc mode, and the destination MAC address + doesn't match the interfaces' MAC address. + * The received packet is larger than the max buffer size on the host. + I.e. it exceeds the Layer 3 MRU. + * There is no freelist descriptor available on the host for the packet. + It is likely that the NIC couldn't cache one in time. + * A BPF program discarded the packet. + * The datapath drop action was executed. + * The MAC discarded the packet due to lack of ingress buffer space + on the NIC. + + * - dev_rx_errors + - 2 + - A packet can be counted (and dropped) as RX error for the following + reasons: + + * A problem with the VEB lookup (only when SR-IOV is used). + * A physical layer problem that causes Ethernet errors, like FCS or + alignment errors. The cause is usually faulty cables or SFPs. + + * - dev_rx_bytes + - 3 + - Total number of bytes received. + + * - dev_rx_uc_bytes + - 4 + - Unicast bytes received. + + * - dev_rx_mc_bytes + - 5 + - Multicast bytes received. + + * - dev_rx_bc_bytes + - 6 + - Broadcast bytes received. + + * - dev_rx_pkts + - 7 + - Total number of packets received. + + * - dev_rx_mc_pkts + - 8 + - Multicast packets received. + + * - dev_rx_bc_pkts + - 9 + - Broadcast packets received. + + * - dev_tx_discards + - 10 + - A packet can be discarded in the TX direction if the MAC is + being flow controlled and the NIC runs out of TX queue space. + + * - dev_tx_errors + - 11 + - A packet can be counted as TX error (and dropped) for one for the + following reasons: + + * The packet is an LSO segment, but the Layer 3 or Layer 4 offset + could not be determined. Therefore LSO could not continue. + * An invalid packet descriptor was received over PCIe. + * The packet Layer 3 length exceeds the device MTU. + * An error on the MAC/physical layer. Usually due to faulty cables or + SFPs. + * A CTM buffer could not be allocated. + * The packet offset was incorrect and could not be fixed by the NIC. + + * - dev_tx_bytes + - 12 + - Total number of bytes transmitted. + + * - dev_tx_uc_bytes + - 13 + - Unicast bytes transmitted. + + * - dev_tx_mc_bytes + - 14 + - Multicast bytes transmitted. + + * - dev_tx_bc_bytes + - 15 + - Broadcast bytes transmitted. + + * - dev_tx_pkts + - 16 + - Total number of packets transmitted. + + * - dev_tx_mc_pkts + - 17 + - Multicast packets transmitted. + + * - dev_tx_bc_pkts + - 18 + - Broadcast packets transmitted. + +Note that statistics unknown to the driver will be displayed as +``dev_unknown_stat$ID``, where ``$ID`` refers to the second column +above. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index fd26788e8c96..90b8f14bc41a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -479,6 +479,10 @@ tcp_no_metrics_save - BOOLEAN degradation. If set, TCP will not cache metrics on closing connections. +tcp_no_ssthresh_metrics_save - BOOLEAN + Controls whether TCP saves ssthresh metrics in the route cache. + Default is 1, which disables ssthresh metrics. + tcp_orphan_retries - INTEGER This value influences the timeout of a locally closed TCP connection, when RTO retransmissions remain unacknowledged. diff --git a/MAINTAINERS b/MAINTAINERS index d09519805cc0..60131db28141 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17487,6 +17487,7 @@ F: net/vmw_vsock/diag.c F: net/vmw_vsock/af_vsock_tap.c F: net/vmw_vsock/virtio_transport_common.c F: net/vmw_vsock/virtio_transport.c +F: net/vmw_vsock/vsock_loopback.c F: drivers/net/vsockmon.c F: drivers/vhost/vsock.c F: tools/testing/vsock/ @@ -17857,6 +17858,14 @@ L: linux-gpio@vger.kernel.org S: Maintained F: drivers/gpio/gpio-ws16c48.c +WIREGUARD SECURE NETWORK TUNNEL +M: Jason A. Donenfeld <Jason@zx2c4.com> +S: Maintained +F: drivers/net/wireguard/ +F: tools/testing/selftests/wireguard/ +L: wireguard@lists.zx2c4.com +L: netdev@vger.kernel.org + WISTRON LAPTOP BUTTON DRIVER M: Miloslav Trmac <mitr@volny.cz> S: Maintained diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c index a4ebd2445eda..d2875e32abfc 100644 --- a/arch/m68k/emu/nfeth.c +++ b/arch/m68k/emu/nfeth.c @@ -167,7 +167,7 @@ static int nfeth_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static void nfeth_tx_timeout(struct net_device *dev) +static void nfeth_tx_timeout(struct net_device *dev, unsigned int txqueue) { dev->stats.tx_errors++; netif_wake_queue(dev); diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 5cfc9d347826..8f5aed760abb 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -126,6 +126,9 @@ clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; clock-names = "eth", "mdio"; + phy-mode = "mii"; + phy-handle = <&phy_port4>; + status = "disabled"; }; @@ -133,13 +136,127 @@ compatible = "qca,ar9330-eth"; reg = <0x1a000000 0x200>; interrupts = <5>; - resets = <&rst 13>, <&rst 23>; reset-names = "mac", "mdio"; clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; clock-names = "eth", "mdio"; + phy-mode = "gmii"; + status = "disabled"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switch10: switch@10 { + #address-cells = <1>; + #size-cells = <0>; + + compatible = "qca,ar9331-switch"; + reg = <0x10>; + resets = <&rst 8>; + reset-names = "switch"; + + interrupt-parent = <&miscintc>; + interrupts = <12>; + + interrupt-controller; + #interrupt-cells = <1>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + switch_port0: port@0 { + reg = <0x0>; + label = "cpu"; + ethernet = <ð1>; + + phy-mode = "gmii"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + switch_port1: port@1 { + reg = <0x1>; + phy-handle = <&phy_port0>; + phy-mode = "internal"; + + status = "disabled"; + }; + + switch_port2: port@2 { + reg = <0x2>; + phy-handle = <&phy_port1>; + phy-mode = "internal"; + + status = "disabled"; + }; + + switch_port3: port@3 { + reg = <0x3>; + phy-handle = <&phy_port2>; + phy-mode = "internal"; + + status = "disabled"; + }; + + switch_port4: port@4 { + reg = <0x4>; + phy-handle = <&phy_port3>; + phy-mode = "internal"; + + status = "disabled"; + }; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + interrupt-parent = <&switch10>; + + phy_port0: phy@0 { + reg = <0x0>; + interrupts = <0>; + status = "disabled"; + }; + + phy_port1: phy@1 { + reg = <0x1>; + interrupts = <0>; + status = "disabled"; + }; + + phy_port2: phy@2 { + reg = <0x2>; + interrupts = <0>; + status = "disabled"; + }; + + phy_port3: phy@3 { + reg = <0x3>; + interrupts = <0>; + status = "disabled"; + }; + + phy_port4: phy@4 { + reg = <0x4>; + interrupts = <0>; + status = "disabled"; + }; + }; + }; + }; }; usb: usb@1b000100 { diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts index 77bab823eb3b..0f2b20044834 100644 --- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts +++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts @@ -84,3 +84,16 @@ ð1 { status = "okay"; }; + +&switch_port1 { + label = "lan0"; + status = "okay"; +}; + +&phy_port0 { + status = "okay"; +}; + +&phy_port4 { + status = "okay"; +}; diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 327b728f7244..35ebeebfc1a8 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -247,7 +247,7 @@ static void uml_net_set_multicast_list(struct net_device *dev) return; } -static void uml_net_tx_timeout(struct net_device *dev) +static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue) { netif_trans_update(dev); netif_wake_queue(dev); diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 92617e16829e..0ff86391f77d 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1332,7 +1332,7 @@ static void vector_net_set_multicast_list(struct net_device *dev) return; } -static void vector_net_tx_timeout(struct net_device *dev) +static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct vector_private *vp = netdev_priv(dev); diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index fa9f3893b002..4986226a5ab2 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -455,7 +455,7 @@ static void iss_net_set_multicast_list(struct net_device *dev) { } -static void iss_net_tx_timeout(struct net_device *dev) +static void iss_net_tx_timeout(struct net_device *dev, unsigned int txqueue) { } diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f1a500205313..8fbd36eb8941 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1414,12 +1414,14 @@ fore200e_open(struct atm_vcc *vcc) static void fore200e_close(struct atm_vcc* vcc) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); struct fore200e_vcc* fore200e_vcc; + struct fore200e* fore200e; struct fore200e_vc_map* vc_map; unsigned long flags; ASSERT(vcc); + fore200e = FORE200E_DEV(vcc->dev); + ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<<FORE200E_VPI_BITS)); ASSERT((vcc->vci >= 0) && (vcc->vci < 1<<FORE200E_VCI_BITS)); @@ -1464,10 +1466,10 @@ fore200e_close(struct atm_vcc* vcc) static int fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); - struct fore200e_vcc* fore200e_vcc = FORE200E_VCC(vcc); + struct fore200e* fore200e; + struct fore200e_vcc* fore200e_vcc; struct fore200e_vc_map* vc_map; - struct host_txq* txq = &fore200e->host_txq; + struct host_txq* txq; struct host_txq_entry* entry; struct tpd* tpd; struct tpd_haddr tpd_haddr; @@ -1480,9 +1482,18 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) unsigned char* data; unsigned long flags; - ASSERT(vcc); - ASSERT(fore200e); - ASSERT(fore200e_vcc); + if (!vcc) + return -EINVAL; + + fore200e = FORE200E_DEV(vcc->dev); + fore200e_vcc = FORE200E_VCC(vcc); + + if (!fore200e) + return -EINVAL; + + txq = &fore200e->host_txq; + if (!fore200e_vcc) + return -EINVAL; if (!test_bit(ATM_VF_READY, &vcc->flags)) { DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi); diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c index 8e05706fe5d9..0795a49edfae 100644 --- a/drivers/bluetooth/btbcm.c +++ b/drivers/bluetooth/btbcm.c @@ -107,6 +107,52 @@ int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) } EXPORT_SYMBOL_GPL(btbcm_set_bdaddr); +int btbcm_read_pcm_int_params(struct hci_dev *hdev, + struct bcm_set_pcm_int_params *params) +{ + struct sk_buff *skb; + int err = 0; + + skb = __hci_cmd_sync(hdev, 0xfc1d, 0, NULL, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "BCM: Read PCM int params failed (%d)", err); + return err; + } + + if (skb->len != 6 || skb->data[0]) { + bt_dev_err(hdev, "BCM: Read PCM int params length mismatch"); + kfree_skb(skb); + return -EIO; + } + + if (params) + memcpy(params, skb->data + 1, 5); + + kfree_skb(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(btbcm_read_pcm_int_params); + +int btbcm_write_pcm_int_params(struct hci_dev *hdev, + const struct bcm_set_pcm_int_params *params) +{ + struct sk_buff *skb; + int err; + + skb = __hci_cmd_sync(hdev, 0xfc1c, 5, params, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "BCM: Write PCM int params failed (%d)", err); + return err; + } + kfree_skb(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(btbcm_write_pcm_int_params); + int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) { const struct hci_command_hdr *cmd; diff --git a/drivers/bluetooth/btbcm.h b/drivers/bluetooth/btbcm.h index d204be8a84bf..3c7dd0765837 100644 --- a/drivers/bluetooth/btbcm.h +++ b/drivers/bluetooth/btbcm.h @@ -54,6 +54,10 @@ struct bcm_set_pcm_format_params { int btbcm_check_bdaddr(struct hci_dev *hdev); int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw); +int btbcm_read_pcm_int_params(struct hci_dev *hdev, + struct bcm_set_pcm_int_params *params); +int btbcm_write_pcm_int_params(struct hci_dev *hdev, + const struct bcm_set_pcm_int_params *params); int btbcm_setup_patchram(struct hci_dev *hdev); int btbcm_setup_apple(struct hci_dev *hdev); @@ -74,6 +78,18 @@ static inline int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) return -EOPNOTSUPP; } +int btbcm_read_pcm_int_params(struct hci_dev *hdev, + struct bcm_set_pcm_int_params *params) +{ + return -EOPNOTSUPP; +} + +int btbcm_write_pcm_int_params(struct hci_dev *hdev, + const struct bcm_set_pcm_int_params *params) +{ + return -EOPNOTSUPP; +} + static inline int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw) { return -EOPNOTSUPP; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 70e385987d41..0eaeca0a64fb 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -552,9 +552,9 @@ static void btusb_rtl_cmd_timeout(struct hci_dev *hdev) } bt_dev_err(hdev, "Reset Realtek device via gpio"); - gpiod_set_value_cansleep(reset_gpio, 0); - msleep(200); gpiod_set_value_cansleep(reset_gpio, 1); + msleep(200); + gpiod_set_value_cansleep(reset_gpio, 0); } static inline void btusb_free_frags(struct btusb_data *data) @@ -2602,7 +2602,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) * and being processed the events from there then. */ if (test_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) { - data->evt_skb = skb_clone(skb, GFP_KERNEL); + data->evt_skb = skb_clone(skb, GFP_ATOMIC); if (!data->evt_skb) goto err_out; } @@ -2867,7 +2867,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to send wmt rst (%d)", err); - return err; + goto err_release_fw; } /* Wait a few moments for firmware activation done */ @@ -3832,6 +3832,10 @@ static int btusb_probe(struct usb_interface *intf, * (DEVICE_REMOTE_WAKEUP) */ set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); + + err = usb_autopm_get_interface(intf); + if (err < 0) + goto out_free_dev; } if (id->driver_info & BTUSB_AMP) { diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index d2a6a4afdbbb..f8f5c593a05c 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -48,6 +48,14 @@ #define BCM_NUM_SUPPLIES 2 /** + * struct bcm_device_data - device specific data + * @no_early_set_baudrate: Disallow set baudrate before driver setup() + */ +struct bcm_device_data { + bool no_early_set_baudrate; +}; + +/** * struct bcm_device - device driver resources * @serdev_hu: HCI UART controller struct * @list: bcm_device_list node @@ -79,6 +87,7 @@ * @hu: pointer to HCI UART controller struct, * used to disable flow control during runtime suspend and system sleep * @is_suspended: whether flow control is currently disabled + * @no_early_set_baudrate: don't set_baudrate before setup() */ struct bcm_device { /* Must be the first member, hci_serdev.c expects this. */ @@ -112,6 +121,8 @@ struct bcm_device { struct hci_uart *hu; bool is_suspended; #endif + bool no_early_set_baudrate; + u8 pcm_int_params[5]; }; /* generic bcm uart resources */ @@ -447,7 +458,13 @@ out: if (bcm->dev) { hci_uart_set_flow_control(hu, true); hu->init_speed = bcm->dev->init_speed; - hu->oper_speed = bcm->dev->oper_speed; + + /* If oper_speed is set, ldisc/serdev will set the baudrate + * before calling setup() + */ + if (!bcm->dev->no_early_set_baudrate) + hu->oper_speed = bcm->dev->oper_speed; + err = bcm_gpio_set_power(bcm->dev, true); hci_uart_set_flow_control(hu, false); if (err) @@ -565,6 +582,8 @@ static int bcm_setup(struct hci_uart *hu) /* Operational speed if any */ if (hu->oper_speed) speed = hu->oper_speed; + else if (bcm->dev && bcm->dev->oper_speed) + speed = bcm->dev->oper_speed; else if (hu->proto->oper_speed) speed = hu->proto->oper_speed; else @@ -576,6 +595,16 @@ static int bcm_setup(struct hci_uart *hu) host_set_baudrate(hu, speed); } + /* PCM parameters if provided */ + if (bcm->dev && bcm->dev->pcm_int_params[0] != 0xff) { + struct bcm_set_pcm_int_params params; + + btbcm_read_pcm_int_params(hu->hdev, ¶ms); + + memcpy(¶ms, bcm->dev->pcm_int_params, 5); + btbcm_write_pcm_int_params(hu->hdev, ¶ms); + } + finalize: release_firmware(fw); @@ -1113,6 +1142,8 @@ static int bcm_acpi_probe(struct bcm_device *dev) static int bcm_of_probe(struct bcm_device *bdev) { device_property_read_u32(bdev->dev, "max-speed", &bdev->oper_speed); + device_property_read_u8_array(bdev->dev, "brcm,bt-pcm-int-params", + bdev->pcm_int_params, 5); return 0; } @@ -1128,6 +1159,9 @@ static int bcm_probe(struct platform_device *pdev) dev->dev = &pdev->dev; dev->irq = platform_get_irq(pdev, 0); + /* Initialize routing field to an unused value */ + dev->pcm_int_params[0] = 0xff; + if (has_acpi_companion(&pdev->dev)) { ret = bcm_acpi_probe(dev); if (ret) @@ -1374,6 +1408,7 @@ static struct platform_driver bcm_driver = { static int bcm_serdev_probe(struct serdev_device *serdev) { struct bcm_device *bcmdev; + const struct bcm_device_data *data; int err; bcmdev = devm_kzalloc(&serdev->dev, sizeof(*bcmdev), GFP_KERNEL); @@ -1387,6 +1422,9 @@ static int bcm_serdev_probe(struct serdev_device *serdev) bcmdev->serdev_hu.serdev = serdev; serdev_device_set_drvdata(serdev, bcmdev); + /* Initialize routing field to an unused value */ + bcmdev->pcm_int_params[0] = 0xff; + if (has_acpi_companion(&serdev->dev)) err = bcm_acpi_probe(bcmdev); else @@ -1408,6 +1446,10 @@ static int bcm_serdev_probe(struct serdev_device *serdev) if (err) dev_err(&serdev->dev, "Failed to power down\n"); + data = device_get_match_data(bcmdev->dev); + if (data) + bcmdev->no_early_set_baudrate = data->no_early_set_baudrate; + return hci_uart_register_device(&bcmdev->serdev_hu, &bcm_proto); } @@ -1419,12 +1461,16 @@ static void bcm_serdev_remove(struct serdev_device *serdev) } #ifdef CONFIG_OF +static struct bcm_device_data bcm4354_device_data = { + .no_early_set_baudrate = true, +}; + static const struct of_device_id bcm_bluetooth_of_match[] = { { .compatible = "brcm,bcm20702a1" }, { .compatible = "brcm,bcm4345c5" }, { .compatible = "brcm,bcm4330-bt" }, { .compatible = "brcm,bcm43438-bt" }, - { .compatible = "brcm,bcm43540-bt" }, + { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data }, { .compatible = "brcm,bcm4335a0" }, { }, }; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 82f9a6a814ae..e342daa73d1b 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -4169,7 +4169,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * * dev pointer to network device structure */ -static void hdlcdev_tx_timeout(struct net_device *dev) +static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { MGSLPC_INFO *info = dev_to_port(dev); unsigned long flags; diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index aca75237bbcf..72e5b0f65a91 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1273,7 +1273,7 @@ static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb) ctx = (struct listen_ctx *)data; lsk = ctx->lsk; - if (unlikely(tid >= cdev->tids->ntids)) { + if (unlikely(tid_out_of_range(cdev->tids, tid))) { pr_info("passive open TID %u too large\n", tid); return 1; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e5f438ab716c..4a0d3a9e72e1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1182,7 +1182,7 @@ unref: return NETDEV_TX_OK; } -static void ipoib_timeout(struct net_device *dev) +static void ipoib_timeout(struct net_device *dev, unsigned int txqueue) { struct ipoib_dev_priv *priv = ipoib_priv(dev); diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index ebc00d47abf5..7d3784aa20e5 100644 --- a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -552,7 +552,7 @@ mpt_lan_close(struct net_device *dev) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* Tx timeout handler. */ static void -mpt_lan_tx_timeout(struct net_device *dev) +mpt_lan_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mpt_lan_priv *priv = netdev_priv(dev); MPT_ADAPTER *mpt_dev = priv->mpt_dev; diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index f7d610a22347..ada94e6a3c91 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -496,7 +496,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * Deal with transmit timeouts coming from the network layer. */ static void -xpnet_dev_tx_timeout(struct net_device *dev) +xpnet_dev_tx_timeout(struct net_device *dev, unsigned int txqueue) { dev->stats.tx_errors++; } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index d02f12a5254e..01e2657e4c26 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -71,6 +71,49 @@ config DUMMY To compile this driver as a module, choose M here: the module will be called dummy. +config WIREGUARD + tristate "WireGuard secure network tunnel" + depends on NET && INET + depends on IPV6 || !IPV6 + select NET_UDP_TUNNEL + select DST_CACHE + select CRYPTO + select CRYPTO_LIB_CURVE25519 + select CRYPTO_LIB_CHACHA20POLY1305 + select CRYPTO_LIB_BLAKE2S + select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT + select CRYPTO_POLY1305_X86_64 if X86 && 64BIT + select CRYPTO_BLAKE2S_X86 if X86 && 64BIT + select CRYPTO_CURVE25519_X86 if X86 && 64BIT + select ARM_CRYPTO if ARM + select ARM64_CRYPTO if ARM64 + select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON + select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON + select CRYPTO_POLY1305_ARM if ARM + select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON + select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 + select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + help + WireGuard is a secure, fast, and easy to use replacement for IPSec + that uses modern cryptography and clever networking tricks. It's + designed to be fairly general purpose and abstract enough to fit most + use cases, while at the same time remaining extremely simple to + configure. See www.wireguard.com for more info. + + It's safe to say Y or M here, as the driver is very lightweight and + is only in use when an administrator chooses to add an interface. + +config WIREGUARD_DEBUG + bool "Debugging checks and verbose messages" + depends on WIREGUARD + help + This will write log messages for handshake and other events + that occur for a WireGuard interface. It will also perform some + extra validation checks and unit tests at various points. This is + only useful for debugging. + + Say N here unless you know what you're doing. + config EQUALIZER tristate "EQL (serial line load balancing) support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 0d3ba056cda3..953b7c12f0b0 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o +obj-$(CONFIG_WIREGUARD) += wireguard/ obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o obj-$(CONFIG_MACSEC) += macsec.o diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index b3c63d2f16aa..18428e104445 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -189,7 +189,7 @@ static int cops_nodeid (struct net_device *dev, int nodeid); static irqreturn_t cops_interrupt (int irq, void *dev_id); static void cops_poll(struct timer_list *t); -static void cops_timeout(struct net_device *dev); +static void cops_timeout(struct net_device *dev, unsigned int txqueue); static void cops_rx (struct net_device *dev); static netdev_tx_t cops_send_packet (struct sk_buff *skb, struct net_device *dev); @@ -844,7 +844,7 @@ static void cops_rx(struct net_device *dev) netif_rx(skb); } -static void cops_timeout(struct net_device *dev) +static void cops_timeout(struct net_device *dev, unsigned int txqueue) { struct cops_local *lp = netdev_priv(dev); int ioaddr = dev->base_addr; diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index b0f5bc07aef5..22a49c6d7ae6 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -356,7 +356,7 @@ int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); netdev_tx_t arcnet_send_packet(struct sk_buff *skb, struct net_device *dev); -void arcnet_timeout(struct net_device *dev); +void arcnet_timeout(struct net_device *dev, unsigned int txqueue); /* I/O equivalents */ diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 553776cc1d29..e04efc0a5c97 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -763,7 +763,7 @@ static int go_tx(struct net_device *dev) } /* Called by the kernel when transmit times out */ -void arcnet_timeout(struct net_device *dev) +void arcnet_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long flags; struct arcnet_local *lp = netdev_priv(dev); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index e3b25f310936..34bfe99641a3 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -31,16 +31,6 @@ #define AD_CHURN_DETECTION_TIME 60 #define AD_AGGREGATE_WAIT_TIME 2 -/* Port state definitions (43.4.2.2 in the 802.3ad standard) */ -#define AD_STATE_LACP_ACTIVITY 0x1 -#define AD_STATE_LACP_TIMEOUT 0x2 -#define AD_STATE_AGGREGATION 0x4 -#define AD_STATE_SYNCHRONIZATION 0x8 -#define AD_STATE_COLLECTING 0x10 -#define AD_STATE_DISTRIBUTING 0x20 -#define AD_STATE_DEFAULTED 0x40 -#define AD_STATE_EXPIRED 0x80 - /* Port Variables definitions used by the State Machines (43.4.7 in the * 802.3ad standard) */ diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index bd40b114d6cd..d737ceb61203 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -270,7 +270,9 @@ static int caif_xmit(struct sk_buff *skb, struct net_device *dev) { struct ser_device *ser; - BUG_ON(dev == NULL); + if (WARN_ON(!dev)) + return -EINVAL; + ser = netdev_priv(dev); /* Send flow off once, on high water mark */ diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index c7667645f04a..cbd74a72d0a1 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -54,6 +54,8 @@ source "drivers/net/dsa/mv88e6xxx/Kconfig" source "drivers/net/dsa/ocelot/Kconfig" +source "drivers/net/dsa/qca/Kconfig" + source "drivers/net/dsa/sja1105/Kconfig" config NET_DSA_QCA8K diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index 9d384a32b3a2..4a943ccc2ca4 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -21,4 +21,5 @@ obj-y += b53/ obj-y += microchip/ obj-y += mv88e6xxx/ obj-y += ocelot/ +obj-y += qca/ obj-y += sja1105/ diff --git a/drivers/net/dsa/qca/Kconfig b/drivers/net/dsa/qca/Kconfig new file mode 100644 index 000000000000..e3c8d715a18f --- /dev/null +++ b/drivers/net/dsa/qca/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +config NET_DSA_AR9331 + tristate "Qualcomm Atheros AR9331 Ethernet switch support" + depends on NET_DSA + select NET_DSA_TAG_AR9331 + select REGMAP + ---help--- + This enables support for the Qualcomm Atheros AR9331 built-in Ethernet + switch. diff --git a/drivers/net/dsa/qca/Makefile b/drivers/net/dsa/qca/Makefile new file mode 100644 index 000000000000..274022319066 --- /dev/null +++ b/drivers/net/dsa/qca/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_NET_DSA_AR9331) += ar9331.o diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c new file mode 100644 index 000000000000..0d1a7cd85fe8 --- /dev/null +++ b/drivers/net/dsa/qca/ar9331.c @@ -0,0 +1,855 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2019 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> +/* + * +----------------------+ + * GMAC1----RGMII----|--MAC0 | + * \---MDIO1----|--REGs |----MDIO3----\ + * | | | +------+ + * | | +--| | + * | MAC1-|----RMII--M-----| PHY0 |-o P0 + * | | | | +------+ + * | | | +--| | + * | MAC2-|----RMII--------| PHY1 |-o P1 + * | | | | +------+ + * | | | +--| | + * | MAC3-|----RMII--------| PHY2 |-o P2 + * | | | | +------+ + * | | | +--| | + * | MAC4-|----RMII--------| PHY3 |-o P3 + * | | | | +------+ + * | | | +--| | + * | MAC5-|--+-RMII--M-----|-PHY4-|-o P4 + * | | | | +------+ + * +----------------------+ | \--CFG_SW_PHY_SWAP + * GMAC0---------------RMII--------------------/ \-CFG_SW_PHY_ADDR_SWAP + * \---MDIO0--NC + * + * GMAC0 and MAC5 are connected together and use same PHY. Depending on + * configuration it can be PHY4 (default) or PHY0. Only GMAC0 or MAC5 can be + * used at same time. If GMAC0 is used (default) then MAC5 should be disabled. + * + * CFG_SW_PHY_SWAP - swap connections of PHY0 and PHY4. If this bit is not set + * PHY4 is connected to GMAC0/MAC5 bundle and PHY0 is connected to MAC1. If this + * bit is set, PHY4 is connected to MAC1 and PHY0 is connected to GMAC0/MAC5 + * bundle. + * + * CFG_SW_PHY_ADDR_SWAP - swap addresses of PHY0 and PHY4 + * + * CFG_SW_PHY_SWAP and CFG_SW_PHY_ADDR_SWAP are part of SoC specific register + * set and not related to switch internal registers. + */ + +#include <linux/bitfield.h> +#include <linux/module.h> +#include <linux/of_irq.h> +#include <linux/of_mdio.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <net/dsa.h> + +#define AR9331_SW_NAME "ar9331_switch" +#define AR9331_SW_PORTS 6 + +/* dummy reg to change page */ +#define AR9331_SW_REG_PAGE 0x40000 + +/* Global Interrupt */ +#define AR9331_SW_REG_GINT 0x10 +#define AR9331_SW_REG_GINT_MASK 0x14 +#define AR9331_SW_GINT_PHY_INT BIT(2) + +#define AR9331_SW_REG_FLOOD_MASK 0x2c +#define AR9331_SW_FLOOD_MASK_BROAD_TO_CPU BIT(26) + +#define AR9331_SW_REG_GLOBAL_CTRL 0x30 +#define AR9331_SW_GLOBAL_CTRL_MFS_M GENMASK(13, 0) + +#define AR9331_SW_REG_MDIO_CTRL 0x98 +#define AR9331_SW_MDIO_CTRL_BUSY BIT(31) +#define AR9331_SW_MDIO_CTRL_MASTER_EN BIT(30) +#define AR9331_SW_MDIO_CTRL_CMD_READ BIT(27) +#define AR9331_SW_MDIO_CTRL_PHY_ADDR_M GENMASK(25, 21) +#define AR9331_SW_MDIO_CTRL_REG_ADDR_M GENMASK(20, 16) +#define AR9331_SW_MDIO_CTRL_DATA_M GENMASK(16, 0) + +#define AR9331_SW_REG_PORT_STATUS(_port) (0x100 + (_port) * 0x100) + +/* FLOW_LINK_EN - enable mac flow control config auto-neg with phy. + * If not set, mac can be config by software. + */ +#define AR9331_SW_PORT_STATUS_FLOW_LINK_EN BIT(12) + +/* LINK_EN - If set, MAC is configured from PHY link status. + * If not set, MAC should be configured by software. + */ +#define AR9331_SW_PORT_STATUS_LINK_EN BIT(9) +#define AR9331_SW_PORT_STATUS_DUPLEX_MODE BIT(6) +#define AR9331_SW_PORT_STATUS_RX_FLOW_EN BIT(5) +#define AR9331_SW_PORT_STATUS_TX_FLOW_EN BIT(4) +#define AR9331_SW_PORT_STATUS_RXMAC BIT(3) +#define AR9331_SW_PORT_STATUS_TXMAC BIT(2) +#define AR9331_SW_PORT_STATUS_SPEED_M GENMASK(1, 0) +#define AR9331_SW_PORT_STATUS_SPEED_1000 2 +#define AR9331_SW_PORT_STATUS_SPEED_100 1 +#define AR9331_SW_PORT_STATUS_SPEED_10 0 + +#define AR9331_SW_PORT_STATUS_MAC_MASK \ + (AR9331_SW_PORT_STATUS_TXMAC | AR9331_SW_PORT_STATUS_RXMAC) + +#define AR9331_SW_PORT_STATUS_LINK_MASK \ + (AR9331_SW_PORT_STATUS_LINK_EN | AR9331_SW_PORT_STATUS_FLOW_LINK_EN | \ + AR9331_SW_PORT_STATUS_DUPLEX_MODE | \ + AR9331_SW_PORT_STATUS_RX_FLOW_EN | AR9331_SW_PORT_STATUS_TX_FLOW_EN | \ + AR9331_SW_PORT_STATUS_SPEED_M) + +/* Phy bypass mode + * ------------------------------------------------------------------------ + * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 | + * + * real | start | OP | PhyAddr | Reg Addr | TA | + * atheros| start | OP | 2'b00 |PhyAdd[2:0]| Reg Addr[4:0] | TA | + * + * + * Bit: |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 | + * real | Data | + * atheros| Data | + * + * ------------------------------------------------------------------------ + * Page address mode + * ------------------------------------------------------------------------ + * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 | + * real | start | OP | PhyAddr | Reg Addr | TA | + * atheros| start | OP | 2'b11 | 8'b0 | TA | + * + * Bit: |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 | + * real | Data | + * atheros| | Page [9:0] | + */ +/* In case of Page Address mode, Bit[18:9] of 32 bit register address should be + * written to bits[9:0] of mdio data register. + */ +#define AR9331_SW_ADDR_PAGE GENMASK(18, 9) + +/* ------------------------------------------------------------------------ + * Normal register access mode + * ------------------------------------------------------------------------ + * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 | + * real | start | OP | PhyAddr | Reg Addr | TA | + * atheros| start | OP | 2'b10 | low_addr[7:0] | TA | + * + * Bit: |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 | + * real | Data | + * atheros| Data | + * ------------------------------------------------------------------------ + */ +#define AR9331_SW_LOW_ADDR_PHY GENMASK(8, 6) +#define AR9331_SW_LOW_ADDR_REG GENMASK(5, 1) + +#define AR9331_SW_MDIO_PHY_MODE_M GENMASK(4, 3) +#define AR9331_SW_MDIO_PHY_MODE_PAGE 3 +#define AR9331_SW_MDIO_PHY_MODE_REG 2 +#define AR9331_SW_MDIO_PHY_MODE_BYPASS 0 +#define AR9331_SW_MDIO_PHY_ADDR_M GENMASK(2, 0) + +/* Empirical determined values */ +#define AR9331_SW_MDIO_POLL_SLEEP_US 1 +#define AR9331_SW_MDIO_POLL_TIMEOUT_US 20 + +struct ar9331_sw_priv { + struct device *dev; + struct dsa_switch ds; + struct dsa_switch_ops ops; + struct irq_domain *irqdomain; + struct mii_bus *mbus; /* mdio master */ + struct mii_bus *sbus; /* mdio slave */ + struct regmap *regmap; + struct reset_control *sw_reset; +}; + +/* Warning: switch reset will reset last AR9331_SW_MDIO_PHY_MODE_PAGE request + * If some kind of optimization is used, the request should be repeated. + */ +static int ar9331_sw_reset(struct ar9331_sw_priv *priv) +{ + int ret; + + ret = reset_control_assert(priv->sw_reset); + if (ret) + goto error; + + /* AR9331 doc do not provide any information about proper reset + * sequence. The AR8136 (the closes switch to the AR9331) doc says: + * reset duration should be greater than 10ms. So, let's use this value + * for now. + */ + usleep_range(10000, 15000); + ret = reset_control_deassert(priv->sw_reset); + if (ret) + goto error; + /* There is no information on how long should we wait after reset. + * AR8136 has an EEPROM and there is an Interrupt for EEPROM load + * status. AR9331 has no EEPROM support. + * For now, do not wait. In case AR8136 will be needed, the after + * reset delay can be added as well. + */ + + return 0; +error: + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); + return ret; +} + +static int ar9331_sw_mbus_write(struct mii_bus *mbus, int port, int regnum, + u16 data) +{ + struct ar9331_sw_priv *priv = mbus->priv; + struct regmap *regmap = priv->regmap; + u32 val; + int ret; + + ret = regmap_write(regmap, AR9331_SW_REG_MDIO_CTRL, + AR9331_SW_MDIO_CTRL_BUSY | + AR9331_SW_MDIO_CTRL_MASTER_EN | + FIELD_PREP(AR9331_SW_MDIO_CTRL_PHY_ADDR_M, port) | + FIELD_PREP(AR9331_SW_MDIO_CTRL_REG_ADDR_M, regnum) | + FIELD_PREP(AR9331_SW_MDIO_CTRL_DATA_M, data)); + if (ret) + goto error; + + ret = regmap_read_poll_timeout(regmap, AR9331_SW_REG_MDIO_CTRL, val, + !(val & AR9331_SW_MDIO_CTRL_BUSY), + AR9331_SW_MDIO_POLL_SLEEP_US, + AR9331_SW_MDIO_POLL_TIMEOUT_US); + if (ret) + goto error; + + return 0; +error: + dev_err_ratelimited(priv->dev, "PHY write error: %i\n", ret); + return ret; +} + +static int ar9331_sw_mbus_read(struct mii_bus *mbus, int port, int regnum) +{ + struct ar9331_sw_priv *priv = mbus->priv; + struct regmap *regmap = priv->regmap; + u32 val; + int ret; + + ret = regmap_write(regmap, AR9331_SW_REG_MDIO_CTRL, + AR9331_SW_MDIO_CTRL_BUSY | + AR9331_SW_MDIO_CTRL_MASTER_EN | + AR9331_SW_MDIO_CTRL_CMD_READ | + FIELD_PREP(AR9331_SW_MDIO_CTRL_PHY_ADDR_M, port) | + FIELD_PREP(AR9331_SW_MDIO_CTRL_REG_ADDR_M, regnum)); + if (ret) + goto error; + + ret = regmap_read_poll_timeout(regmap, AR9331_SW_REG_MDIO_CTRL, val, + !(val & AR9331_SW_MDIO_CTRL_BUSY), + AR9331_SW_MDIO_POLL_SLEEP_US, + AR9331_SW_MDIO_POLL_TIMEOUT_US); + if (ret) + goto error; + + ret = regmap_read(regmap, AR9331_SW_REG_MDIO_CTRL, &val); + if (ret) + goto error; + + return FIELD_GET(AR9331_SW_MDIO_CTRL_DATA_M, val); + +error: + dev_err_ratelimited(priv->dev, "PHY read error: %i\n", ret); + return ret; +} + +static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv) +{ + struct device *dev = priv->dev; + static struct mii_bus *mbus; + struct device_node *np, *mnp; + int ret; + + np = dev->of_node; + + mbus = devm_mdiobus_alloc(dev); + if (!mbus) + return -ENOMEM; + + mbus->name = np->full_name; + snprintf(mbus->id, MII_BUS_ID_SIZE, "%pOF", np); + + mbus->read = ar9331_sw_mbus_read; + mbus->write = ar9331_sw_mbus_write; + mbus->priv = priv; + mbus->parent = dev; + + mnp = of_get_child_by_name(np, "mdio"); + if (!mnp) + return -ENODEV; + + ret = of_mdiobus_register(mbus, mnp); + of_node_put(mnp); + if (ret) + return ret; + + priv->mbus = mbus; + + return 0; +} + +static int ar9331_sw_setup(struct dsa_switch *ds) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + + ret = ar9331_sw_reset(priv); + if (ret) + return ret; + + /* Reset will set proper defaults. CPU - Port0 will be enabled and + * configured. All other ports (ports 1 - 5) are disabled + */ + ret = ar9331_sw_mbus_init(priv); + if (ret) + return ret; + + /* Do not drop broadcast frames */ + ret = regmap_write_bits(regmap, AR9331_SW_REG_FLOOD_MASK, + AR9331_SW_FLOOD_MASK_BROAD_TO_CPU, + AR9331_SW_FLOOD_MASK_BROAD_TO_CPU); + if (ret) + goto error; + + /* Set max frame size to the maximum supported value */ + ret = regmap_write_bits(regmap, AR9331_SW_REG_GLOBAL_CTRL, + AR9331_SW_GLOBAL_CTRL_MFS_M, + AR9331_SW_GLOBAL_CTRL_MFS_M); + if (ret) + goto error; + + return 0; +error: + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); + return ret; +} + +static void ar9331_sw_port_disable(struct dsa_switch *ds, int port) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_write(regmap, AR9331_SW_REG_PORT_STATUS(port), 0); + if (ret) + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); +} + +static enum dsa_tag_protocol ar9331_sw_get_tag_protocol(struct dsa_switch *ds, + int port) +{ + return DSA_TAG_PROTO_AR9331; +} + +static void ar9331_sw_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + + switch (port) { + case 0: + if (state->interface != PHY_INTERFACE_MODE_GMII) + goto unsupported; + + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseT_Half); + break; + case 1: + case 2: + case 3: + case 4: + case 5: + if (state->interface != PHY_INTERFACE_MODE_INTERNAL) + goto unsupported; + break; + default: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + dev_err(ds->dev, "Unsupported port: %i\n", port); + return; + } + + phylink_set_port_modes(mask); + phylink_set(mask, Pause); + phylink_set(mask, Asym_Pause); + + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 10baseT_Full); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 100baseT_Full); + + bitmap_and(supported, supported, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + + return; + +unsupported: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + dev_err(ds->dev, "Unsupported interface: %d, port: %d\n", + state->interface, port); +} + +static void ar9331_sw_phylink_mac_config(struct dsa_switch *ds, int port, + unsigned int mode, + const struct phylink_link_state *state) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + u32 val; + + switch (state->speed) { + case SPEED_1000: + val = AR9331_SW_PORT_STATUS_SPEED_1000; + break; + case SPEED_100: + val = AR9331_SW_PORT_STATUS_SPEED_100; + break; + case SPEED_10: + val = AR9331_SW_PORT_STATUS_SPEED_10; + break; + default: + return; + } + + if (state->duplex) + val |= AR9331_SW_PORT_STATUS_DUPLEX_MODE; + + if (state->pause & MLO_PAUSE_TX) + val |= AR9331_SW_PORT_STATUS_TX_FLOW_EN; + + if (state->pause & MLO_PAUSE_RX) + val |= AR9331_SW_PORT_STATUS_RX_FLOW_EN; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_PORT_STATUS(port), + AR9331_SW_PORT_STATUS_LINK_MASK, val); + if (ret) + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); +} + +static void ar9331_sw_phylink_mac_link_down(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_PORT_STATUS(port), + AR9331_SW_PORT_STATUS_MAC_MASK, 0); + if (ret) + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); +} + +static void ar9331_sw_phylink_mac_link_up(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_PORT_STATUS(port), + AR9331_SW_PORT_STATUS_MAC_MASK, + AR9331_SW_PORT_STATUS_MAC_MASK); + if (ret) + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); +} + +static const struct dsa_switch_ops ar9331_sw_ops = { + .get_tag_protocol = ar9331_sw_get_tag_protocol, + .setup = ar9331_sw_setup, + .port_disable = ar9331_sw_port_disable, + .phylink_validate = ar9331_sw_phylink_validate, + .phylink_mac_config = ar9331_sw_phylink_mac_config, + .phylink_mac_link_down = ar9331_sw_phylink_mac_link_down, + .phylink_mac_link_up = ar9331_sw_phylink_mac_link_up, +}; + +static irqreturn_t ar9331_sw_irq(int irq, void *data) +{ + struct ar9331_sw_priv *priv = data; + struct regmap *regmap = priv->regmap; + u32 stat; + int ret; + + ret = regmap_read(regmap, AR9331_SW_REG_GINT, &stat); + if (ret) { + dev_err(priv->dev, "can't read interrupt status\n"); + return IRQ_NONE; + } + + if (!stat) + return IRQ_NONE; + + if (stat & AR9331_SW_GINT_PHY_INT) { + int child_irq; + + child_irq = irq_find_mapping(priv->irqdomain, 0); + handle_nested_irq(child_irq); + } + + ret = regmap_write(regmap, AR9331_SW_REG_GINT, stat); + if (ret) { + dev_err(priv->dev, "can't write interrupt status\n"); + return IRQ_NONE; + } + + return IRQ_HANDLED; +} + +static void ar9331_sw_mask_irq(struct irq_data *d) +{ + struct ar9331_sw_priv *priv = irq_data_get_irq_chip_data(d); + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_GINT_MASK, + AR9331_SW_GINT_PHY_INT, 0); + if (ret) + dev_err(priv->dev, "could not mask IRQ\n"); +} + +static void ar9331_sw_unmask_irq(struct irq_data *d) +{ + struct ar9331_sw_priv *priv = irq_data_get_irq_chip_data(d); + struct regmap *regmap = priv->regmap; + int ret; + + ret = regmap_update_bits(regmap, AR9331_SW_REG_GINT_MASK, + AR9331_SW_GINT_PHY_INT, + AR9331_SW_GINT_PHY_INT); + if (ret) + dev_err(priv->dev, "could not unmask IRQ\n"); +} + +static struct irq_chip ar9331_sw_irq_chip = { + .name = AR9331_SW_NAME, + .irq_mask = ar9331_sw_mask_irq, + .irq_unmask = ar9331_sw_unmask_irq, +}; + +static int ar9331_sw_irq_map(struct irq_domain *domain, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(irq, domain->host_data); + irq_set_chip_and_handler(irq, &ar9331_sw_irq_chip, handle_simple_irq); + irq_set_nested_thread(irq, 1); + irq_set_noprobe(irq); + + return 0; +} + +static void ar9331_sw_irq_unmap(struct irq_domain *d, unsigned int irq) +{ + irq_set_nested_thread(irq, 0); + irq_set_chip_and_handler(irq, NULL, NULL); + irq_set_chip_data(irq, NULL); +} + +static const struct irq_domain_ops ar9331_sw_irqdomain_ops = { + .map = ar9331_sw_irq_map, + .unmap = ar9331_sw_irq_unmap, + .xlate = irq_domain_xlate_onecell, +}; + +static int ar9331_sw_irq_init(struct ar9331_sw_priv *priv) +{ + struct device_node *np = priv->dev->of_node; + struct device *dev = priv->dev; + int ret, irq; + + irq = of_irq_get(np, 0); + if (irq <= 0) { + dev_err(dev, "failed to get parent IRQ\n"); + return irq ? irq : -EINVAL; + } + + ret = devm_request_threaded_irq(dev, irq, NULL, ar9331_sw_irq, + IRQF_ONESHOT, AR9331_SW_NAME, priv); + if (ret) { + dev_err(dev, "unable to request irq: %d\n", ret); + return ret; + } + + priv->irqdomain = irq_domain_add_linear(np, 1, &ar9331_sw_irqdomain_ops, + priv); + if (!priv->irqdomain) { + dev_err(dev, "failed to create IRQ domain\n"); + return -EINVAL; + } + + irq_set_parent(irq_create_mapping(priv->irqdomain, 0), irq); + + return 0; +} + +static int __ar9331_mdio_write(struct mii_bus *sbus, u8 mode, u16 reg, u16 val) +{ + u8 r, p; + + p = FIELD_PREP(AR9331_SW_MDIO_PHY_MODE_M, mode) | + FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg); + r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg); + + return mdiobus_write(sbus, p, r, val); +} + +static int __ar9331_mdio_read(struct mii_bus *sbus, u16 reg) +{ + u8 r, p; + + p = FIELD_PREP(AR9331_SW_MDIO_PHY_MODE_M, AR9331_SW_MDIO_PHY_MODE_REG) | + FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg); + r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg); + + return mdiobus_read(sbus, p, r); +} + +static int ar9331_mdio_read(void *ctx, const void *reg_buf, size_t reg_len, + void *val_buf, size_t val_len) +{ + struct ar9331_sw_priv *priv = ctx; + struct mii_bus *sbus = priv->sbus; + u32 reg = *(u32 *)reg_buf; + int ret; + + if (reg == AR9331_SW_REG_PAGE) { + /* We cannot read the page selector register from hardware and + * we cache its value in regmap. Return all bits set here, + * that regmap will always write the page on first use. + */ + *(u32 *)val_buf = GENMASK(9, 0); + return 0; + } + + ret = __ar9331_mdio_read(sbus, reg); + if (ret < 0) + goto error; + + *(u32 *)val_buf = ret; + ret = __ar9331_mdio_read(sbus, reg + 2); + if (ret < 0) + goto error; + + *(u32 *)val_buf |= ret << 16; + + return 0; +error: + dev_err_ratelimited(&sbus->dev, "Bus error. Failed to read register.\n"); + return ret; +} + +static int ar9331_mdio_write(void *ctx, u32 reg, u32 val) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ctx; + struct mii_bus *sbus = priv->sbus; + int ret; + + if (reg == AR9331_SW_REG_PAGE) { + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_PAGE, + 0, val); + if (ret < 0) + goto error; + + return 0; + } + + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); + if (ret < 0) + goto error; + + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, + val >> 16); + if (ret < 0) + goto error; + + return 0; +error: + dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n"); + return ret; +} + +static int ar9331_sw_bus_write(void *context, const void *data, size_t count) +{ + u32 reg = *(u32 *)data; + u32 val = *((u32 *)data + 1); + + return ar9331_mdio_write(context, reg, val); +} + +static const struct regmap_range ar9331_valid_regs[] = { + regmap_reg_range(0x0, 0x0), + regmap_reg_range(0x10, 0x14), + regmap_reg_range(0x20, 0x24), + regmap_reg_range(0x2c, 0x30), + regmap_reg_range(0x40, 0x44), + regmap_reg_range(0x50, 0x78), + regmap_reg_range(0x80, 0x98), + + regmap_reg_range(0x100, 0x120), + regmap_reg_range(0x200, 0x220), + regmap_reg_range(0x300, 0x320), + regmap_reg_range(0x400, 0x420), + regmap_reg_range(0x500, 0x520), + regmap_reg_range(0x600, 0x620), + + regmap_reg_range(0x20000, 0x200a4), + regmap_reg_range(0x20100, 0x201a4), + regmap_reg_range(0x20200, 0x202a4), + regmap_reg_range(0x20300, 0x203a4), + regmap_reg_range(0x20400, 0x204a4), + regmap_reg_range(0x20500, 0x205a4), + + /* dummy page selector reg */ + regmap_reg_range(AR9331_SW_REG_PAGE, AR9331_SW_REG_PAGE), +}; + +static const struct regmap_range ar9331_nonvolatile_regs[] = { + regmap_reg_range(AR9331_SW_REG_PAGE, AR9331_SW_REG_PAGE), +}; + +static const struct regmap_range_cfg ar9331_regmap_range[] = { + { + .selector_reg = AR9331_SW_REG_PAGE, + .selector_mask = GENMASK(9, 0), + .selector_shift = 0, + + .window_start = 0, + .window_len = 512, + + .range_min = 0, + .range_max = AR9331_SW_REG_PAGE - 4, + }, +}; + +static const struct regmap_access_table ar9331_register_set = { + .yes_ranges = ar9331_valid_regs, + .n_yes_ranges = ARRAY_SIZE(ar9331_valid_regs), +}; + +static const struct regmap_access_table ar9331_volatile_set = { + .no_ranges = ar9331_nonvolatile_regs, + .n_no_ranges = ARRAY_SIZE(ar9331_nonvolatile_regs), +}; + +static const struct regmap_config ar9331_mdio_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = AR9331_SW_REG_PAGE, + + .ranges = ar9331_regmap_range, + .num_ranges = ARRAY_SIZE(ar9331_regmap_range), + + .volatile_table = &ar9331_volatile_set, + .wr_table = &ar9331_register_set, + .rd_table = &ar9331_register_set, + + .cache_type = REGCACHE_RBTREE, +}; + +static struct regmap_bus ar9331_sw_bus = { + .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, + .val_format_endian_default = REGMAP_ENDIAN_NATIVE, + .read = ar9331_mdio_read, + .write = ar9331_sw_bus_write, + .max_raw_read = 4, + .max_raw_write = 4, +}; + +static int ar9331_sw_probe(struct mdio_device *mdiodev) +{ + struct ar9331_sw_priv *priv; + struct dsa_switch *ds; + int ret; + + priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->regmap = devm_regmap_init(&mdiodev->dev, &ar9331_sw_bus, priv, + &ar9331_mdio_regmap_config); + if (IS_ERR(priv->regmap)) { + ret = PTR_ERR(priv->regmap); + dev_err(&mdiodev->dev, "regmap init failed: %d\n", ret); + return ret; + } + + priv->sw_reset = devm_reset_control_get(&mdiodev->dev, "switch"); + if (IS_ERR(priv->sw_reset)) { + dev_err(&mdiodev->dev, "missing switch reset\n"); + return PTR_ERR(priv->sw_reset); + } + + priv->sbus = mdiodev->bus; + priv->dev = &mdiodev->dev; + + ret = ar9331_sw_irq_init(priv); + if (ret) + return ret; + + ds = &priv->ds; + ds->dev = &mdiodev->dev; + ds->num_ports = AR9331_SW_PORTS; + ds->priv = priv; + priv->ops = ar9331_sw_ops; + ds->ops = &priv->ops; + dev_set_drvdata(&mdiodev->dev, priv); + + ret = dsa_register_switch(ds); + if (ret) + goto err_remove_irq; + + return 0; + +err_remove_irq: + irq_domain_remove(priv->irqdomain); + + return ret; +} + +static void ar9331_sw_remove(struct mdio_device *mdiodev) +{ + struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev); + + irq_domain_remove(priv->irqdomain); + mdiobus_unregister(priv->mbus); + dsa_unregister_switch(&priv->ds); + + reset_control_assert(priv->sw_reset); +} + +static const struct of_device_id ar9331_sw_of_match[] = { + { .compatible = "qca,ar9331-switch" }, + { }, +}; + +static struct mdio_driver ar9331_sw_mdio_driver = { + .probe = ar9331_sw_probe, + .remove = ar9331_sw_remove, + .mdiodrv.driver = { + .name = AR9331_SW_NAME, + .of_match_table = ar9331_sw_of_match, + }, +}; + +mdio_module_driver(ar9331_sw_mdio_driver); + +MODULE_AUTHOR("Oleksij Rempel <kernel@pengutronix.de>"); +MODULE_DESCRIPTION("Driver for Atheros AR9331 switch"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 3da97996bdf3..8cafd06ff0c4 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -196,7 +196,7 @@ static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev); static int el3_close(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void el3_tx_timeout (struct net_device *dev); +static void el3_tx_timeout (struct net_device *dev, unsigned int txqueue); static void el3_down(struct net_device *dev); static void el3_up(struct net_device *dev); static const struct ethtool_ops ethtool_ops; @@ -689,7 +689,7 @@ el3_open(struct net_device *dev) } static void -el3_tx_timeout (struct net_device *dev) +el3_tx_timeout (struct net_device *dev, unsigned int txqueue) { int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index b15752267c8d..1e233e2f0a5a 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -371,7 +371,7 @@ static void corkscrew_timer(struct timer_list *t); static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb, struct net_device *dev); static int corkscrew_rx(struct net_device *dev); -static void corkscrew_timeout(struct net_device *dev); +static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue); static int boomerang_rx(struct net_device *dev); static irqreturn_t corkscrew_interrupt(int irq, void *dev_id); static int corkscrew_close(struct net_device *dev); @@ -961,7 +961,7 @@ static void corkscrew_timer(struct timer_list *t) #endif /* AUTOMEDIA */ } -static void corkscrew_timeout(struct net_device *dev) +static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct corkscrew_private *vp = netdev_priv(dev); diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index 3044a6f35f04..ef1c3151fbb2 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -234,7 +234,7 @@ static void update_stats(struct net_device *dev); static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev, int worklimit); static int el3_close(struct net_device *dev); -static void el3_tx_timeout(struct net_device *dev); +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue); static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void set_rx_mode(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -690,7 +690,7 @@ static int el3_open(struct net_device *dev) return 0; } -static void el3_tx_timeout(struct net_device *dev) +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 2b2695311bda..d47cde6c5f08 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -173,7 +173,7 @@ static void update_stats(struct net_device *dev); static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev); static int el3_close(struct net_device *dev); -static void el3_tx_timeout(struct net_device *dev); +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue); static void set_rx_mode(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static const struct ethtool_ops netdev_ethtool_ops; @@ -526,7 +526,7 @@ static int el3_open(struct net_device *dev) return 0; } -static void el3_tx_timeout(struct net_device *dev) +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 8785c2ff3825..fc046797c0ea 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -776,7 +776,7 @@ static void set_rx_mode(struct net_device *dev); #ifdef CONFIG_PCI static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); #endif -static void vortex_tx_timeout(struct net_device *dev); +static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue); static void acpi_set_WOL(struct net_device *dev); static const struct ethtool_ops vortex_ethtool_ops; static void set_8021q_mode(struct net_device *dev, int enable); @@ -1877,7 +1877,7 @@ leave_media_alone: iowrite16(FakeIntr, ioaddr + EL3_CMD); } -static void vortex_tx_timeout(struct net_device *dev) +static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct vortex_private *vp = netdev_priv(dev); void __iomem *ioaddr = vp->ioaddr; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index be823c186517..14fce6658106 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2013,7 +2013,7 @@ typhoon_stop_runtime(struct typhoon *tp, int wait_type) } static void -typhoon_tx_timeout(struct net_device *dev) +typhoon_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct typhoon *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c index 78f3e532c600..0e0aa4016858 100644 --- a/drivers/net/ethernet/8390/8390.c +++ b/drivers/net/ethernet/8390/8390.c @@ -36,9 +36,9 @@ void ei_set_multicast_list(struct net_device *dev) } EXPORT_SYMBOL(ei_set_multicast_list); -void ei_tx_timeout(struct net_device *dev) +void ei_tx_timeout(struct net_device *dev, unsigned int txqueue) { - __ei_tx_timeout(dev); + __ei_tx_timeout(dev, txqueue); } EXPORT_SYMBOL(ei_tx_timeout); diff --git a/drivers/net/ethernet/8390/8390.h b/drivers/net/ethernet/8390/8390.h index 3e2f2c2e7b58..529c728f334a 100644 --- a/drivers/net/ethernet/8390/8390.h +++ b/drivers/net/ethernet/8390/8390.h @@ -32,7 +32,7 @@ void NS8390_init(struct net_device *dev, int startp); int ei_open(struct net_device *dev); int ei_close(struct net_device *dev); irqreturn_t ei_interrupt(int irq, void *dev_id); -void ei_tx_timeout(struct net_device *dev); +void ei_tx_timeout(struct net_device *dev, unsigned int txqueue); netdev_tx_t ei_start_xmit(struct sk_buff *skb, struct net_device *dev); void ei_set_multicast_list(struct net_device *dev); struct net_device_stats *ei_get_stats(struct net_device *dev); @@ -50,7 +50,7 @@ void NS8390p_init(struct net_device *dev, int startp); int eip_open(struct net_device *dev); int eip_close(struct net_device *dev); irqreturn_t eip_interrupt(int irq, void *dev_id); -void eip_tx_timeout(struct net_device *dev); +void eip_tx_timeout(struct net_device *dev, unsigned int txqueue); netdev_tx_t eip_start_xmit(struct sk_buff *skb, struct net_device *dev); void eip_set_multicast_list(struct net_device *dev); struct net_device_stats *eip_get_stats(struct net_device *dev); diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c index 6cf36992a2c6..6834742057b3 100644 --- a/drivers/net/ethernet/8390/8390p.c +++ b/drivers/net/ethernet/8390/8390p.c @@ -41,9 +41,9 @@ void eip_set_multicast_list(struct net_device *dev) } EXPORT_SYMBOL(eip_set_multicast_list); -void eip_tx_timeout(struct net_device *dev) +void eip_tx_timeout(struct net_device *dev, unsigned int txqueue) { - __ei_tx_timeout(dev); + __ei_tx_timeout(dev, txqueue); } EXPORT_SYMBOL(eip_tx_timeout); diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 0b6bbf63f7ca..aeae7966a082 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -83,7 +83,7 @@ static netdev_tx_t axnet_start_xmit(struct sk_buff *skb, struct net_device *dev); static struct net_device_stats *get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void axnet_tx_timeout(struct net_device *dev); +static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); static void ei_watchdog(struct timer_list *t); static void axnet_reset_8390(struct net_device *dev); @@ -903,7 +903,7 @@ static int ax_close(struct net_device *dev) * completed (or failed) - i.e. never posted a Tx related interrupt. */ -static void axnet_tx_timeout(struct net_device *dev) +static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue) { long e8390_base = dev->base_addr; struct ei_device *ei_local = netdev_priv(dev); diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c index c9c55c9eab9f..babc92e2692e 100644 --- a/drivers/net/ethernet/8390/lib8390.c +++ b/drivers/net/ethernet/8390/lib8390.c @@ -251,7 +251,7 @@ static int __ei_close(struct net_device *dev) * completed (or failed) - i.e. never posted a Tx related interrupt. */ -static void __ei_tx_timeout(struct net_device *dev) +static void __ei_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long e8390_base = dev->base_addr; struct ei_device *ei_local = netdev_priv(dev); diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 816540e6beac..165d18405b0c 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -576,7 +576,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); @@ -1105,7 +1105,7 @@ static void check_duplex(struct net_device *dev) } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 174344c450af..3c51d8c502ed 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3811,7 +3811,7 @@ drop_err: * specified by the 'tx_timeo" element in the net_device structure (see * et131x_alloc_device() to see how this value is set). */ -static void et131x_tx_timeout(struct net_device *netdev) +static void et131x_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct et131x_adapter *adapter = netdev_priv(netdev); struct tx_ring *tx_ring = &adapter->tx_ring; diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 0537df06a9b5..5ea806423e4c 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -407,7 +407,7 @@ static void emac_init_device(struct net_device *dev) } /* Our watchdog timed out. Called by the networking layer */ -static void emac_timeout(struct net_device *dev) +static void emac_timeout(struct net_device *dev, unsigned int txqueue) { struct emac_board_info *db = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 46b4207d3266..f366faf88eee 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -437,7 +437,7 @@ static const struct ethtool_ops ace_ethtool_ops = { .set_link_ksettings = ace_set_link_ksettings, }; -static void ace_watchdog(struct net_device *dev); +static void ace_watchdog(struct net_device *dev, unsigned int txqueue); static const struct net_device_ops ace_netdev_ops = { .ndo_open = ace_open, @@ -1542,7 +1542,7 @@ static void ace_set_rxtx_parms(struct net_device *dev, int jumbo) } -static void ace_watchdog(struct net_device *data) +static void ace_watchdog(struct net_device *data, unsigned int txqueue) { struct net_device *dev = data; struct ace_private *ap = netdev_priv(dev); diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index a3250dcf7d53..745fffd422aa 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -744,7 +744,9 @@ static int ena_set_channels(struct net_device *netdev, struct ena_adapter *adapter = netdev_priv(netdev); u32 count = channels->combined_count; /* The check for max value is already done in ethtool */ - if (count < ENA_MIN_NUM_IO_QUEUES) + if (count < ENA_MIN_NUM_IO_QUEUES || + (ena_xdp_present(adapter) && + !ena_xdp_legal_queue_count(adapter, channels->combined_count))) return -EINVAL; return ena_update_queue_count(adapter, count); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index d46a912002ff..26954fde4766 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -36,7 +36,6 @@ #include <linux/cpu_rmap.h> #endif /* CONFIG_RFS_ACCEL */ #include <linux/ethtool.h> -#include <linux/if_vlan.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/numa.h> @@ -47,6 +46,7 @@ #include <net/ip.h> #include "ena_netdev.h" +#include <linux/bpf_trace.h> #include "ena_pci_id_tbl.h" static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n"; @@ -78,7 +78,37 @@ static void check_for_admin_com_state(struct ena_adapter *adapter); static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_restore_device(struct ena_adapter *adapter); -static void ena_tx_timeout(struct net_device *dev) +static void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count); +static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index, + int count); +static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index, + int count); +static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid); +static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, + int count); +static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid); +static void ena_free_tx_resources(struct ena_adapter *adapter, int qid); +static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget); +static void ena_destroy_all_tx_queues(struct ena_adapter *adapter); +static void ena_free_all_io_tx_resources(struct ena_adapter *adapter); +static void ena_napi_disable_in_range(struct ena_adapter *adapter, + int first_index, int count); +static void ena_napi_enable_in_range(struct ena_adapter *adapter, + int first_index, int count); +static int ena_up(struct ena_adapter *adapter); +static void ena_down(struct ena_adapter *adapter); +static void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); +static void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); +static void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info); +static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count); + +static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ena_adapter *adapter = netdev_priv(dev); @@ -123,6 +153,451 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) return ret; } +static int ena_xmit_common(struct net_device *dev, + struct ena_ring *ring, + struct ena_tx_buffer *tx_info, + struct ena_com_tx_ctx *ena_tx_ctx, + u16 next_to_use, + u32 bytes) +{ + struct ena_adapter *adapter = netdev_priv(dev); + int rc, nb_hw_desc; + + if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq, + ena_tx_ctx))) { + netif_dbg(adapter, tx_queued, dev, + "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", + ring->qid); + ena_com_write_sq_doorbell(ring->ena_com_io_sq); + } + + /* prepare the packet's descriptors to dma engine */ + rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx, + &nb_hw_desc); + + /* In case there isn't enough space in the queue for the packet, + * we simply drop it. All other failure reasons of + * ena_com_prepare_tx() are fatal and therefore require a device reset. + */ + if (unlikely(rc)) { + netif_err(adapter, tx_queued, dev, + "failed to prepare tx bufs\n"); + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.prepare_ctx_err++; + u64_stats_update_end(&ring->syncp); + if (rc != -ENOMEM) { + adapter->reset_reason = + ENA_REGS_RESET_DRIVER_INVALID_STATE; + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + return rc; + } + + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.cnt++; + ring->tx_stats.bytes += bytes; + u64_stats_update_end(&ring->syncp); + + tx_info->tx_descs = nb_hw_desc; + tx_info->last_jiffies = jiffies; + tx_info->print_once = 0; + + ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, + ring->ring_size); + return 0; +} + +/* This is the XDP napi callback. XDP queues use a separate napi callback + * than Rx/Tx queues. + */ +static int ena_xdp_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + u32 xdp_work_done, xdp_budget; + struct ena_ring *xdp_ring; + int napi_comp_call = 0; + int ret; + + xdp_ring = ena_napi->xdp_ring; + xdp_ring->first_interrupt = ena_napi->first_interrupt; + + xdp_budget = budget; + + if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) || + test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget); + + /* If the device is about to reset or down, avoid unmask + * the interrupt and return 0 so NAPI won't reschedule + */ + if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) { + napi_complete_done(napi, 0); + ret = 0; + } else if (xdp_budget > xdp_work_done) { + napi_comp_call = 1; + if (napi_complete_done(napi, xdp_work_done)) + ena_unmask_interrupt(xdp_ring, NULL); + ena_update_ring_numa_node(xdp_ring, NULL); + ret = xdp_work_done; + } else { + ret = xdp_budget; + } + + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.napi_comp += napi_comp_call; + xdp_ring->tx_stats.tx_poll++; + u64_stats_update_end(&xdp_ring->syncp); + + return ret; +} + +static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring, + struct ena_tx_buffer *tx_info, + struct xdp_buff *xdp, + void **push_hdr, + u32 *push_len) +{ + struct ena_adapter *adapter = xdp_ring->adapter; + struct ena_com_buf *ena_buf; + dma_addr_t dma = 0; + u32 size; + + tx_info->xdpf = convert_to_xdp_frame(xdp); + size = tx_info->xdpf->len; + ena_buf = tx_info->bufs; + + /* llq push buffer */ + *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); + *push_hdr = tx_info->xdpf->data; + + if (size - *push_len > 0) { + dma = dma_map_single(xdp_ring->dev, + *push_hdr + *push_len, + size - *push_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) + goto error_report_dma_error; + + tx_info->map_linear_data = 1; + tx_info->num_of_bufs = 1; + } + + ena_buf->paddr = dma; + ena_buf->len = size; + + return 0; + +error_report_dma_error: + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.dma_mapping_err++; + u64_stats_update_end(&xdp_ring->syncp); + netdev_warn(adapter->netdev, "failed to map xdp buff\n"); + + xdp_return_frame_rx_napi(tx_info->xdpf); + tx_info->xdpf = NULL; + tx_info->num_of_bufs = 0; + + return -EINVAL; +} + +static int ena_xdp_xmit_buff(struct net_device *dev, + struct xdp_buff *xdp, + int qid, + struct ena_rx_buffer *rx_info) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_com_tx_ctx ena_tx_ctx = {0}; + struct ena_tx_buffer *tx_info; + struct ena_ring *xdp_ring; + struct ena_ring *rx_ring; + u16 next_to_use, req_id; + int rc; + void *push_hdr; + u32 push_len; + + xdp_ring = &adapter->tx_ring[qid]; + next_to_use = xdp_ring->next_to_use; + req_id = xdp_ring->free_ids[next_to_use]; + tx_info = &xdp_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + rx_ring = &xdp_ring->adapter->rx_ring[qid - + xdp_ring->adapter->xdp_first_ring]; + page_ref_inc(rx_info->page); + tx_info->xdp_rx_page = rx_info->page; + + rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len); + if (unlikely(rc)) + goto error_drop_packet; + + ena_tx_ctx.ena_bufs = tx_info->bufs; + ena_tx_ctx.push_header = push_hdr; + ena_tx_ctx.num_bufs = tx_info->num_of_bufs; + ena_tx_ctx.req_id = req_id; + ena_tx_ctx.header_len = push_len; + + rc = ena_xmit_common(dev, + xdp_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + xdp->data_end - xdp->data); + if (rc) + goto error_unmap_dma; + /* trigger the dma engine. ena_com_write_sq_doorbell() + * has a mb + */ + ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.doorbells++; + u64_stats_update_end(&xdp_ring->syncp); + + return NETDEV_TX_OK; + +error_unmap_dma: + ena_unmap_tx_buff(xdp_ring, tx_info); + tx_info->xdpf = NULL; +error_drop_packet: + + return NETDEV_TX_OK; +} + +static int ena_xdp_execute(struct ena_ring *rx_ring, + struct xdp_buff *xdp, + struct ena_rx_buffer *rx_info) +{ + struct bpf_prog *xdp_prog; + u32 verdict = XDP_PASS; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); + + if (!xdp_prog) + goto out; + + verdict = bpf_prog_run_xdp(xdp_prog, xdp); + + if (verdict == XDP_TX) + ena_xdp_xmit_buff(rx_ring->netdev, + xdp, + rx_ring->qid + rx_ring->adapter->num_io_queues, + rx_info); + else if (unlikely(verdict == XDP_ABORTED)) + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + else if (unlikely(verdict > XDP_TX)) + bpf_warn_invalid_xdp_action(verdict); +out: + rcu_read_unlock(); + return verdict; +} + +static void ena_init_all_xdp_queues(struct ena_adapter *adapter) +{ + adapter->xdp_first_ring = adapter->num_io_queues; + adapter->xdp_num_queues = adapter->num_io_queues; + + ena_init_io_rings(adapter, + adapter->xdp_first_ring, + adapter->xdp_num_queues); +} + +static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) +{ + int rc = 0; + + rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring, + adapter->xdp_num_queues); + if (rc) + goto setup_err; + + rc = ena_create_io_tx_queues_in_range(adapter, + adapter->xdp_first_ring, + adapter->xdp_num_queues); + if (rc) + goto create_err; + + return 0; + +create_err: + ena_free_all_io_tx_resources(adapter); +setup_err: + return rc; +} + +/* Provides a way for both kernel and bpf-prog to know + * more about the RX-queue a given XDP frame arrived on. + */ +static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) +{ + int rc; + + rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + goto err; + } + + rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, + NULL); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + +err: + return rc; +} + +static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) +{ + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); +} + +void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, + int count) +{ + struct ena_ring *rx_ring; + int i = 0; + + for (i = first; i < count; i++) { + rx_ring = &adapter->rx_ring[i]; + xchg(&rx_ring->xdp_bpf_prog, prog); + if (prog) { + ena_xdp_register_rxq_info(rx_ring); + rx_ring->rx_headroom = XDP_PACKET_HEADROOM; + } else { + ena_xdp_unregister_rxq_info(rx_ring); + rx_ring->rx_headroom = 0; + } + } +} + +void ena_xdp_exchange_program(struct ena_adapter *adapter, + struct bpf_prog *prog) +{ + struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); + + ena_xdp_exchange_program_rx_in_range(adapter, + prog, + 0, + adapter->num_io_queues); + + if (old_bpf_prog) + bpf_prog_put(old_bpf_prog); +} + +static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) +{ + bool was_up; + int rc; + + was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + if (was_up) + ena_down(adapter); + + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + ena_xdp_exchange_program(adapter, NULL); + if (was_up) { + rc = ena_up(adapter); + if (rc) + return rc; + } + return 0; +} + +static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct bpf_prog *prog = bpf->prog; + struct bpf_prog *old_bpf_prog; + int rc, prev_mtu; + bool is_up; + + is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + rc = ena_xdp_allowed(adapter); + if (rc == ENA_XDP_ALLOWED) { + old_bpf_prog = adapter->xdp_bpf_prog; + if (prog) { + if (!is_up) { + ena_init_all_xdp_queues(adapter); + } else if (!old_bpf_prog) { + ena_down(adapter); + ena_init_all_xdp_queues(adapter); + } + ena_xdp_exchange_program(adapter, prog); + + if (is_up && !old_bpf_prog) { + rc = ena_up(adapter); + if (rc) + return rc; + } + } else if (old_bpf_prog) { + rc = ena_destroy_and_free_all_xdp_queues(adapter); + if (rc) + return rc; + } + + prev_mtu = netdev->max_mtu; + netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; + + if (!old_bpf_prog) + netif_info(adapter, drv, adapter->netdev, + "xdp program set, changing the max_mtu from %d to %d", + prev_mtu, netdev->max_mtu); + + } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", + netdev->mtu, ENA_XDP_MAX_MTU); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); + return -EINVAL; + } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n", + adapter->num_io_queues, adapter->max_num_io_queues); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); + return -EINVAL; + } + + return 0; +} + +/* This is the main xdp callback, it's used by the kernel to set/unset the xdp + * program as well as to query the current xdp program id. + */ +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return ena_xdp_set(netdev, bpf); + case XDP_QUERY_PROG: + bpf->prog_id = adapter->xdp_bpf_prog ? + adapter->xdp_bpf_prog->aux->id : 0; + break; + default: + return -EINVAL; + } + return 0; +} + static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) { #ifdef CONFIG_RFS_ACCEL @@ -164,7 +639,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, u64_stats_init(&ring->syncp); } -static void ena_init_io_rings(struct ena_adapter *adapter) +static void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev; struct ena_ring *txr, *rxr; @@ -172,13 +648,12 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_dev = adapter->ena_dev; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { txr = &adapter->tx_ring[i]; rxr = &adapter->rx_ring[i]; - /* TX/RX common ring state */ + /* TX common ring state */ ena_init_io_rings_common(adapter, txr, i); - ena_init_io_rings_common(adapter, rxr, i); /* TX specific ring state */ txr->ring_size = adapter->requested_tx_ring_size; @@ -188,14 +663,20 @@ static void ena_init_io_rings(struct ena_adapter *adapter) txr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); - /* RX specific ring state */ - rxr->ring_size = adapter->requested_rx_ring_size; - rxr->rx_copybreak = adapter->rx_copybreak; - rxr->sgl_size = adapter->max_rx_sgl_size; - rxr->smoothed_interval = - ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); - rxr->empty_rx_queue = 0; - adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + /* Don't init RX queues for xdp queues */ + if (!ENA_IS_XDP_INDEX(adapter, i)) { + /* RX common ring state */ + ena_init_io_rings_common(adapter, rxr, i); + + /* RX specific ring state */ + rxr->ring_size = adapter->requested_rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; + adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } } } @@ -285,16 +766,13 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->push_buf_intermediate_buf = NULL; } -/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues - * @adapter: private structure - * - * Return 0 on success, negative on failure - */ -static int ena_setup_all_tx_resources(struct ena_adapter *adapter) +static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i, rc = 0; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { rc = ena_setup_tx_resources(adapter, i); if (rc) goto err_setup_tx; @@ -308,11 +786,20 @@ err_setup_tx: "Tx queue %d: allocation failed\n", i); /* rewind the index freeing the rings as we go */ - while (i--) + while (first_index < i--) ena_free_tx_resources(adapter, i); return rc; } +static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count) +{ + int i; + + for (i = first_index; i < first_index + count; i++) + ena_free_tx_resources(adapter, i); +} + /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues * @adapter: board private structure * @@ -320,10 +807,10 @@ err_setup_tx: */ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) { - int i; - - for (i = 0; i < adapter->num_io_queues; i++) - ena_free_tx_resources(adapter, i); + ena_free_all_io_tx_resources_in_range(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); } static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) @@ -495,8 +982,8 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring, rx_info->page = page; rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; - ena_buf->paddr = dma; - ena_buf->len = ENA_PAGE_SIZE; + ena_buf->paddr = dma + rx_ring->rx_headroom; + ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom; return 0; } @@ -513,7 +1000,9 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE, + dma_unmap_page(rx_ring->dev, + ena_buf->paddr - rx_ring->rx_headroom, + ENA_PAGE_SIZE, DMA_FROM_DEVICE); __free_page(page); @@ -620,8 +1109,8 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) ena_free_rx_bufs(adapter, i); } -static void ena_unmap_tx_skb(struct ena_ring *tx_ring, - struct ena_tx_buffer *tx_info) +static void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info) { struct ena_com_buf *ena_buf; u32 cnt; @@ -675,7 +1164,7 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) tx_ring->qid, i); } - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); dev_kfree_skb_any(tx_info->skb); } @@ -688,7 +1177,7 @@ static void ena_free_all_tx_bufs(struct ena_adapter *adapter) struct ena_ring *tx_ring; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { tx_ring = &adapter->tx_ring[i]; ena_free_tx_bufs(tx_ring); } @@ -699,7 +1188,7 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) u16 ena_qid; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { ena_qid = ENA_IO_TXQ_IDX(i); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); } @@ -723,6 +1212,32 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter) ena_destroy_all_rx_queues(adapter); } +static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, + struct ena_tx_buffer *tx_info, bool is_xdp) +{ + if (tx_info) + netif_err(ring->adapter, + tx_done, + ring->netdev, + "tx_info doesn't have valid %s", + is_xdp ? "xdp frame" : "skb"); + else + netif_err(ring->adapter, + tx_done, + ring->netdev, + "Invalid req_id: %hu\n", + req_id); + + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.bad_req_id++; + u64_stats_update_end(&ring->syncp); + + /* Trigger device reset */ + ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; + set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags); + return -EFAULT; +} + static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) { struct ena_tx_buffer *tx_info = NULL; @@ -733,21 +1248,20 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) return 0; } - if (tx_info) - netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, - "tx_info doesn't have valid skb\n"); - else - netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, - "Invalid req_id: %hu\n", req_id); + return handle_invalid_req_id(tx_ring, req_id, tx_info, false); +} - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.bad_req_id++; - u64_stats_update_end(&tx_ring->syncp); +static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info = NULL; - /* Trigger device reset */ - tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; - set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags); - return -EFAULT; + if (likely(req_id < xdp_ring->ring_size)) { + tx_info = &xdp_ring->tx_buffer_info[req_id]; + if (likely(tx_info->xdpf)) + return 0; + } + + return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); } static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) @@ -786,7 +1300,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) tx_info->skb = NULL; tx_info->last_jiffies = 0; - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, "tx_poll: q %d skb %p completed\n", tx_ring->qid, @@ -1037,6 +1551,33 @@ static void ena_set_rx_hash(struct ena_ring *rx_ring, } } +int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) +{ + struct ena_rx_buffer *rx_info; + int ret; + + rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; + xdp->data = page_address(rx_info->page) + + rx_info->page_offset + rx_ring->rx_headroom; + xdp_set_data_meta_invalid(xdp); + xdp->data_hard_start = page_address(rx_info->page); + xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len; + /* If for some reason we received a bigger packet than + * we expect, then we simply drop it + */ + if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) + return XDP_DROP; + + ret = ena_xdp_execute(rx_ring, xdp, rx_info); + + /* The xdp program might expand the headers */ + if (ret == XDP_PASS) { + rx_info->page_offset = xdp->data - xdp->data_hard_start; + rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; + } + + return ret; +} /* ena_clean_rx_irq - Cleanup RX irq * @rx_ring: RX ring to clean * @napi: napi handler @@ -1048,23 +1589,27 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, u32 budget) { u16 next_to_clean = rx_ring->next_to_clean; - u32 res_budget, work_done; - struct ena_com_rx_ctx ena_rx_ctx; struct ena_adapter *adapter; + u32 res_budget, work_done; + int rx_copybreak_pkt = 0; + int refill_threshold; struct sk_buff *skb; int refill_required; - int refill_threshold; - int rc = 0; + struct xdp_buff xdp; int total_len = 0; - int rx_copybreak_pkt = 0; + int xdp_verdict; + int rc = 0; int i; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "%s qid %d\n", __func__, rx_ring->qid); res_budget = budget; + xdp.rxq = &rx_ring->xdp_rxq; do { + xdp_verdict = XDP_PASS; + skb = NULL; ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = rx_ring->sgl_size; ena_rx_ctx.descs = 0; @@ -1082,12 +1627,22 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + if (ena_xdp_present_ring(rx_ring)) + xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); + /* allocate skb and fill it */ - skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, - &next_to_clean); + if (xdp_verdict == XDP_PASS) + skb = ena_rx_skb(rx_ring, + rx_ring->ena_bufs, + ena_rx_ctx.descs, + &next_to_clean); - /* exit if we failed to retrieve a buffer */ if (unlikely(!skb)) { + if (xdp_verdict == XDP_TX) { + ena_free_rx_page(rx_ring, + &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]); + res_budget--; + } for (i = 0; i < ena_rx_ctx.descs; i++) { rx_ring->free_ids[next_to_clean] = rx_ring->ena_bufs[i].req_id; @@ -1095,6 +1650,8 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ENA_RX_RING_IDX_NEXT(next_to_clean, rx_ring->ring_size); } + if (xdp_verdict == XDP_TX || xdp_verdict == XDP_DROP) + continue; break; } @@ -1188,9 +1745,14 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { struct ena_eth_io_intr_reg intr_reg; - u32 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? - rx_ring->smoothed_interval : - ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); + u32 rx_interval = 0; + /* Rx ring can be NULL when for XDP tx queues which don't have an + * accompanying rx_ring pair. + */ + if (rx_ring) + rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? + rx_ring->smoothed_interval : + ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); /* Update intr register: rx intr delay, * tx intr delay and interrupt unmask @@ -1203,8 +1765,9 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, /* It is a shared MSI-X. * Tx and Rx CQ have pointer to it. * So we use one of them to reach the intr reg + * The Tx ring is used because the rx_ring is NULL for XDP queues */ - ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg); } static void ena_update_ring_numa_node(struct ena_ring *tx_ring, @@ -1222,22 +1785,82 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring, if (numa_node != NUMA_NO_NODE) { ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); - ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + if (rx_ring) + ena_com_update_numa_node(rx_ring->ena_com_io_cq, + numa_node); } tx_ring->cpu = cpu; - rx_ring->cpu = cpu; + if (rx_ring) + rx_ring->cpu = cpu; return; out: put_cpu(); } +static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) +{ + u32 total_done = 0; + u16 next_to_clean; + u32 tx_bytes = 0; + int tx_pkts = 0; + u16 req_id; + int rc; + + if (unlikely(!xdp_ring)) + return 0; + next_to_clean = xdp_ring->next_to_clean; + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct xdp_frame *xdpf; + + rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, + &req_id); + if (rc) + break; + + rc = validate_xdp_req_id(xdp_ring, req_id); + if (rc) + break; + + tx_info = &xdp_ring->tx_buffer_info[req_id]; + xdpf = tx_info->xdpf; + + tx_info->xdpf = NULL; + tx_info->last_jiffies = 0; + ena_unmap_tx_buff(xdp_ring, tx_info); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d skb %p completed\n", xdp_ring->qid, + xdpf); + + tx_bytes += xdpf->len; + tx_pkts++; + total_done += tx_info->tx_descs; + + __free_page(tx_info->xdp_rx_page); + xdp_ring->free_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + xdp_ring->ring_size); + } + + xdp_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d done. total pkts: %d\n", + xdp_ring->qid, tx_pkts); + + return tx_pkts; +} + static int ena_io_poll(struct napi_struct *napi, int budget) { struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); struct ena_ring *tx_ring, *rx_ring; - u32 tx_work_done; u32 rx_work_done; int tx_budget; @@ -1247,6 +1870,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget) tx_ring = ena_napi->tx_ring; rx_ring = ena_napi->rx_ring; + tx_ring->first_interrupt = ena_napi->first_interrupt; + rx_ring->first_interrupt = ena_napi->first_interrupt; + tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || @@ -1318,8 +1944,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data) { struct ena_napi *ena_napi = data; - ena_napi->tx_ring->first_interrupt = true; - ena_napi->rx_ring->first_interrupt = true; + ena_napi->first_interrupt = true; napi_schedule_irqoff(&ena_napi->napi); @@ -1394,10 +2019,12 @@ static void ena_setup_io_intr(struct ena_adapter *adapter) { struct net_device *netdev; int irq_idx, i, cpu; + int io_queue_count; netdev = adapter->netdev; + io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < io_queue_count; i++) { irq_idx = ENA_IO_IRQ_IDX(i); cpu = i % num_online_cpus(); @@ -1525,45 +2152,64 @@ static void ena_disable_io_intr_sync(struct ena_adapter *adapter) synchronize_irq(adapter->irq_tbl[i].vector); } -static void ena_del_napi(struct ena_adapter *adapter) +static void ena_del_napi_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) - netif_napi_del(&adapter->ena_napi[i].napi); + for (i = first_index; i < first_index + count; i++) { + /* Check if napi was initialized before */ + if (!ENA_IS_XDP_INDEX(adapter, i) || + adapter->ena_napi[i].xdp_ring) + netif_napi_del(&adapter->ena_napi[i].napi); + else + WARN_ON(ENA_IS_XDP_INDEX(adapter, i) && + adapter->ena_napi[i].xdp_ring); + } } -static void ena_init_napi(struct ena_adapter *adapter) +static void ena_init_napi_in_range(struct ena_adapter *adapter, + int first_index, int count) { - struct ena_napi *napi; + struct ena_napi *napi = {0}; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { napi = &adapter->ena_napi[i]; netif_napi_add(adapter->netdev, &adapter->ena_napi[i].napi, - ena_io_poll, + ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll, ENA_NAPI_BUDGET); - napi->rx_ring = &adapter->rx_ring[i]; - napi->tx_ring = &adapter->tx_ring[i]; + + if (!ENA_IS_XDP_INDEX(adapter, i)) { + napi->rx_ring = &adapter->rx_ring[i]; + napi->tx_ring = &adapter->tx_ring[i]; + } else { + napi->xdp_ring = &adapter->tx_ring[i]; + } napi->qid = i; } } -static void ena_napi_disable_all(struct ena_adapter *adapter) +static void ena_napi_disable_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) + for (i = first_index; i < first_index + count; i++) napi_disable(&adapter->ena_napi[i].napi); } -static void ena_napi_enable_all(struct ena_adapter *adapter) +static void ena_napi_enable_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) + for (i = first_index; i < first_index + count; i++) napi_enable(&adapter->ena_napi[i].napi); } @@ -1578,7 +2224,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) rc = ena_rss_init_default(adapter); if (rc && (rc != -EOPNOTSUPP)) { netif_err(adapter, ifup, adapter->netdev, - "Failed to init RSS rc: %d\n", rc); + "Failed to init RSS rc: %d\n", rc); return rc; } } @@ -1616,7 +2262,9 @@ static int ena_up_complete(struct ena_adapter *adapter) /* enable transmits */ netif_tx_start_all_queues(adapter->netdev); - ena_napi_enable_all(adapter); + ena_napi_enable_in_range(adapter, + 0, + adapter->xdp_num_queues + adapter->num_io_queues); return 0; } @@ -1649,7 +2297,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) if (rc) { netif_err(adapter, ifup, adapter->netdev, "Failed to create I/O TX queue num %d rc: %d\n", - qid, rc); + qid, rc); return rc; } @@ -1668,12 +2316,13 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) return rc; } -static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) +static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { rc = ena_create_io_tx_queue(adapter, i); if (rc) goto create_err; @@ -1682,7 +2331,7 @@ static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) return 0; create_err: - while (i--) + while (i-- > first_index) ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); return rc; @@ -1727,13 +2376,15 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) netif_err(adapter, ifup, adapter->netdev, "Failed to get RX queue handlers. RX queue num %d rc: %d\n", qid, rc); - ena_com_destroy_io_queue(ena_dev, ena_qid); - return rc; + goto err; } ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); return rc; +err: + ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) @@ -1760,7 +2411,8 @@ create_err: } static void set_io_rings_size(struct ena_adapter *adapter, - int new_tx_size, int new_rx_size) + int new_tx_size, + int new_rx_size) { int i; @@ -1794,14 +2446,24 @@ static int create_queues_with_size_backoff(struct ena_adapter *adapter) * ones due to past queue allocation failures. */ set_io_rings_size(adapter, adapter->requested_tx_ring_size, - adapter->requested_rx_ring_size); + adapter->requested_rx_ring_size); while (1) { - rc = ena_setup_all_tx_resources(adapter); + if (ena_xdp_present(adapter)) { + rc = ena_setup_and_create_all_xdp_queues(adapter); + + if (rc) + goto err_setup_tx; + } + rc = ena_setup_tx_resources_in_range(adapter, + 0, + adapter->num_io_queues); if (rc) goto err_setup_tx; - rc = ena_create_all_io_tx_queues(adapter); + rc = ena_create_io_tx_queues_in_range(adapter, + 0, + adapter->num_io_queues); if (rc) goto err_create_tx_queues; @@ -1825,7 +2487,7 @@ err_setup_tx: if (rc != -ENOMEM) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with error code %d\n", - rc); + rc); return rc; } @@ -1848,7 +2510,7 @@ err_setup_tx: new_rx_ring_size = cur_rx_ring_size / 2; if (new_tx_ring_size < ENA_MIN_RING_SIZE || - new_rx_ring_size < ENA_MIN_RING_SIZE) { + new_rx_ring_size < ENA_MIN_RING_SIZE) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n", ENA_MIN_RING_SIZE); @@ -1867,10 +2529,11 @@ err_setup_tx: static int ena_up(struct ena_adapter *adapter) { - int rc, i; + int io_queue_count, rc, i; netdev_dbg(adapter->netdev, "%s\n", __func__); + io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; ena_setup_io_intr(adapter); /* napi poll functions should be initialized before running @@ -1878,7 +2541,7 @@ static int ena_up(struct ena_adapter *adapter) * interrupt, causing the ISR to fire immediately while the poll * function wasn't set yet, causing a null dereference */ - ena_init_napi(adapter); + ena_init_napi_in_range(adapter, 0, io_queue_count); rc = ena_request_io_irq(adapter); if (rc) @@ -1909,7 +2572,7 @@ static int ena_up(struct ena_adapter *adapter) /* schedule napi in case we had pending packets * from the last time we disable napi */ - for (i = 0; i < adapter->num_io_queues; i++) + for (i = 0; i < io_queue_count; i++) napi_schedule(&adapter->ena_napi[i].napi); return rc; @@ -1922,13 +2585,15 @@ err_up: err_create_queues_with_backoff: ena_free_io_irq(adapter); err_req_irq: - ena_del_napi(adapter); + ena_del_napi_in_range(adapter, 0, io_queue_count); return rc; } static void ena_down(struct ena_adapter *adapter) { + int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; + netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); @@ -1941,7 +2606,7 @@ static void ena_down(struct ena_adapter *adapter) netif_tx_disable(adapter->netdev); /* After this point the napi handler won't enable the tx queue */ - ena_napi_disable_all(adapter); + ena_napi_disable_in_range(adapter, 0, io_queue_count); /* After destroy the queue there won't be any new interrupts */ @@ -1959,7 +2624,7 @@ static void ena_down(struct ena_adapter *adapter) ena_disable_io_intr_sync(adapter); ena_free_io_irq(adapter); - ena_del_napi(adapter); + ena_del_napi_in_range(adapter, 0, io_queue_count); ena_free_all_tx_bufs(adapter); ena_free_all_rx_bufs(adapter); @@ -2049,23 +2714,47 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, ena_close(adapter->netdev); adapter->requested_tx_ring_size = new_tx_size; adapter->requested_rx_ring_size = new_rx_size; - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); return dev_was_up ? ena_up(adapter) : 0; } int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) { struct ena_com_dev *ena_dev = adapter->ena_dev; + int prev_channel_count; bool dev_was_up; dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); ena_close(adapter->netdev); + prev_channel_count = adapter->num_io_queues; adapter->num_io_queues = new_channel_count; + if (ena_xdp_present(adapter) && + ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) { + adapter->xdp_first_ring = new_channel_count; + adapter->xdp_num_queues = new_channel_count; + if (prev_channel_count > new_channel_count) + ena_xdp_exchange_program_rx_in_range(adapter, + NULL, + new_channel_count, + prev_channel_count); + else + ena_xdp_exchange_program_rx_in_range(adapter, + adapter->xdp_bpf_prog, + prev_channel_count, + new_channel_count); + } + /* We need to destroy the rss table so that the indirection * table will be reinitialized by ena_up() */ ena_com_rss_destroy(ena_dev); - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); return dev_was_up ? ena_open(adapter->netdev) : 0; } @@ -2249,7 +2938,7 @@ error_report_dma_error: tx_info->skb = NULL; tx_info->num_of_bufs += i; - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); return -EINVAL; } @@ -2264,7 +2953,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) struct netdev_queue *txq; void *push_hdr; u16 next_to_use, req_id, header_len; - int qid, rc, nb_hw_desc; + int qid, rc; netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); /* Determine which tx ring we will be placed on */ @@ -2299,50 +2988,17 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set flags and meta data */ ena_tx_csum(&ena_tx_ctx, skb); - if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) { - netif_dbg(adapter, tx_queued, dev, - "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", - qid); - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); - } - - /* prepare the packet's descriptors to dma engine */ - rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, - &nb_hw_desc); - - /* ena_com_prepare_tx() can't fail due to overflow of tx queue, - * since the number of free descriptors in the queue is checked - * after sending the previous packet. In case there isn't enough - * space in the queue for the next packet, it is stopped - * until there is again enough available space in the queue. - * All other failure reasons of ena_com_prepare_tx() are fatal - * and therefore require a device reset. - */ - if (unlikely(rc)) { - netif_err(adapter, tx_queued, dev, - "failed to prepare tx bufs\n"); - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.prepare_ctx_err++; - u64_stats_update_end(&tx_ring->syncp); - adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + rc = ena_xmit_common(dev, + tx_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + skb->len); + if (rc) goto error_unmap_dma; - } netdev_tx_sent_queue(txq, skb->len); - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.cnt++; - tx_ring->tx_stats.bytes += skb->len; - u64_stats_update_end(&tx_ring->syncp); - - tx_info->tx_descs = nb_hw_desc; - tx_info->last_jiffies = jiffies; - tx_info->print_once = 0; - - tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, - tx_ring->ring_size); - /* stop the queue when no more space available, the packet can have up * to sgl_size + 2. one for the meta descriptor and one for header * (if the header is larger than tx_max_header_size). @@ -2389,7 +3045,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; error_unmap_dma: - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); tx_info->skb = NULL; error_drop_packet: @@ -2568,6 +3224,7 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_change_mtu = ena_change_mtu, .ndo_set_mac_address = NULL, .ndo_validate_addr = eth_validate_addr, + .ndo_bpf = ena_xdp, }; static int ena_device_validate_params(struct ena_adapter *adapter, @@ -2947,7 +3604,9 @@ static void check_for_missing_completions(struct ena_adapter *adapter) struct ena_ring *tx_ring; struct ena_ring *rx_ring; int i, budget, rc; + int io_queue_count; + io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -2962,7 +3621,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) budget = ENA_MONITORED_TX_QUEUES; - for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) { + for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { tx_ring = &adapter->tx_ring[i]; rx_ring = &adapter->rx_ring[i]; @@ -2970,7 +3629,8 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (unlikely(rc)) return; - rc = check_for_rx_interrupt_queue(adapter, rx_ring); + rc = !ENA_IS_XDP_INDEX(adapter, i) ? + check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; @@ -2979,7 +3639,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) break; } - adapter->last_monitored_tx_qid = i % adapter->num_io_queues; + adapter->last_monitored_tx_qid = i % io_queue_count; } /* trigger napi schedule after 2 consecutive detections */ @@ -3556,6 +4216,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->num_io_queues = max_num_io_queues; adapter->max_num_io_queues = max_num_io_queues; + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + adapter->last_monitored_tx_qid = 0; adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; @@ -3569,7 +4232,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Failed to query interrupt moderation feature\n"); goto err_netdev_destroy; } - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); netdev->netdev_ops = &ena_netdev_ops; netdev->watchdog_timeo = TX_TIMEOUT; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index bffd778f2ce3..094324fd0edc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -36,6 +36,7 @@ #include <linux/bitops.h> #include <linux/dim.h> #include <linux/etherdevice.h> +#include <linux/if_vlan.h> #include <linux/inetdevice.h> #include <linux/interrupt.h> #include <linux/netdevice.h> @@ -142,6 +143,18 @@ #define ENA_MMIO_DISABLE_REG_READ BIT(0) +/* The max MTU size is configured to be the ethernet frame size without + * the overhead of the ethernet header, which can have a VLAN header, and + * a frame check sequence (FCS). + * The buffer size we share with the device is defined to be ENA_PAGE_SIZE + */ + +#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \ + VLAN_HLEN - XDP_PACKET_HEADROOM) + +#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \ + ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues)) + struct ena_irq { irq_handler_t handler; void *data; @@ -155,6 +168,8 @@ struct ena_napi { struct napi_struct napi ____cacheline_aligned; struct ena_ring *tx_ring; struct ena_ring *rx_ring; + struct ena_ring *xdp_ring; + bool first_interrupt; u32 qid; struct dim dim; }; @@ -180,6 +195,17 @@ struct ena_tx_buffer { /* num of buffers used by this skb */ u32 num_of_bufs; + /* XDP buffer structure which is used for sending packets in + * the xdp queues + */ + struct xdp_frame *xdpf; + /* The rx page for the rx buffer that was received in rx and + * re transmitted on xdp tx queues as a result of XDP_TX action. + * We need to free the page once we finished cleaning the buffer in + * clean_xdp_irq() + */ + struct page *xdp_rx_page; + /* Indicate if bufs[0] map the linear data of the skb. */ u8 map_linear_data; @@ -258,10 +284,13 @@ struct ena_ring { struct ena_adapter *adapter; struct ena_com_io_cq *ena_com_io_cq; struct ena_com_io_sq *ena_com_io_sq; + struct bpf_prog *xdp_bpf_prog; + struct xdp_rxq_info xdp_rxq; u16 next_to_use; u16 next_to_clean; u16 rx_copybreak; + u16 rx_headroom; u16 qid; u16 mtu; u16 sgl_size; @@ -379,6 +408,10 @@ struct ena_adapter { u32 last_monitored_tx_qid; enum ena_regs_reset_reason_types reset_reason; + + struct bpf_prog *xdp_bpf_prog; + u32 xdp_first_ring; + u32 xdp_num_queues; }; void ena_set_ethtool_ops(struct net_device *netdev); @@ -390,8 +423,48 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_update_queue_sizes(struct ena_adapter *adapter, u32 new_tx_size, u32 new_rx_size); + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); int ena_get_sset_count(struct net_device *netdev, int sset); +enum ena_xdp_errors_t { + ENA_XDP_ALLOWED = 0, + ENA_XDP_CURRENT_MTU_TOO_LARGE, + ENA_XDP_NO_ENOUGH_QUEUES, +}; + +static inline bool ena_xdp_queues_present(struct ena_adapter *adapter) +{ + return adapter->xdp_first_ring != 0; +} + +static inline bool ena_xdp_present(struct ena_adapter *adapter) +{ + return !!adapter->xdp_bpf_prog; +} + +static inline bool ena_xdp_present_ring(struct ena_ring *ring) +{ + return !!ring->xdp_bpf_prog; +} + +static inline int ena_xdp_legal_queue_count(struct ena_adapter *adapter, + u32 queues) +{ + return 2 * queues <= adapter->max_num_io_queues; +} + +static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter) +{ + enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED; + + if (adapter->netdev->mtu > ENA_XDP_MAX_MTU) + rc = ENA_XDP_CURRENT_MTU_TOO_LARGE; + else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues)) + rc = ENA_XDP_NO_ENOUGH_QUEUES; + + return rc; +} + #endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c index ab30761003da..cf3562e82ca9 100644 --- a/drivers/net/ethernet/amd/7990.c +++ b/drivers/net/ethernet/amd/7990.c @@ -527,7 +527,7 @@ int lance_close(struct net_device *dev) } EXPORT_SYMBOL_GPL(lance_close); -void lance_tx_timeout(struct net_device *dev) +void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { printk("lance_tx_timeout\n"); lance_reset(dev); diff --git a/drivers/net/ethernet/amd/7990.h b/drivers/net/ethernet/amd/7990.h index 741cdc392c6b..8266b3c1fefc 100644 --- a/drivers/net/ethernet/amd/7990.h +++ b/drivers/net/ethernet/amd/7990.h @@ -243,7 +243,7 @@ int lance_open(struct net_device *dev); int lance_close(struct net_device *dev); int lance_start_xmit(struct sk_buff *skb, struct net_device *dev); void lance_set_multicast(struct net_device *dev); -void lance_tx_timeout(struct net_device *dev); +void lance_tx_timeout(struct net_device *dev, unsigned int txqueue); #ifdef CONFIG_NET_POLL_CONTROLLER void lance_poll(struct net_device *dev); #endif diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 212fe72a190b..a3faf4feb204 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -522,7 +522,7 @@ static inline int lance_reset(struct net_device *dev) return status; } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index 0842da492a64..1c53408f5d47 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -422,7 +422,7 @@ static void am79c961_setmulticastlist (struct net_device *dev) spin_unlock_irqrestore(&priv->chip_lock, flags); } -static void am79c961_timeout(struct net_device *dev) +static void am79c961_timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_WARNING "%s: transmit timed out, network cable problem?\n", dev->name); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 573e88fc8ede..0f3b743425e8 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1569,7 +1569,7 @@ static int amd8111e_enable_link_change(struct amd8111e_priv *lp) * failed or the interface is locked up. This function will reinitialize * the hardware. */ -static void amd8111e_tx_timeout(struct net_device *dev) +static void amd8111e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct amd8111e_priv *lp = netdev_priv(dev); int err; diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c index 4b6a5cb85dd2..5e0f645f5bde 100644 --- a/drivers/net/ethernet/amd/ariadne.c +++ b/drivers/net/ethernet/amd/ariadne.c @@ -530,7 +530,7 @@ static inline void ariadne_reset(struct net_device *dev) netif_start_queue(dev); } -static void ariadne_tx_timeout(struct net_device *dev) +static void ariadne_tx_timeout(struct net_device *dev, unsigned int txqueue) { volatile struct Am79C960 *lance = (struct Am79C960 *)dev->base_addr; diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index d3d44e07afbc..4e36122609a3 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -346,7 +346,7 @@ static int lance_rx( struct net_device *dev ); static int lance_close( struct net_device *dev ); static void set_multicast_list( struct net_device *dev ); static int lance_set_mac_address( struct net_device *dev, void *addr ); -static void lance_tx_timeout (struct net_device *dev); +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue); /************************* End of Prototypes **************************/ @@ -727,7 +727,7 @@ static void lance_init_ring( struct net_device *dev ) /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -static void lance_tx_timeout (struct net_device *dev) +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); struct lance_ioreg *IO = lp->iobase; diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 1793950f0582..d832c9f4d306 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1014,7 +1014,7 @@ static netdev_tx_t au1000_tx(struct sk_buff *skb, struct net_device *dev) * The Tx ring has been full longer than the watchdog timeout * value. The transmitter must be hung? */ -static void au1000_tx_timeout(struct net_device *dev) +static void au1000_tx_timeout(struct net_device *dev, unsigned int txqueue) { netdev_err(dev, "au1000_tx_timeout: dev=%p\n", dev); au1000_reset_mac(dev); diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index dac4a2fcad6a..6592a2db9efb 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -884,7 +884,7 @@ static inline int lance_reset(struct net_device *dev) return status; } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index f90b454b1642..aff44241988c 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -306,7 +306,7 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id); static int lance_close(struct net_device *dev); static struct net_device_stats *lance_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void lance_tx_timeout (struct net_device *dev); +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue); @@ -913,7 +913,7 @@ lance_restart(struct net_device *dev, unsigned int csr0_bits, int must_reinit) } -static void lance_tx_timeout (struct net_device *dev) +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = (struct lance_private *) dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index c6c2a54c1121..c38edf6f03a3 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -254,7 +254,7 @@ static int ni65_lance_reinit(struct net_device *dev); static void ni65_init_lance(struct priv *p,unsigned char*,int,int); static netdev_tx_t ni65_send_packet(struct sk_buff *skb, struct net_device *dev); -static void ni65_timeout(struct net_device *dev); +static void ni65_timeout(struct net_device *dev, unsigned int txqueue); static int ni65_close(struct net_device *dev); static int ni65_alloc_buffer(struct net_device *dev); static void ni65_free_buffer(struct priv *p); @@ -1133,7 +1133,7 @@ static void ni65_recv_intr(struct net_device *dev,int csr0) * kick xmitter .. */ -static void ni65_timeout(struct net_device *dev) +static void ni65_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct priv *p = dev->ml_priv; diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 9c152d85840d..023aecf6ab30 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -407,7 +407,7 @@ static int mace_open(struct net_device *dev); static int mace_close(struct net_device *dev); static netdev_tx_t mace_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void mace_tx_timeout(struct net_device *dev); +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t mace_interrupt(int irq, void *dev_id); static struct net_device_stats *mace_get_stats(struct net_device *dev); static int mace_rx(struct net_device *dev, unsigned char RxCnt); @@ -837,7 +837,7 @@ mace_start_xmit failed, put skb back into a list." ---------------------------------------------------------------------------- */ -static void mace_tx_timeout(struct net_device *dev) +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue) { mace_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index f5ad12c10934..dc7d88227e76 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -314,7 +314,7 @@ static int pcnet32_open(struct net_device *); static int pcnet32_init_ring(struct net_device *); static netdev_tx_t pcnet32_start_xmit(struct sk_buff *, struct net_device *); -static void pcnet32_tx_timeout(struct net_device *dev); +static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t pcnet32_interrupt(int, void *); static int pcnet32_close(struct net_device *); static struct net_device_stats *pcnet32_get_stats(struct net_device *); @@ -2455,7 +2455,7 @@ static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits) lp->a->write_csr(ioaddr, CSR0, csr0_bits); } -static void pcnet32_tx_timeout(struct net_device *dev) +static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long ioaddr = dev->base_addr, flags; diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index ebcbf8ca4829..b00e00881253 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1097,7 +1097,7 @@ static void lance_piozero(void __iomem *dest, int len) sbus_writeb(0, piobuf); } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 98f8f2033154..b71f9b04a51e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2152,7 +2152,7 @@ static int xgbe_change_mtu(struct net_device *netdev, int mtu) return 0; } -static void xgbe_tx_timeout(struct net_device *netdev) +static void xgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct xgbe_prv_data *pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c index 02b4f3af02b5..c48f60996761 100644 --- a/drivers/net/ethernet/apm/xgene-v2/main.c +++ b/drivers/net/ethernet/apm/xgene-v2/main.c @@ -575,7 +575,7 @@ static void xge_free_pending_skb(struct net_device *ndev) } } -static void xge_timeout(struct net_device *ndev) +static void xge_timeout(struct net_device *ndev, unsigned int txqueue) { struct xge_pdata *pdata = netdev_priv(ndev); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d8612131c55e..e284b6753725 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -859,7 +859,7 @@ static int xgene_enet_napi(struct napi_struct *napi, const int budget) return processed; } -static void xgene_enet_timeout(struct net_device *ndev) +static void xgene_enet_timeout(struct net_device *ndev, unsigned int txqueue) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct netdev_queue *txq; diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index 8d03578d5e8c..95d3061c61be 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -91,7 +91,7 @@ static int mace_set_address(struct net_device *dev, void *addr); static void mace_reset(struct net_device *dev); static irqreturn_t mace_interrupt(int irq, void *dev_id); static irqreturn_t mace_dma_intr(int irq, void *dev_id); -static void mace_tx_timeout(struct net_device *dev); +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue); static void __mace_set_address(struct net_device *dev, void *addr); /* @@ -600,7 +600,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void mace_tx_timeout(struct net_device *dev) +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mace_data *mp = netdev_priv(dev); volatile struct mace *mb = mp->mace; diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 8f5021091eee..ad8b0e3fcd2c 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1409,7 +1409,7 @@ static void ag71xx_oom_timer_handler(struct timer_list *t) napi_schedule(&ag->napi); } -static void ag71xx_tx_timeout(struct net_device *ndev) +static void ag71xx_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ag71xx *ag = netdev_priv(ndev); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index d4bbcdfd691a..1dcbc486eca9 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1553,7 +1553,7 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb, return alx_start_xmit_ring(skb, alx_tx_queue_mapping(alx, skb)); } -static void alx_tx_timeout(struct net_device *dev) +static void alx_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct alx_priv *alx = netdev_priv(dev); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 2b239ecea05f..4c0b1f8551dd 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -350,7 +350,7 @@ static void atl1c_del_timer(struct atl1c_adapter *adapter) * atl1c_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl1c_tx_timeout(struct net_device *netdev) +static void atl1c_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl1c_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 4f7b65825c15..e0d89942d537 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -251,7 +251,7 @@ static void atl1e_cancel_work(struct atl1e_adapter *adapter) * atl1e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl1e_tx_timeout(struct net_device *netdev) +static void atl1e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl1e_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 3aba38322717..b81a4e0c5b57 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1001,7 +1001,7 @@ static int atl2_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) * atl2_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl2_tx_timeout(struct net_device *netdev) +static void atl2_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl2_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c index 505a22c703f7..0941d07d0833 100644 --- a/drivers/net/ethernet/atheros/atlx/atlx.c +++ b/drivers/net/ethernet/atheros/atlx/atlx.c @@ -183,7 +183,7 @@ static void atlx_clear_phy_int(struct atlx_adapter *adapter) * atlx_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atlx_tx_timeout(struct net_device *netdev) +static void atlx_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atlx_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 035dbb1b2c98..5b3464c3e8d1 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -948,7 +948,7 @@ irq_ack: return IRQ_RETVAL(handled); } -static void b44_tx_timeout(struct net_device *dev) +static void b44_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct b44 *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 825af709708e..1907e47fd0af 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1354,7 +1354,7 @@ out: return ret; } -static void bcm_sysport_tx_timeout(struct net_device *dev) +static void bcm_sysport_tx_timeout(struct net_device *dev, unsigned int txqueue) { netdev_warn(dev, "transmit timeout!\n"); @@ -2427,6 +2427,14 @@ static int bcm_sysport_probe(struct platform_device *pdev) if (!of_id || !of_id->data) return -EINVAL; + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "unable to set DMA mask: %d\n", ret); + return ret; + } + /* Fairly quickly we need to know the type of adapter we have */ params = of_id->data; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index fbc196b480b6..dbb7874607ca 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6575,7 +6575,7 @@ bnx2_dump_state(struct bnx2 *bp) } static void -bnx2_tx_timeout(struct net_device *dev) +bnx2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnx2 *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 5e037a305b83..ee9e9290f112 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4970,7 +4970,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features) return 0; } -void bnx2x_tx_timeout(struct net_device *dev) +void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnx2x *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 8b08cb18e363..e35f48bfdc85 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -617,7 +617,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features); * * @dev: net device */ -void bnx2x_tx_timeout(struct net_device *dev); +void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue); /** bnx2x_get_c2s_mapping - read inner-to-outer vlan configuration * c2s_map should have BNX2X_MAX_PRIORITY entries. diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 85983f0e3134..4e34841906c7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9976,7 +9976,7 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent) } } -static void bnxt_tx_timeout(struct net_device *dev) +static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnxt *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 120fa05a39ff..3ee7917e3fc0 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014-2017 Broadcom + * Copyright (c) 2014-2019 Broadcom */ #define pr_fmt(fmt) "bcmgenet: " fmt @@ -508,8 +508,8 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev, return phy_ethtool_ksettings_set(dev->phydev, cmd); } -static int bcmgenet_set_rx_csum(struct net_device *dev, - netdev_features_t wanted) +static void bcmgenet_set_rx_csum(struct net_device *dev, + netdev_features_t wanted) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 rbuf_chk_ctrl; @@ -521,7 +521,7 @@ static int bcmgenet_set_rx_csum(struct net_device *dev, /* enable rx checksumming */ if (rx_csum_en) - rbuf_chk_ctrl |= RBUF_RXCHK_EN; + rbuf_chk_ctrl |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS; else rbuf_chk_ctrl &= ~RBUF_RXCHK_EN; priv->desc_rxchk_en = rx_csum_en; @@ -535,12 +535,10 @@ static int bcmgenet_set_rx_csum(struct net_device *dev, rbuf_chk_ctrl &= ~RBUF_SKIP_FCS; bcmgenet_rbuf_writel(priv, rbuf_chk_ctrl, RBUF_CHK_CTRL); - - return 0; } -static int bcmgenet_set_tx_csum(struct net_device *dev, - netdev_features_t wanted) +static void bcmgenet_set_tx_csum(struct net_device *dev, + netdev_features_t wanted) { struct bcmgenet_priv *priv = netdev_priv(dev); bool desc_64b_en; @@ -549,7 +547,7 @@ static int bcmgenet_set_tx_csum(struct net_device *dev, tbuf_ctrl = bcmgenet_tbuf_ctrl_get(priv); rbuf_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CTRL); - desc_64b_en = !!(wanted & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)); + desc_64b_en = !!(wanted & NETIF_F_HW_CSUM); /* enable 64 bytes descriptor in both directions (RBUF and TBUF) */ if (desc_64b_en) { @@ -563,21 +561,27 @@ static int bcmgenet_set_tx_csum(struct net_device *dev, bcmgenet_tbuf_ctrl_set(priv, tbuf_ctrl); bcmgenet_rbuf_writel(priv, rbuf_ctrl, RBUF_CTRL); - - return 0; } static int bcmgenet_set_features(struct net_device *dev, netdev_features_t features) { - netdev_features_t changed = features ^ dev->features; - netdev_features_t wanted = dev->wanted_features; - int ret = 0; + struct bcmgenet_priv *priv = netdev_priv(dev); + u32 reg; + int ret; - if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) - ret = bcmgenet_set_tx_csum(dev, wanted); - if (changed & (NETIF_F_RXCSUM)) - ret = bcmgenet_set_rx_csum(dev, wanted); + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; + + /* Make sure we reflect the value of CRC_CMD_FWD */ + reg = bcmgenet_umac_readl(priv, UMAC_CMD); + priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + + bcmgenet_set_tx_csum(dev, features); + bcmgenet_set_rx_csum(dev, features); + + clk_disable_unprepare(priv->clk); return ret; } @@ -857,6 +861,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), STAT_GENET_SOFT_MIB("tx_dma_failed", mib.tx_dma_failed), + STAT_GENET_SOFT_MIB("tx_realloc_tsb", mib.tx_realloc_tsb), + STAT_GENET_SOFT_MIB("tx_realloc_tsb_failed", + mib.tx_realloc_tsb_failed), /* Per TX queues */ STAT_GENET_Q(0), STAT_GENET_Q(1), @@ -1483,6 +1490,7 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev) static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, struct sk_buff *skb) { + struct bcmgenet_priv *priv = netdev_priv(dev); struct status_64 *status = NULL; struct sk_buff *new_skb; u16 offset; @@ -1495,12 +1503,15 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, * enough headroom for us to insert 64B status block. */ new_skb = skb_realloc_headroom(skb, sizeof(*status)); - dev_kfree_skb(skb); if (!new_skb) { + dev_kfree_skb_any(skb); + priv->mib.tx_realloc_tsb_failed++; dev->stats.tx_dropped++; return NULL; } + dev_consume_skb_any(skb); skb = new_skb; + priv->mib.tx_realloc_tsb++; } skb_push(skb, sizeof(*status)); @@ -1516,24 +1527,19 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, ip_proto = ipv6_hdr(skb)->nexthdr; break; default: - return skb; + /* don't use UDP flag */ + ip_proto = 0; + break; } offset = skb_checksum_start_offset(skb) - sizeof(*status); tx_csum_info = (offset << STATUS_TX_CSUM_START_SHIFT) | - (offset + skb->csum_offset); + (offset + skb->csum_offset) | + STATUS_TX_CSUM_LV; - /* Set the length valid bit for TCP and UDP and just set - * the special UDP flag for IPv4, else just set to 0. - */ - if (ip_proto == IPPROTO_TCP || ip_proto == IPPROTO_UDP) { - tx_csum_info |= STATUS_TX_CSUM_LV; - if (ip_proto == IPPROTO_UDP && - ip_ver == htons(ETH_P_IP)) - tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP; - } else { - tx_csum_info = 0; - } + /* Set the special UDP flag for UDP */ + if (ip_proto == IPPROTO_UDP) + tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP; status->tx_csum_info = tx_csum_info; } @@ -1744,7 +1750,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, unsigned int bytes_processed = 0; unsigned int p_index, mask; unsigned int discards; - unsigned int chksum_ok = 0; /* Clear status before servicing to reduce spurious interrupts */ if (ring->index == DESC_INDEX) { @@ -1795,9 +1800,15 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dmadesc_get_length_status(priv, cb->bd_addr); } else { struct status_64 *status; + __be16 rx_csum; status = (struct status_64 *)skb->data; dma_length_status = status->length_status; + rx_csum = (__force __be16)(status->rx_csum & 0xffff); + if (priv->desc_rxchk_en) { + skb->csum = (__force __wsum)ntohs(rx_csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } } /* DMA flags and length are still valid no matter how @@ -1840,18 +1851,12 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, goto next; } /* error packet */ - chksum_ok = (dma_flag & priv->dma_rx_chk_bit) && - priv->desc_rxchk_en; - skb_put(skb, len); if (priv->desc_64b_en) { skb_pull(skb, 64); len -= 64; } - if (likely(chksum_ok)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - /* remove hardware 2bytes added for IP alignment */ skb_pull(skb, 2); len -= 2; @@ -2886,9 +2891,10 @@ static int bcmgenet_open(struct net_device *dev) init_umac(priv); - /* Make sure we reflect the value of CRC_CMD_FWD */ - reg = bcmgenet_umac_readl(priv, UMAC_CMD); - priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + /* Apply features again in case we changed them while interface was + * down + */ + bcmgenet_set_features(dev, dev->features); bcmgenet_set_hw_addr(priv, dev->dev_addr); @@ -3055,7 +3061,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) ring->cb_ptr, ring->end_ptr); } -static void bcmgenet_timeout(struct net_device *dev) +static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 int0_enable = 0; @@ -3327,19 +3333,15 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) if (GENET_IS_V5(priv) || GENET_IS_V4(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; genet_dma_ring_regs = genet_dma_ring_regs_v4; - priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS; } else if (GENET_IS_V3(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS; } else if (GENET_IS_V2(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v2; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V12; } else if (GENET_IS_V1(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v1; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V12; } /* enum genet_version starts at 1 */ @@ -3535,9 +3537,11 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->msg_enable = netif_msg_init(-1, GENET_MSG_DEFAULT); - /* Set hardware features */ - dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; + /* Set default features */ + dev->features |= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_RXCSUM; + dev->hw_features |= dev->features; + dev->vlan_features |= dev->features; /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = true; @@ -3574,6 +3578,14 @@ static int bcmgenet_probe(struct platform_device *pdev) bcmgenet_set_hw_params(priv); + err = -EIO; + if (priv->hw_params->flags & GENET_HAS_40BITS) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (err) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + goto err; + /* Mii wait queue */ init_waitqueue_head(&priv->wq); /* Always use RX_BUF_LENGTH (2KB) buffer for all chips */ @@ -3689,6 +3701,9 @@ static int bcmgenet_resume(struct device *d) genphy_config_aneg(dev->phydev); bcmgenet_mii_config(priv->dev, false); + /* Restore enabled features */ + bcmgenet_set_features(dev, dev->features); + bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index a5659197598f..61a6fe9f4cec 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -144,6 +144,8 @@ struct bcmgenet_mib_counters { u32 alloc_rx_buff_failed; u32 rx_dma_failed; u32 tx_dma_failed; + u32 tx_realloc_tsb; + u32 tx_realloc_tsb_failed; }; #define UMAC_HD_BKP_CTRL 0x004 @@ -251,6 +253,7 @@ struct bcmgenet_mib_counters { #define RBUF_CHK_CTRL 0x14 #define RBUF_RXCHK_EN (1 << 0) #define RBUF_SKIP_FCS (1 << 4) +#define RBUF_L3_PARSE_DIS (1 << 5) #define RBUF_ENERGY_CTRL 0x9c #define RBUF_EEE_EN (1 << 0) @@ -663,7 +666,6 @@ struct bcmgenet_priv { bool desc_rxchk_en; bool crc_fwd_en; - unsigned int dma_rx_chk_bit; u32 dma_max_burst_length; u32 msg_enable; diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 1604ad32e920..80ff52527233 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -294,7 +294,7 @@ static int sbmac_set_duplex(struct sbmac_softc *s, enum sbmac_duplex duplex, enum sbmac_fc fc); static int sbmac_open(struct net_device *dev); -static void sbmac_tx_timeout (struct net_device *dev); +static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue); static void sbmac_set_rx_mode(struct net_device *dev); static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int sbmac_close(struct net_device *dev); @@ -2419,7 +2419,7 @@ static void sbmac_mii_poll(struct net_device *dev) } -static void sbmac_tx_timeout (struct net_device *dev) +static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct sbmac_softc *sc = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ca3aa1250dd1..460b4992914a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7645,7 +7645,7 @@ static void tg3_poll_controller(struct net_device *dev) } #endif -static void tg3_tx_timeout(struct net_device *dev) +static void tg3_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tg3 *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index af04a2c81adb..05a3d067c3fc 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1251,7 +1251,7 @@ static int xgmac_poll(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. */ -static void xgmac_tx_timeout(struct net_device *dev) +static void xgmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct xgmac_priv *priv = netdev_priv(dev); schedule_work(&priv->tx_timeout_work); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 7f3b2e3b0868..eab05b5534ea 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2562,7 +2562,7 @@ lio_xmit_failed: /** \brief Network device Tx timeout * @param netdev pointer to network device */ -static void liquidio_tx_timeout(struct net_device *netdev) +static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct lio *lio; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 370d76822ee0..7a77544a54f5 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1628,7 +1628,7 @@ lio_xmit_failed: /** \brief Network device Tx timeout * @param netdev pointer to network device */ -static void liquidio_tx_timeout(struct net_device *netdev) +static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct lio *lio; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index f3f2e71431ac..600de587d7a9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -31,7 +31,7 @@ static int lio_vf_rep_open(struct net_device *ndev); static int lio_vf_rep_stop(struct net_device *ndev); static netdev_tx_t lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev); -static void lio_vf_rep_tx_timeout(struct net_device *netdev); +static void lio_vf_rep_tx_timeout(struct net_device *netdev, unsigned int txqueue); static int lio_vf_rep_phys_port_name(struct net_device *dev, char *buf, size_t len); static void lio_vf_rep_get_stats64(struct net_device *dev, @@ -172,7 +172,7 @@ lio_vf_rep_stop(struct net_device *ndev) } static void -lio_vf_rep_tx_timeout(struct net_device *ndev) +lio_vf_rep_tx_timeout(struct net_device *ndev, unsigned int txqueue) { netif_trans_update(ndev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index f28409279ea4..016957285f99 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1741,7 +1741,7 @@ static void nicvf_get_stats64(struct net_device *netdev, } -static void nicvf_tx_timeout(struct net_device *dev) +static void nicvf_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct nicvf *nic = netdev_priv(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a70ac2097892..eda8e1269551 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -56,6 +56,7 @@ #include <asm/io.h> #include "t4_chip_type.h" #include "cxgb4_uld.h" +#include "t4fw_api.h" #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) extern struct list_head adapter_list; @@ -68,6 +69,16 @@ extern struct mutex uld_mutex; #define ETHTXQ_STOP_THRES \ (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8)) +#define FW_PARAM_DEV(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param)) + +#define FW_PARAM_PFVF(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \ + FW_PARAMS_PARAM_Y_V(0) | \ + FW_PARAMS_PARAM_Z_V(0)) + enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 93868dca186a..9a96b01dad56 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -3171,14 +3171,12 @@ static const struct file_operations mem_debugfs_fops = { static int tid_info_show(struct seq_file *seq, void *v) { - unsigned int tid_start = 0; struct adapter *adap = seq->private; - const struct tid_info *t = &adap->tids; - enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip); - - if (chip > CHELSIO_T5) - tid_start = t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A); + const struct tid_info *t; + enum chip_type chip; + t = &adap->tids; + chip = CHELSIO_CHIP_VERSION(adap->params.chip); if (t4_read_reg(adap, LE_DB_CONFIG_A) & HASHEN_F) { unsigned int sb; seq_printf(seq, "Connections in use: %u\n", @@ -3190,9 +3188,9 @@ static int tid_info_show(struct seq_file *seq, void *v) sb = t4_read_reg(adap, LE_DB_SRVR_START_INDEX_A); if (sb) { - seq_printf(seq, "TID range: %u..%u/%u..%u", tid_start, + seq_printf(seq, "TID range: %u..%u/%u..%u", t->tid_base, sb - 1, adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u/%u\n", atomic_read(&t->tids_in_use), atomic_read(&t->hash_tids_in_use)); @@ -3201,14 +3199,14 @@ static int tid_info_show(struct seq_file *seq, void *v) t->aftid_base, t->aftid_end, adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u/%u\n", atomic_read(&t->tids_in_use), atomic_read(&t->hash_tids_in_use)); } else { seq_printf(seq, "TID range: %u..%u", adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u\n", atomic_read(&t->hash_tids_in_use)); } @@ -3216,8 +3214,8 @@ static int tid_info_show(struct seq_file *seq, void *v) seq_printf(seq, "Connections in use: %u\n", atomic_read(&t->conns_in_use)); - seq_printf(seq, "TID range: %u..%u", tid_start, - tid_start + t->ntids - 1); + seq_printf(seq, "TID range: %u..%u", t->tid_base, + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u\n", atomic_read(&t->tids_in_use)); } @@ -3240,6 +3238,9 @@ static int tid_info_show(struct seq_file *seq, void *v) seq_printf(seq, "SFTID range: %u..%u in use: %u\n", t->sftid_base, t->sftid_base + t->nsftids - 2, t->sftids_in_use); + if (t->nhpftids) + seq_printf(seq, "HPFTID range: %u..%u\n", t->hpftid_base, + t->hpftid_base + t->nhpftids - 1); if (t->ntids) seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n", t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A), diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 1d39fca11810..2a2938bbb93a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -361,20 +361,22 @@ static int get_filter_count(struct adapter *adapter, unsigned int fidx, tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A); if (is_hashfilter(adapter) && hash) { - if (fidx < adapter->tids.ntids) { - f = adapter->tids.tid_tab[fidx]; - if (!f) - return -EINVAL; - } else { + if (tid_out_of_range(&adapter->tids, fidx)) return -E2BIG; - } + f = adapter->tids.tid_tab[fidx - adapter->tids.tid_base]; + if (!f) + return -EINVAL; } else { - if ((fidx != (adapter->tids.nftids + - adapter->tids.nsftids - 1)) && - fidx >= adapter->tids.nftids) + if ((fidx != (adapter->tids.nftids + adapter->tids.nsftids + + adapter->tids.nhpftids - 1)) && + fidx >= (adapter->tids.nftids + adapter->tids.nhpftids)) return -E2BIG; - f = &adapter->tids.ftid_tab[fidx]; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - + adapter->tids.nhpftids]; if (!f->valid) return -EINVAL; } @@ -480,6 +482,7 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family) ftid -= n; } spin_unlock_bh(&t->ftid_lock); + ftid += t->nhpftids; return found ? ftid : -ENOMEM; } @@ -507,6 +510,24 @@ static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family, return 0; } +static int cxgb4_set_hpftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (test_bit(fidx, t->hpftid_bmap)) { + spin_unlock_bh(&t->ftid_lock); + return -EBUSY; + } + + if (family == PF_INET) + __set_bit(fidx, t->hpftid_bmap); + else + bitmap_allocate_region(t->hpftid_bmap, fidx, 1); + + spin_unlock_bh(&t->ftid_lock); + return 0; +} + static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family, unsigned int chip_ver) { @@ -522,33 +543,58 @@ static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family, spin_unlock_bh(&t->ftid_lock); } +static void cxgb4_clear_hpftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (family == PF_INET) + __clear_bit(fidx, t->hpftid_bmap); + else + bitmap_release_region(t->hpftid_bmap, fidx, 1); + + spin_unlock_bh(&t->ftid_lock); +} + bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) { + struct filter_entry *prev_fe, *next_fe, *tab; struct adapter *adap = netdev2adap(dev); - struct filter_entry *prev_fe, *next_fe; + u32 prev_ftid, next_ftid, max_tid; struct tid_info *t = &adap->tids; - u32 prev_ftid, next_ftid; + unsigned long *bmap; bool valid = true; + if (idx < t->nhpftids) { + bmap = t->hpftid_bmap; + tab = t->hpftid_tab; + max_tid = t->nhpftids; + } else { + idx -= t->nhpftids; + bmap = t->ftid_bmap; + tab = t->ftid_tab; + max_tid = t->nftids; + } + /* Only insert the rule if both of the following conditions * are met: * 1. The immediate previous rule has priority <= @prio. * 2. The immediate next rule has priority >= @prio. */ spin_lock_bh(&t->ftid_lock); + /* Don't insert if there's a rule already present at @idx. */ - if (test_bit(idx, t->ftid_bmap)) { + if (test_bit(idx, bmap)) { valid = false; goto out_unlock; } - next_ftid = find_next_bit(t->ftid_bmap, t->nftids, idx); - if (next_ftid >= t->nftids) + next_ftid = find_next_bit(bmap, max_tid, idx); + if (next_ftid >= max_tid) next_ftid = idx; - next_fe = &adap->tids.ftid_tab[next_ftid]; + next_fe = &tab[next_ftid]; - prev_ftid = find_last_bit(t->ftid_bmap, idx); + prev_ftid = find_last_bit(bmap, idx); if (prev_ftid >= idx) prev_ftid = idx; @@ -558,13 +604,13 @@ bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) * accordingly. */ if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6) { - prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x3]; + prev_fe = &tab[prev_ftid & ~0x3]; if (!prev_fe->fs.type) - prev_fe = &adap->tids.ftid_tab[prev_ftid]; + prev_fe = &tab[prev_ftid]; } else { - prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x1]; + prev_fe = &tab[prev_ftid & ~0x1]; if (!prev_fe->fs.type) - prev_fe = &adap->tids.ftid_tab[prev_ftid]; + prev_fe = &tab[prev_ftid]; } if ((prev_fe->valid && prio < prev_fe->fs.tc_prio) || @@ -579,11 +625,16 @@ out_unlock: /* Delete the filter at a specified index. */ static int del_filter_wr(struct adapter *adapter, int fidx) { - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; + struct filter_entry *f; struct sk_buff *skb; unsigned int len; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; + len = sizeof(*fwr); skb = alloc_skb(len, GFP_KERNEL); @@ -609,10 +660,15 @@ static int del_filter_wr(struct adapter *adapter, int fidx) */ int set_filter_wr(struct adapter *adapter, int fidx) { - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter2_wr *fwr; + struct filter_entry *f; struct sk_buff *skb; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); if (!skb) return -ENOMEM; @@ -762,10 +818,14 @@ int delete_filter(struct adapter *adapter, unsigned int fidx) struct filter_entry *f; int ret; - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids + + adapter->tids.nhpftids) return -EINVAL; - f = &adapter->tids.ftid_tab[fidx]; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; ret = writable_filter(f); if (ret) return ret; @@ -811,12 +871,22 @@ void clear_all_filters(struct adapter *adapter) struct net_device *dev = adapter->port[0]; unsigned int i; + if (adapter->tids.hpftid_tab) { + struct filter_entry *f = &adapter->tids.hpftid_tab[0]; + + for (i = 0; i < adapter->tids.nhpftids; i++, f++) + if (f->valid || f->pending) + cxgb4_del_filter(dev, i, &f->fs); + } + if (adapter->tids.ftid_tab) { struct filter_entry *f = &adapter->tids.ftid_tab[0]; unsigned int max_ftid = adapter->tids.nftids + - adapter->tids.nsftids; + adapter->tids.nsftids + + adapter->tids.nhpftids; + /* Clear all TCAM filters */ - for (i = 0; i < max_ftid; i++, f++) + for (i = adapter->tids.nhpftids; i < max_ftid; i++, f++) if (f->valid || f->pending) cxgb4_del_filter(dev, i, &f->fs); } @@ -1319,17 +1389,17 @@ out_err: * filter specification in order to facilitate signaling completion of the * operation. */ -int __cxgb4_set_filter(struct net_device *dev, int filter_id, +int __cxgb4_set_filter(struct net_device *dev, int ftid, struct ch_filter_specification *fs, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); - unsigned int max_fidx, fidx; - struct filter_entry *f; + unsigned int max_fidx, fidx, chip_ver; + int iq, ret, filter_id = ftid; + struct filter_entry *f, *tab; u32 iconf; - int iq, ret; + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); if (fs->hash) { if (is_hashfilter(adapter)) return cxgb4_set_hash_filter(dev, fs, ctx); @@ -1338,7 +1408,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return -EINVAL; } - max_fidx = adapter->tids.nftids; + max_fidx = adapter->tids.nftids + adapter->tids.nhpftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) return -E2BIG; @@ -1353,6 +1423,13 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, if (iq < 0) return iq; + if (fs->prio) { + tab = &adapter->tids.hpftid_tab[0]; + } else { + tab = &adapter->tids.ftid_tab[0]; + filter_id = ftid - adapter->tids.nhpftids; + } + /* IPv6 filters occupy four slots and must be aligned on * four-slot boundaries. IPv4 filters only occupy a single * slot and have no alignment requirements but writing a new @@ -1373,9 +1450,8 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, else fidx = filter_id & ~0x1; - if (fidx != filter_id && - adapter->tids.ftid_tab[fidx].fs.type) { - f = &adapter->tids.ftid_tab[fidx]; + if (fidx != filter_id && tab[fidx].fs.type) { + f = &tab[fidx]; if (f->valid) { dev_err(adapter->pdev_dev, "Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n", @@ -1399,7 +1475,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, */ for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { - f = &adapter->tids.ftid_tab[fidx]; + f = &tab[fidx]; if (f->valid) { dev_err(adapter->pdev_dev, "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", @@ -1415,7 +1491,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return -EINVAL; /* Check overlapping IPv4 filter slot */ fidx = filter_id + 1; - f = &adapter->tids.ftid_tab[fidx]; + f = &tab[fidx]; if (f->valid) { pr_err("%s: IPv6 filter requires 2 indices. IPv4 filter already present at %d. Please remove IPv4 filter first.\n", __func__, fidx); @@ -1427,36 +1503,35 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, /* Check to make sure that provided filter index is not * already in use by someone else */ - f = &adapter->tids.ftid_tab[filter_id]; + f = &tab[filter_id]; if (f->valid) return -EBUSY; - fidx = filter_id + adapter->tids.ftid_base; - ret = cxgb4_set_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET, - chip_ver); + if (fs->prio) { + fidx = filter_id + adapter->tids.hpftid_base; + ret = cxgb4_set_hpftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + } else { + fidx = filter_id + adapter->tids.ftid_base; + ret = cxgb4_set_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET, + chip_ver); + } + if (ret) return ret; /* Check t make sure the filter requested is writable ... */ ret = writable_filter(f); - if (ret) { - /* Clear the bits we have set above */ - cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET, - chip_ver); - return ret; - } + if (ret) + goto free_tid; if (is_t6(adapter->params.chip) && fs->type && ipv6_addr_type((const struct in6_addr *)fs->val.lip) != IPV6_ADDR_ANY) { ret = cxgb4_clip_get(dev, (const u32 *)&fs->val.lip, 1); - if (ret) { - cxgb4_clear_ftid(&adapter->tids, filter_id, PF_INET6, - chip_ver); - return ret; - } + if (ret) + goto free_tid; } /* Convert the filter specification into our internal format. @@ -1487,7 +1562,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, f->fs.mask.vni, 0, 1, 1); if (ret < 0) - goto free_clip; + goto free_tid; f->fs.val.ovlan = ret; f->fs.mask.ovlan = 0x1ff; @@ -1501,21 +1576,22 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, */ f->ctx = ctx; f->tid = fidx; /* Save the actual tid */ - ret = set_filter_wr(adapter, filter_id); - if (ret) { + ret = set_filter_wr(adapter, ftid); + if (ret) + goto free_tid; + + return ret; + +free_tid: + if (f->fs.prio) + cxgb4_clear_hpftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + else cxgb4_clear_ftid(&adapter->tids, filter_id, fs->type ? PF_INET6 : PF_INET, chip_ver); - clear_filter(adapter, f); - } - - return ret; -free_clip: - if (is_t6(adapter->params.chip) && f->fs.type) - cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); - cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET, chip_ver); + clear_filter(adapter, f); return ret; } @@ -1537,7 +1613,7 @@ static int cxgb4_del_hash_filter(struct net_device *dev, int filter_id, netdev_dbg(dev, "%s: filter_id = %d ; nftids = %d\n", __func__, filter_id, adapter->tids.nftids); - if (filter_id > adapter->tids.ntids) + if (tid_out_of_range(t, filter_id)) return -E2BIG; f = lookup_tid(t, filter_id); @@ -1590,11 +1666,11 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); + unsigned int max_fidx, chip_ver; struct filter_entry *f; - unsigned int max_fidx; int ret; + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); if (fs && fs->hash) { if (is_hashfilter(adapter)) return cxgb4_del_hash_filter(dev, filter_id, ctx); @@ -1603,21 +1679,31 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, return -EINVAL; } - max_fidx = adapter->tids.nftids; + max_fidx = adapter->tids.nftids + adapter->tids.nhpftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) return -E2BIG; - f = &adapter->tids.ftid_tab[filter_id]; + if (filter_id < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[filter_id]; + else + f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + ret = writable_filter(f); if (ret) return ret; if (f->valid) { f->ctx = ctx; - cxgb4_clear_ftid(&adapter->tids, filter_id, - f->fs.type ? PF_INET6 : PF_INET, - chip_ver); + if (f->fs.prio) + cxgb4_clear_hpftid(&adapter->tids, + f->tid - adapter->tids.hpftid_base, + f->fs.type ? PF_INET6 : PF_INET); + else + cxgb4_clear_ftid(&adapter->tids, + f->tid - adapter->tids.ftid_base, + f->fs.type ? PF_INET6 : PF_INET, + chip_ver); return del_filter_wr(adapter, filter_id); } @@ -1842,11 +1928,18 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) max_fidx = adap->tids.nftids + adap->tids.nsftids; /* Get the corresponding filter entry for this tid */ if (adap->tids.ftid_tab) { - /* Check this in normal filter region */ - idx = tid - adap->tids.ftid_base; - if (idx >= max_fidx) - return; - f = &adap->tids.ftid_tab[idx]; + idx = tid - adap->tids.hpftid_base; + if (idx < adap->tids.nhpftids) { + f = &adap->tids.hpftid_tab[idx]; + } else { + /* Check this in normal filter region */ + idx = tid - adap->tids.ftid_base; + if (idx >= max_fidx) + return; + f = &adap->tids.ftid_tab[idx]; + idx += adap->tids.nhpftids; + } + if (f->tid != tid) return; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 12ff69b3ba91..1930e39f195e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -804,6 +804,26 @@ static int setup_ppod_edram(struct adapter *adap) return 0; } +static void adap_config_hpfilter(struct adapter *adapter) +{ + u32 param, val = 0; + int ret; + + /* Enable HP filter region. Older fw will fail this request and + * it is fine. + */ + param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT); + ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0, + 1, ¶m, &val); + + /* An error means FW doesn't know about HP filter support, + * it's not a problem, don't return an error. + */ + if (ret < 0) + dev_err(adapter->pdev_dev, + "HP filter region isn't supported by FW\n"); +} + /** * cxgb4_write_rss - write the RSS table for a given port * @pi: the port @@ -1427,8 +1447,8 @@ static void mk_tid_release(struct sk_buff *skb, unsigned int chan, static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan, unsigned int tid) { - void **p = &t->tid_tab[tid]; struct adapter *adap = container_of(t, struct adapter, tids); + void **p = &t->tid_tab[tid - t->tid_base]; spin_lock_bh(&adap->tid_release_lock); *p = adap->tid_release_head; @@ -1480,13 +1500,13 @@ static void process_tid_release_list(struct work_struct *work) void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid, unsigned short family) { - struct sk_buff *skb; struct adapter *adap = container_of(t, struct adapter, tids); + struct sk_buff *skb; - WARN_ON(tid >= t->ntids); + WARN_ON(tid_out_of_range(&adap->tids, tid)); - if (t->tid_tab[tid]) { - t->tid_tab[tid] = NULL; + if (t->tid_tab[tid - adap->tids.tid_base]) { + t->tid_tab[tid - adap->tids.tid_base] = NULL; atomic_dec(&t->conns_in_use); if (t->hash_base && (tid >= t->hash_base)) { if (family == AF_INET6) @@ -1518,6 +1538,7 @@ static int tid_init(struct tid_info *t) struct adapter *adap = container_of(t, struct adapter, tids); unsigned int max_ftids = t->nftids + t->nsftids; unsigned int natids = t->natids; + unsigned int hpftid_bmap_size; unsigned int eotid_bmap_size; unsigned int stid_bmap_size; unsigned int ftid_bmap_size; @@ -1525,12 +1546,15 @@ static int tid_init(struct tid_info *t) stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); ftid_bmap_size = BITS_TO_LONGS(t->nftids); + hpftid_bmap_size = BITS_TO_LONGS(t->nhpftids); eotid_bmap_size = BITS_TO_LONGS(t->neotids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + + t->nhpftids * sizeof(*t->hpftid_tab) + + hpftid_bmap_size * sizeof(long) + max_ftids * sizeof(*t->ftid_tab) + ftid_bmap_size * sizeof(long) + t->neotids * sizeof(*t->eotid_tab) + @@ -1543,7 +1567,9 @@ static int tid_init(struct tid_info *t) t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; - t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->hpftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->hpftid_bmap = (unsigned long *)&t->hpftid_tab[t->nhpftids]; + t->ftid_tab = (struct filter_entry *)&t->hpftid_bmap[hpftid_bmap_size]; t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; t->eotid_tab = (struct eotid_entry *)&t->ftid_bmap[ftid_bmap_size]; t->eotid_bmap = (unsigned long *)&t->eotid_tab[t->neotids]; @@ -1578,6 +1604,8 @@ static int tid_init(struct tid_info *t) bitmap_zero(t->eotid_bmap, t->neotids); } + if (t->nhpftids) + bitmap_zero(t->hpftid_bmap, t->nhpftids); bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -4351,6 +4379,7 @@ static int adap_init0_config(struct adapter *adapter, int reset) "HMA configuration failed with error %d\n", ret); if (is_t6(adapter->params.chip)) { + adap_config_hpfilter(adapter); ret = setup_ppod_edram(adapter); if (!ret) dev_info(adapter->pdev_dev, "Successfully enabled " @@ -4660,16 +4689,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip) /* * Grab some of our basic fundamental operating parameters. */ -#define FW_PARAM_DEV(param) \ - (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \ - FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param)) - -#define FW_PARAM_PFVF(param) \ - FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ - FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param)| \ - FW_PARAMS_PARAM_Y_V(0) | \ - FW_PARAMS_PARAM_Z_V(0) - params[0] = FW_PARAM_PFVF(EQ_START); params[1] = FW_PARAM_PFVF(L2T_START); params[2] = FW_PARAM_PFVF(L2T_END); @@ -4687,6 +4706,16 @@ static int adap_init0(struct adapter *adap, int vpd_skip) adap->sge.ingr_start = val[5]; if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + params[0] = FW_PARAM_PFVF(HPFILTER_START); + params[1] = FW_PARAM_PFVF(HPFILTER_END); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, + params, val); + if (ret < 0) + goto bye; + + adap->tids.hpftid_base = val[0]; + adap->tids.nhpftids = val[1] - val[0] + 1; + /* Read the raw mps entries. In T6, the last 2 tcam entries * are reserved for raw mac addresses (rawf = 2, one per port). */ @@ -4698,6 +4727,9 @@ static int adap_init0(struct adapter *adap, int vpd_skip) adap->rawf_start = val[0]; adap->rawf_cnt = val[1] - val[0] + 1; } + + adap->tids.tid_base = + t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A); } /* qids (ingress/egress) returned from firmware can be anywhere @@ -5050,8 +5082,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip) } adap->params.crypto = ntohs(caps_cmd.cryptocaps); } -#undef FW_PARAM_PFVF -#undef FW_PARAM_DEV /* The MTU/MSS Table is initialized by now, so load their values. If * we're initializing the adapter, then we'll make any modifications diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 0fa80bef575d..bb5513bdd293 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -672,10 +672,14 @@ int cxgb4_tc_flower_replace(struct net_device *dev, * 0 to driver. However, the hardware TCAM index * starts from 0. Hence, the -1 here. */ - if (cls->common.prio <= adap->tids.nftids) + if (cls->common.prio <= (adap->tids.nftids + + adap->tids.nhpftids)) { fidx = cls->common.prio - 1; - else + if (fidx < adap->tids.nhpftids) + fs->prio = 1; + } else { fidx = cxgb4_get_free_ftid(dev, inet_family); + } /* Only insert FLOWER rule if its priority doesn't * conflict with existing rules in the LETCAM. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index 102b370fbd3e..24c3c2dc7171 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -137,7 +137,7 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, * -1 here. 1 slot is enough to create a wildcard matchall * VIID rule. */ - if (cls->common.prio <= adap->tids.nftids) + if (cls->common.prio <= (adap->tids.nftids + adap->tids.nhpftids)) fidx = cls->common.prio - 1; else fidx = cxgb4_get_free_ftid(dev, PF_INET); @@ -156,6 +156,8 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, fs = &tc_port_matchall->ingress.fs; memset(fs, 0, sizeof(*fs)); + if (fidx < adap->tids.nhpftids) + fs->prio = 1; fs->tc_prio = cls->common.prio; fs->tc_cookie = cls->cookie; fs->hitcnts = 1; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 133f8623ba86..269b8d9e25e0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -176,7 +176,7 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Only insert U32 rule if its priority doesn't conflict with * existing rules in the LETCAM. */ - if (filter_id >= adapter->tids.nftids || + if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids || !cxgb4_filter_prio_in_range(dev, filter_id, cls->common.prio)) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); @@ -199,6 +199,8 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) memset(&fs, 0, sizeof(fs)); + if (filter_id < adapter->tids.nhpftids) + fs.prio = 1; fs.tc_prio = cls->common.prio; fs.tc_cookie = cls->knode.handle; @@ -355,6 +357,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) unsigned int filter_id, max_tids, i, j; struct cxgb4_link *link = NULL; struct cxgb4_tc_u32_table *t; + struct filter_entry *f; u32 handle, uhtid; int ret; @@ -363,8 +366,15 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Fetch the location to delete the filter. */ filter_id = TC_U32_NODE(cls->knode.handle) - 1; - if (filter_id >= adapter->tids.nftids || - cls->knode.handle != adapter->tids.ftid_tab[filter_id].fs.tc_cookie) + if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids) + return -ERANGE; + + if (filter_id < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[filter_id]; + else + f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + + if (cls->knode.handle != f->fs.tc_cookie) return -ERANGE; t = adapter->tc_u32; @@ -445,7 +455,7 @@ void cxgb4_cleanup_tc_u32(struct adapter *adap) struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) { - unsigned int max_tids = adap->tids.nftids; + unsigned int max_tids = adap->tids.nftids + adap->tids.nhpftids; struct cxgb4_tc_u32_table *t; unsigned int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 861b25d28ed6..d9d27bc1ae67 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -99,6 +99,7 @@ struct eotid_entry { */ struct tid_info { void **tid_tab; + unsigned int tid_base; unsigned int ntids; struct serv_entry *stid_tab; @@ -111,6 +112,11 @@ struct tid_info { unsigned int natids; unsigned int atid_base; + struct filter_entry *hpftid_tab; + unsigned long *hpftid_bmap; + unsigned int nhpftids; + unsigned int hpftid_base; + struct filter_entry *ftid_tab; unsigned long *ftid_bmap; unsigned int nftids; @@ -147,9 +153,15 @@ struct tid_info { static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) { + tid -= t->tid_base; return tid < t->ntids ? t->tid_tab[tid] : NULL; } +static inline bool tid_out_of_range(const struct tid_info *t, unsigned int tid) +{ + return ((tid - t->tid_base) >= t->ntids); +} + static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) { return atid < t->natids ? t->atid_tab[atid].data : NULL; @@ -171,7 +183,7 @@ static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) static inline void cxgb4_insert_tid(struct tid_info *t, void *data, unsigned int tid, unsigned short family) { - t->tid_tab[tid] = data; + t->tid_tab[tid - t->tid_base] = data; if (t->hash_base && (tid >= t->hash_base)) { if (family == AF_INET6) atomic_add(2, &t->hash_tids_in_use); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index ac4fb43bdec6..accad1101ad1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1321,6 +1321,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21, FW_PARAMS_PARAM_DEV_PPOD_EDRAM = 0x23, FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24, + FW_PARAMS_PARAM_DEV_HPFILTER_REGION_SUPPORT = 0x26, FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27, FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28, FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29, diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index c9aebcde403a..33ace3307059 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1128,7 +1128,7 @@ net_get_stats(struct net_device *dev) return &dev->stats; } -static void net_timeout(struct net_device *dev) +static void net_timeout(struct net_device *dev, unsigned int txqueue) { /* If we get here, some higher level has decided we are broken. There should really be a "kick me" function call instead. */ diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index acb2856936d2..bbd7b3175f09 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1095,7 +1095,7 @@ static void enic_set_rx_mode(struct net_device *netdev) } /* netif_tx_lock held, BHs disabled */ -static void enic_tx_timeout(struct net_device *netdev) +static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct enic *enic = netdev_priv(netdev); schedule_work(&enic->tx_hang_reset); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index a8f4c69252ff..de0b6e066eef 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1296,7 +1296,7 @@ out_drop: return NETDEV_TX_OK; } -static void gmac_tx_timeout(struct net_device *netdev) +static void gmac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { netdev_err(netdev, "Tx timeout\n"); gmac_dump_dma_state(netdev); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index cce90b5925d9..1ea3372775e6 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -964,7 +964,7 @@ dm9000_init_dm9000(struct net_device *dev) } /* Our watchdog timed out. Called by the networking layer */ -static void dm9000_timeout(struct net_device *dev) +static void dm9000_timeout(struct net_device *dev, unsigned int txqueue) { struct board_info *db = netdev_priv(dev); u8 reg_save; diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index f1a2da15dd0a..fd3c2abf74b5 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -1436,7 +1436,7 @@ static int de_close (struct net_device *dev) return 0; } -static void de_tx_timeout (struct net_device *dev) +static void de_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct de_private *de = netdev_priv(dev); const int irq = de->pdev->irq; diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 0efdbd1a4a6f..32d470d4122a 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -2214,15 +2214,16 @@ static int __init dmfe_init_module(void) if (cr6set) dmfe_cr6_user_set = cr6set; - switch(mode) { - case DMFE_10MHF: + switch (mode) { + case DMFE_10MHF: case DMFE_100MHF: case DMFE_10MFD: case DMFE_100MFD: case DMFE_1M_HPNA: dmfe_media_mode = mode; break; - default:dmfe_media_mode = DMFE_AUTO; + default: + dmfe_media_mode = DMFE_AUTO; break; } diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 3e3e08698876..9e9d9eee29d9 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -255,7 +255,7 @@ MODULE_DEVICE_TABLE(pci, tulip_pci_tbl); const char tulip_media_cap[32] = {0,0,0,16, 3,19,16,24, 27,4,7,5, 0,20,23,20, 28,31,0,0, }; -static void tulip_tx_timeout(struct net_device *dev); +static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue); static void tulip_init_ring(struct net_device *dev); static void tulip_free_ring(struct net_device *dev); static netdev_tx_t tulip_start_xmit(struct sk_buff *skb, @@ -534,7 +534,7 @@ free_ring: } -static void tulip_tx_timeout(struct net_device *dev) +static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tulip_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->base_addr; diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index b1f30b194300..117ffe08800d 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -1809,8 +1809,8 @@ static int __init uli526x_init_module(void) if (cr6set) uli526x_cr6_user_set = cr6set; - switch (mode) { - case ULI526X_10MHF: + switch (mode) { + case ULI526X_10MHF: case ULI526X_100MHF: case ULI526X_10MFD: case ULI526X_100MFD: diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 70cb2d689c2c..7f136488e67c 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -331,7 +331,7 @@ static void netdev_timer(struct timer_list *t); static void init_rxtx_rings(struct net_device *dev); static void free_rxtx_rings(struct netdev_private *np); static void init_registers(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static int alloc_ringdesc(struct net_device *dev); static void free_ringdesc(struct netdev_private *np); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -921,7 +921,7 @@ static void init_registers(struct net_device *dev) iowrite32(0, ioaddr + RxStartDemand); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base_addr; diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 55e720d2ea0c..26c5da032b1e 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -66,7 +66,7 @@ static const int multicast_filter_limit = 0x40; static int rio_open (struct net_device *dev); static void rio_timer (struct timer_list *t); -static void rio_tx_timeout (struct net_device *dev); +static void rio_tx_timeout (struct net_device *dev, unsigned int txqueue); static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); static void rio_free_tx (struct net_device *dev, int irq); @@ -696,7 +696,7 @@ rio_timer (struct timer_list *t) } static void -rio_tx_timeout (struct net_device *dev) +rio_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->ioaddr; diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 4a37a69764ce..b91387c456ba 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -432,7 +432,7 @@ static int mdio_wait_link(struct net_device *dev, int wait); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); static void netdev_timer(struct timer_list *t); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static int reset_tx (struct net_device *dev); @@ -969,7 +969,7 @@ static void netdev_timer(struct timer_list *t) add_timer(&np->timer); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 39eb7d525043..56f59db6ebf2 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1417,7 +1417,7 @@ drop: return NETDEV_TX_OK; } -static void be_tx_timeout(struct net_device *netdev) +static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct be_adapter *adapter = netdev_priv(netdev); struct device *dev = &adapter->pdev->dev; diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index ea4f17f5cce7..66406da16b60 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -869,7 +869,7 @@ static int ethoc_change_mtu(struct net_device *dev, int new_mtu) return -ENOSYS; } -static void ethoc_tx_timeout(struct net_device *dev) +static void ethoc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ethoc *priv = netdev_priv(dev); u32 pending = ethoc_read(priv, INT_SOURCE); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 8ed85037f021..48b3b72fe02e 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1545,7 +1545,7 @@ static int ftgmac100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int return phy_mii_ioctl(netdev->phydev, ifr, cmd); } -static void ftgmac100_tx_timeout(struct net_device *netdev) +static void ftgmac100_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ftgmac100 *priv = netdev_priv(netdev); diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c24fd56a2c71..84f10970299a 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -428,7 +428,7 @@ static void getlinktype(struct net_device *dev); static void getlinkstatus(struct net_device *dev); static void netdev_timer(struct timer_list *t); static void reset_timer(struct timer_list *t); -static void fealnx_tx_timeout(struct net_device *dev); +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); @@ -1191,7 +1191,7 @@ static void reset_timer(struct timer_list *t) } -static void fealnx_tx_timeout(struct net_device *dev) +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->mem; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 6a9d12dad5d9..a60fc3cfc06e 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -288,7 +288,7 @@ static int dpaa_stop(struct net_device *net_dev) return err; } -static void dpaa_tx_timeout(struct net_device *net_dev) +static void dpaa_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { struct dpaa_percpu_priv *percpu_priv; const struct dpaa_priv *priv; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index a9503aea527f..a0061e929908 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -27,6 +27,20 @@ static int dpaa2_ptp_enable(struct ptp_clock_info *ptp, mc_dev = to_fsl_mc_device(dev); switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + switch (rq->extts.index) { + case 0: + bit = DPRTC_EVENT_ETS1; + break; + case 1: + bit = DPRTC_EVENT_ETS2; + break; + default: + return -EINVAL; + } + if (on) + extts_clean_up(ptp_qoriq, rq->extts.index, false); + break; case PTP_CLK_REQ_PPS: bit = DPRTC_EVENT_PPS; break; @@ -96,6 +110,12 @@ static irqreturn_t dpaa2_ptp_irq_handler_thread(int irq, void *priv) ptp_clock_event(ptp_qoriq->clock, &event); } + if (status & DPRTC_EVENT_ETS1) + extts_clean_up(ptp_qoriq, 0, true); + + if (status & DPRTC_EVENT_ETS2) + extts_clean_up(ptp_qoriq, 1, true); + err = dprtc_clear_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle, DPRTC_IRQ_INDEX, status); if (unlikely(err)) { diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h index 4ac05bfef338..96ffeb948f08 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h @@ -9,9 +9,11 @@ /* Command versioning */ #define DPRTC_CMD_BASE_VERSION 1 +#define DPRTC_CMD_VERSION_2 2 #define DPRTC_CMD_ID_OFFSET 4 #define DPRTC_CMD(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_BASE_VERSION) +#define DPRTC_CMD_V2(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_VERSION_2) /* Command IDs */ #define DPRTC_CMDID_CLOSE DPRTC_CMD(0x800) @@ -19,7 +21,7 @@ #define DPRTC_CMDID_SET_IRQ_ENABLE DPRTC_CMD(0x012) #define DPRTC_CMDID_GET_IRQ_ENABLE DPRTC_CMD(0x013) -#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD(0x014) +#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD_V2(0x014) #define DPRTC_CMDID_GET_IRQ_MASK DPRTC_CMD(0x015) #define DPRTC_CMDID_GET_IRQ_STATUS DPRTC_CMD(0x016) #define DPRTC_CMDID_CLEAR_IRQ_STATUS DPRTC_CMD(0x017) diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h index 311c184e1aef..05c413719e55 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h @@ -20,6 +20,8 @@ struct fsl_mc_io; #define DPRTC_IRQ_INDEX 0 #define DPRTC_EVENT_PPS 0x08000000 +#define DPRTC_EVENT_ETS1 0x00800000 +#define DPRTC_EVENT_ETS2 0x00400000 int dprtc_open(struct fsl_mc_io *mc_io, u32 cmd_flags, diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 17739906c966..2ee4a2cd4780 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -227,6 +227,8 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, enetc_bdr_idx_inc(tx_ring, &i); tx_ring->next_to_use = i; + skb_tx_timestamp(skb); + /* let H/W know BD ring has been updated */ enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 880a8ed8bb47..301ee0dde02d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -579,6 +579,7 @@ static int enetc_get_ts_info(struct net_device *ndev, (1 << HWTSTAMP_FILTER_ALL); #else info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; #endif return 0; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 2e99438cb1bf..9190ffc9f6b2 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -192,7 +192,6 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) u32 hi_credit_bit, hi_credit_reg; u32 max_interference_size; u32 port_frame_max_size; - u32 tc_max_sized_frame; u8 tc = cbs->queue; u8 prio_top, prio_next; int bw_sum = 0; @@ -250,7 +249,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) return -EINVAL; } - tc_max_sized_frame = enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); + enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); /* For top prio TC, the max_interfrence_size is maxSizedFrame. * diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 05c1899f6628..798fed37be46 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1141,7 +1141,7 @@ fec_stop(struct net_device *ndev) static void -fec_timeout(struct net_device *ndev) +fec_timeout(struct net_device *ndev, unsigned int txqueue) { struct fec_enet_private *fep = netdev_priv(ndev); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 30cdb246d020..de5278485062 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -84,7 +84,7 @@ static int debug = -1; /* the above default */ module_param(debug, int, 0); MODULE_PARM_DESC(debug, "debugging messages level"); -static void mpc52xx_fec_tx_timeout(struct net_device *dev) +static void mpc52xx_fec_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mpc52xx_fec_priv *priv = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 3981c06f082f..80903cd58468 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -641,7 +641,7 @@ static void fs_timeout_work(struct work_struct *work) netif_wake_queue(dev); } -static void fs_timeout(struct net_device *dev) +static void fs_timeout(struct net_device *dev, unsigned int txqueue) { struct fs_enet_private *fep = netdev_priv(dev); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 72868a28b621..b636d83a7ee9 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2093,7 +2093,7 @@ static void gfar_reset_task(struct work_struct *work) reset_gfar(priv->ndev); } -static void gfar_timeout(struct net_device *dev) +static void gfar_timeout(struct net_device *dev, unsigned int txqueue) { struct gfar_private *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index f839fa94ebdd..0d101c00286f 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3545,7 +3545,7 @@ static void ucc_geth_timeout_work(struct work_struct *work) * ucc_geth_timeout gets called when a packet has not been * transmitted after a set amount of time. */ -static void ucc_geth_timeout(struct net_device *dev) +static void ucc_geth_timeout(struct net_device *dev, unsigned int txqueue) { struct ucc_geth_private *ugeth = netdev_priv(dev); diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 1eca0fdb9933..a7b7a4aace79 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -93,7 +93,7 @@ static irqreturn_t fjn_interrupt(int irq, void *dev_id); static void fjn_rx(struct net_device *dev); static void fjn_reset(struct net_device *dev); static void set_rx_mode(struct net_device *dev); -static void fjn_tx_timeout(struct net_device *dev); +static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue); static const struct ethtool_ops netdev_ethtool_ops; /* @@ -774,7 +774,7 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) /*====================================================================*/ -static void fjn_tx_timeout(struct net_device *dev) +static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct local_info *lp = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9b7a8db9860f..e032563ceefd 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -845,7 +845,7 @@ static void gve_turnup(struct gve_priv *priv) gve_set_napi_enabled(priv); } -static void gve_tx_timeout(struct net_device *dev) +static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gve_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 3e9b6d543c77..dc8dd5fc1559 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -779,7 +779,7 @@ static int hip04_mac_stop(struct net_device *ndev) return 0; } -static void hip04_timeout(struct net_device *ndev) +static void hip04_timeout(struct net_device *ndev, unsigned int txqueue) { struct hip04_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 247de9105d10..4fb776920a93 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -893,7 +893,7 @@ static void hix5hd2_tx_timeout_task(struct work_struct *work) hix5hd2_net_open(priv->netdev); } -static void hix5hd2_net_timeout(struct net_device *dev) +static void hix5hd2_net_timeout(struct net_device *dev, unsigned int txqueue) { struct hix5hd2_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 14ab20491fd0..e45553ec114a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1485,7 +1485,7 @@ static int hns_nic_net_stop(struct net_device *ndev) static void hns_tx_timeout_reset(struct hns_nic_priv *priv); #define HNS_TX_TIMEO_LIMIT (40 * HZ) -static void hns_nic_net_timeout(struct net_device *ndev) +static void hns_nic_net_timeout(struct net_device *ndev, unsigned int txqueue) { struct hns_nic_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 69545dd6c938..aee5facc89b5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1556,6 +1556,37 @@ static int hns3_nic_set_features(struct net_device *netdev, return 0; } +static netdev_features_t hns3_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ +#define HNS3_MAX_HDR_LEN 480U +#define HNS3_MAX_L4_HDR_LEN 60U + + size_t len; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return features; + + if (skb->encapsulation) + len = skb_inner_transport_header(skb) - skb->data; + else + len = skb_transport_header(skb) - skb->data; + + /* Assume L4 is 60 byte as TCP is the only protocol with a + * a flexible value, and it's max len is 60 bytes. + */ + len += HNS3_MAX_L4_HDR_LEN; + + /* Hardware only supports checksum on the skb with a max header + * len of 480 bytes. + */ + if (len > HNS3_MAX_HDR_LEN) + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + + return features; +} + static void hns3_nic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -1869,7 +1900,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev) return true; } -static void hns3_nic_net_timeout(struct net_device *ndev) +static void hns3_nic_net_timeout(struct net_device *ndev, unsigned int txqueue) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; @@ -1970,6 +2001,7 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_do_ioctl = hns3_nic_do_ioctl, .ndo_change_mtu = hns3_nic_change_mtu, .ndo_set_features = hns3_nic_set_features, + .ndo_features_check = hns3_features_check, .ndo_get_stats64 = hns3_nic_get_stats64, .ndo_setup_tc = hns3_nic_setup_tc, .ndo_set_rx_mode = hns3_nic_set_rx_mode, @@ -2788,7 +2820,6 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring, static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, unsigned char *va) { -#define HNS3_NEED_ADD_FRAG 1 struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean]; struct net_device *netdev = ring_to_netdev(ring); struct sk_buff *skb; @@ -2832,33 +2863,19 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, desc_cb); ring_ptr_move_fw(ring, next_to_clean); - return HNS3_NEED_ADD_FRAG; + return 0; } -static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc, - bool pending) +static int hns3_add_frag(struct hns3_enet_ring *ring) { struct sk_buff *skb = ring->skb; struct sk_buff *head_skb = skb; struct sk_buff *new_skb; struct hns3_desc_cb *desc_cb; - struct hns3_desc *pre_desc; + struct hns3_desc *desc; u32 bd_base_info; - int pre_bd; - /* if there is pending bd, the SW param next_to_clean has moved - * to next and the next is NULL - */ - if (pending) { - pre_bd = (ring->next_to_clean - 1 + ring->desc_num) % - ring->desc_num; - pre_desc = &ring->desc[pre_bd]; - bd_base_info = le32_to_cpu(pre_desc->rx.bd_base_info); - } else { - bd_base_info = le32_to_cpu(desc->rx.bd_base_info); - } - - while (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { + do { desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; bd_base_info = le32_to_cpu(desc->rx.bd_base_info); @@ -2895,7 +2912,7 @@ static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc, hns3_nic_reuse_page(skb, ring->frag_num++, ring, 0, desc_cb); ring_ptr_move_fw(ring, next_to_clean); ring->pending_buf++; - } + } while (!(bd_base_info & BIT(HNS3_RXD_FE_B))); return 0; } @@ -3063,28 +3080,23 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring) if (ret < 0) /* alloc buffer fail */ return ret; - if (ret > 0) { /* need add frag */ - ret = hns3_add_frag(ring, desc, false); + if (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { /* need add frag */ + ret = hns3_add_frag(ring); if (ret) return ret; - - /* As the head data may be changed when GRO enable, copy - * the head data in after other data rx completed - */ - memcpy(skb->data, ring->va, - ALIGN(ring->pull_len, sizeof(long))); } } else { - ret = hns3_add_frag(ring, desc, true); + ret = hns3_add_frag(ring); if (ret) return ret; + } - /* As the head data may be changed when GRO enable, copy - * the head data in after other data rx completed - */ + /* As the head data may be changed when GRO enable, copy + * the head data in after other data rx completed + */ + if (skb->len > HNS3_RX_HEAD_SIZE) memcpy(skb->data, ring->va, ALIGN(ring->pull_len, sizeof(long))); - } ret = hns3_handle_bdinfo(ring, skb); if (unlikely(ret)) { @@ -3590,7 +3602,12 @@ static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) if (!tqp_vector->rx_group.ring && !tqp_vector->tx_group.ring) continue; - hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain); + /* Since the mapping can be overwritten, when fail to get the + * chain between vector and ring, we should go on to deal with + * the remaining options. + */ + if (hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain)) + dev_warn(priv->dev, "failed to get ring chain\n"); h->ae_algo->ops->unmap_ring_from_vector(h, tqp_vector->vector_irq, &vector_ring_chain); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 940ead3970d1..7f509eff562e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -479,19 +479,6 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw) hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0); } -static void hclge_destroy_queue(struct hclge_cmq_ring *ring) -{ - spin_lock(&ring->lock); - hclge_free_cmd_desc(ring); - spin_unlock(&ring->lock); -} - -static void hclge_destroy_cmd_queue(struct hclge_hw *hw) -{ - hclge_destroy_queue(&hw->cmq.csq); - hclge_destroy_queue(&hw->cmq.crq); -} - void hclge_cmd_uninit(struct hclge_dev *hdev) { spin_lock_bh(&hdev->hw.cmq.csq.lock); @@ -501,5 +488,6 @@ void hclge_cmd_uninit(struct hclge_dev *hdev) spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); - hclge_destroy_cmd_queue(&hdev->hw); + hclge_free_cmd_desc(&hdev->hw.cmq.csq); + hclge_free_cmd_desc(&hdev->hw.cmq.crq); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 112df34b3869..f3d4cbd28913 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -886,8 +886,8 @@ static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev) } } -static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, - bool sel_x, u32 loc) +static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, + bool sel_x, u32 loc) { struct hclge_fd_tcam_config_1_cmd *req1; struct hclge_fd_tcam_config_2_cmd *req2; @@ -912,7 +912,7 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, ret = hclge_cmd_send(&hdev->hw, desc, 3); if (ret) - return; + return ret; dev_info(&hdev->pdev->dev, " read result tcam key %s(%u):\n", sel_x ? "x" : "y", loc); @@ -931,16 +931,76 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, req = (u32 *)req3->tcam_data; for (i = 0; i < 5; i++) dev_info(&hdev->pdev->dev, "%08x\n", *req++); + + return ret; +} + +static int hclge_dbg_get_rules_location(struct hclge_dev *hdev, u16 *rule_locs) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + int cnt = 0; + + spin_lock_bh(&hdev->fd_rule_lock); + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + rule_locs[cnt] = rule->location; + cnt++; + } + spin_unlock_bh(&hdev->fd_rule_lock); + + if (cnt != hdev->hclge_fd_rule_num) + return -EINVAL; + + return cnt; } static void hclge_dbg_fd_tcam(struct hclge_dev *hdev) { - u32 i; + int i, ret, rule_cnt; + u16 *rule_locs; - for (i = 0; i < hdev->fd_cfg.rule_num[0]; i++) { - hclge_dbg_fd_tcam_read(hdev, 0, true, i); - hclge_dbg_fd_tcam_read(hdev, 0, false, i); + if (!hnae3_dev_fd_supported(hdev)) { + dev_err(&hdev->pdev->dev, + "Only FD-supported dev supports dump fd tcam\n"); + return; } + + if (!hdev->hclge_fd_rule_num || + !hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) + return; + + rule_locs = kcalloc(hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1], + sizeof(u16), GFP_KERNEL); + if (!rule_locs) + return; + + rule_cnt = hclge_dbg_get_rules_location(hdev, rule_locs); + if (rule_cnt <= 0) { + dev_err(&hdev->pdev->dev, + "failed to get rule number, ret = %d\n", rule_cnt); + kfree(rule_locs); + return; + } + + for (i = 0; i < rule_cnt; i++) { + ret = hclge_dbg_fd_tcam_read(hdev, 0, true, rule_locs[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get fd tcam key x, ret = %d\n", ret); + kfree(rule_locs); + return; + } + + ret = hclge_dbg_fd_tcam_read(hdev, 0, false, rule_locs[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get fd tcam key y, ret = %d\n", ret); + kfree(rule_locs); + return; + } + } + + kfree(rule_locs); } void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) @@ -976,6 +1036,14 @@ void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state); } +static void hclge_dbg_dump_serv_info(struct hclge_dev *hdev) +{ + dev_info(&hdev->pdev->dev, "last_serv_processed: %lu\n", + hdev->last_serv_processed); + dev_info(&hdev->pdev->dev, "last_serv_cnt: %lu\n", + hdev->serv_processed_cnt); +} + static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev) { struct hclge_desc *desc_src, *desc_tmp; @@ -1227,6 +1295,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]); } else if (strncmp(cmd_buf, "dump reset info", 15) == 0) { hclge_dbg_dump_rst_info(hdev); + } else if (strncmp(cmd_buf, "dump serv info", 14) == 0) { + hclge_dbg_dump_serv_info(hdev); } else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) { hclge_dbg_get_m7_stats_info(hdev); } else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d862e9ba27e1..a510f005209c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -72,6 +72,8 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; +static struct workqueue_struct *hclge_wq; + static const struct pci_device_id ae_algo_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0}, @@ -416,7 +418,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) { #define HCLGE_MAC_CMD_NUM 21 - u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); + u64 *data = (u64 *)(&hdev->mac_stats); struct hclge_desc desc[HCLGE_MAC_CMD_NUM]; __le64 *desc_data; int i, k, n; @@ -453,7 +455,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) static int hclge_mac_update_stats_complete(struct hclge_dev *hdev, u32 desc_num) { - u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); + u64 *data = (u64 *)(&hdev->mac_stats); struct hclge_desc *desc; __le64 *desc_data; u16 i, k, n; @@ -802,7 +804,7 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data) struct hclge_dev *hdev = vport->back; u64 *p; - p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, g_mac_stats_string, + p = hclge_comm_get_stats(&hdev->mac_stats, g_mac_stats_string, ARRAY_SIZE(g_mac_stats_string), data); p = hclge_tqps_get_stats(handle, p); } @@ -815,8 +817,8 @@ static void hclge_get_mac_stat(struct hnae3_handle *handle, hclge_update_stats(handle, NULL); - mac_stats->tx_pause_cnt = hdev->hw_stats.mac_stats.mac_tx_mac_pause_num; - mac_stats->rx_pause_cnt = hdev->hw_stats.mac_stats.mac_rx_mac_pause_num; + mac_stats->tx_pause_cnt = hdev->mac_stats.mac_tx_mac_pause_num; + mac_stats->rx_pause_cnt = hdev->mac_stats.mac_rx_mac_pause_num; } static int hclge_parse_func_status(struct hclge_dev *hdev, @@ -2665,31 +2667,27 @@ static int hclge_mac_init(struct hclge_dev *hdev) static void hclge_mbx_task_schedule(struct hclge_dev *hdev) { - if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) && + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state)) - queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq, - &hdev->mbx_service_task); + mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), + hclge_wq, &hdev->service_task, 0); } static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) - queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq, - &hdev->rst_service_task); + mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), + hclge_wq, &hdev->service_task, 0); } void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time) { - if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) && - !test_bit(HCLGE_STATE_REMOVING, &hdev->state) && - !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state)) { - hdev->hw_stats.stats_timer++; - hdev->fd_arfs_expire_timer++; + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && + !test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - system_wq, &hdev->service_task, + hclge_wq, &hdev->service_task, delay_time); - } } static int hclge_get_mac_link_status(struct hclge_dev *hdev) @@ -2748,6 +2746,10 @@ static void hclge_update_link_status(struct hclge_dev *hdev) if (!client) return; + + if (test_and_set_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state)) + return; + state = hclge_get_mac_phy_link(hdev); if (state != hdev->hw.mac.link) { for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { @@ -2761,6 +2763,8 @@ static void hclge_update_link_status(struct hclge_dev *hdev) } hdev->hw.mac.link = state; } + + clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); } static void hclge_update_port_capability(struct hclge_mac *mac) @@ -2940,6 +2944,9 @@ static int hclge_get_vf_config(struct hnae3_handle *handle, int vf, ivf->trusted = vport->vf_info.trusted; ivf->min_tx_rate = 0; ivf->max_tx_rate = vport->vf_info.max_tx_rate; + ivf->vlan = vport->port_base_vlan_cfg.vlan_info.vlan_tag; + ivf->vlan_proto = htons(vport->port_base_vlan_cfg.vlan_info.vlan_proto); + ivf->qos = vport->port_base_vlan_cfg.vlan_info.qos; ether_addr_copy(ivf->mac, vport->vf_info.mac); return 0; @@ -2998,8 +3005,6 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) /* check for vector0 msix event source */ if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) { - dev_info(&hdev->pdev->dev, "received event 0x%x\n", - msix_src_reg); *clearval = msix_src_reg; return HCLGE_VECTOR0_EVENT_ERR; } @@ -3352,6 +3357,18 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) return 0; } +static void hclge_mailbox_service_task(struct hclge_dev *hdev) +{ + if (!test_and_clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state) || + test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) || + test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state)) + return; + + hclge_mbx_handler(hdev); + + clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); +} + static int hclge_func_reset_sync_vf(struct hclge_dev *hdev) { struct hclge_pf_rst_sync_cmd *req; @@ -3363,6 +3380,9 @@ static int hclge_func_reset_sync_vf(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_VF_RST_RDY, true); do { + /* vf need to down netdev by mbx during PF or FLR reset */ + hclge_mailbox_service_task(hdev); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); /* for compatible with old firmware, wait * 100 ms for VF to stop IO @@ -3483,10 +3503,15 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, /* first, resolve any unknown reset type to the known type(s) */ if (test_bit(HNAE3_UNKNOWN_RESET, addr)) { + u32 msix_sts_reg = hclge_read_dev(&hdev->hw, + HCLGE_VECTOR0_PF_OTHER_INT_STS_REG); /* we will intentionally ignore any errors from this function * as we will end up in *some* reset request in any case */ - hclge_handle_hw_msix_error(hdev, addr); + if (hclge_handle_hw_msix_error(hdev, addr)) + dev_info(&hdev->pdev->dev, "received msix interrupt 0x%x\n", + msix_sts_reg); + clear_bit(HNAE3_UNKNOWN_RESET, addr); /* We defered the clearing of the error event which caused * interrupt since it was not posssible to do that in @@ -3672,6 +3697,8 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev) hclge_dbg_dump_rst_info(hdev); + set_bit(HCLGE_STATE_RST_FAIL, &hdev->state); + return false; } @@ -3825,6 +3852,7 @@ static void hclge_reset(struct hclge_dev *hdev) hdev->rst_stats.reset_fail_cnt = 0; hdev->rst_stats.reset_done_cnt++; ae_dev->reset_type = HNAE3_NONE_RESET; + clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state); /* if default_reset_request has a higher level reset request, * it should be handled as soon as possible. since some errors @@ -3939,36 +3967,19 @@ static void hclge_reset_subtask(struct hclge_dev *hdev) hdev->reset_type = HNAE3_NONE_RESET; } -static void hclge_reset_service_task(struct work_struct *work) +static void hclge_reset_service_task(struct hclge_dev *hdev) { - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, rst_service_task); + if (!test_and_clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) + return; if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) return; - clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); - hclge_reset_subtask(hdev); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); } -static void hclge_mailbox_service_task(struct work_struct *work) -{ - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, mbx_service_task); - - if (test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state)) - return; - - clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state); - - hclge_mbx_handler(hdev); - - clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); -} - static void hclge_update_vport_alive(struct hclge_dev *hdev) { int i; @@ -3986,29 +3997,62 @@ static void hclge_update_vport_alive(struct hclge_dev *hdev) } } -static void hclge_service_task(struct work_struct *work) +static void hclge_periodic_service_task(struct hclge_dev *hdev) { - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, service_task.work); + unsigned long delta = round_jiffies_relative(HZ); - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + /* Always handle the link updating to make sure link state is + * updated when it is triggered by mbx. + */ + hclge_update_link_status(hdev); - if (hdev->hw_stats.stats_timer >= HCLGE_STATS_TIMER_INTERVAL) { - hclge_update_stats_for_all(hdev); - hdev->hw_stats.stats_timer = 0; + if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { + delta = jiffies - hdev->last_serv_processed; + + if (delta < round_jiffies_relative(HZ)) { + delta = round_jiffies_relative(HZ) - delta; + goto out; + } } - hclge_update_port_info(hdev); - hclge_update_link_status(hdev); + hdev->serv_processed_cnt++; hclge_update_vport_alive(hdev); + + if (test_bit(HCLGE_STATE_DOWN, &hdev->state)) { + hdev->last_serv_processed = jiffies; + goto out; + } + + if (!(hdev->serv_processed_cnt % HCLGE_STATS_TIMER_INTERVAL)) + hclge_update_stats_for_all(hdev); + + hclge_update_port_info(hdev); hclge_sync_vlan_filter(hdev); - if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) { + if (!(hdev->serv_processed_cnt % HCLGE_ARFS_EXPIRE_INTERVAL)) hclge_rfs_filter_expire(hdev); - hdev->fd_arfs_expire_timer = 0; - } - hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + hdev->last_serv_processed = jiffies; + +out: + hclge_task_schedule(hdev, delta); +} + +static void hclge_service_task(struct work_struct *work) +{ + struct hclge_dev *hdev = + container_of(work, struct hclge_dev, service_task.work); + + hclge_reset_service_task(hdev); + hclge_mailbox_service_task(hdev); + hclge_periodic_service_task(hdev); + + /* Handle reset and mbx again in case periodical task delays the + * handling by calling hclge_task_schedule() in + * hclge_periodic_service_task(). + */ + hclge_reset_service_task(hdev); + hclge_mailbox_service_task(hdev); } struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle) @@ -6734,6 +6778,19 @@ static void hclge_reset_tqp_stats(struct hnae3_handle *handle) } } +static void hclge_flush_link_update(struct hclge_dev *hdev) +{ +#define HCLGE_FLUSH_LINK_TIMEOUT 100000 + + unsigned long last = hdev->serv_processed_cnt; + int i = 0; + + while (test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state) && + i++ < HCLGE_FLUSH_LINK_TIMEOUT && + last == hdev->serv_processed_cnt) + usleep_range(1, 1); +} + static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -6742,12 +6799,12 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) if (enable) { hclge_task_schedule(hdev, round_jiffies_relative(HZ)); } else { - /* Set the DOWN flag here to disable the service to be - * scheduled again - */ + /* Set the DOWN flag here to disable link updating */ set_bit(HCLGE_STATE_DOWN, &hdev->state); - cancel_delayed_work_sync(&hdev->service_task); - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + + /* flush memory to make sure DOWN is seen by service task */ + smp_mb__before_atomic(); + hclge_flush_link_update(hdev); } } @@ -7483,7 +7540,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(mac, tmp, &vport->uc_mac_list, node) { @@ -7496,7 +7552,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev) kfree(mac); } } - mutex_unlock(&hdev->vport_cfg_mutex); } static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev, @@ -8257,7 +8312,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { @@ -8265,7 +8319,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) kfree(vlan); } } - mutex_unlock(&hdev->vport_cfg_mutex); } static void hclge_restore_vlan_table(struct hnae3_handle *handle) @@ -8277,7 +8330,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) u16 state, vlan_id; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto; @@ -8303,8 +8355,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) break; } } - - mutex_unlock(&hdev->vport_cfg_mutex); } int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) @@ -9256,6 +9306,7 @@ static void hclge_state_init(struct hclge_dev *hdev) set_bit(HCLGE_STATE_DOWN, &hdev->state); clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state); clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state); clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); } @@ -9269,10 +9320,6 @@ static void hclge_state_uninit(struct hclge_dev *hdev) del_timer_sync(&hdev->reset_timer); if (hdev->service_task.work.func) cancel_delayed_work_sync(&hdev->service_task); - if (hdev->rst_service_task.func) - cancel_work_sync(&hdev->rst_service_task); - if (hdev->mbx_service_task.func) - cancel_work_sync(&hdev->mbx_service_task); } static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) @@ -9342,7 +9389,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN; mutex_init(&hdev->vport_lock); - mutex_init(&hdev->vport_cfg_mutex); spin_lock_init(&hdev->fd_rule_lock); ret = hclge_pci_init(hdev); @@ -9477,8 +9523,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) timer_setup(&hdev->reset_timer, hclge_reset_timer, 0); INIT_DELAYED_WORK(&hdev->service_task, hclge_service_task); - INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task); - INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task); /* Setup affinity after service timer setup because add_timer_on * is called in affinity notify. @@ -9512,6 +9556,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) dev_info(&hdev->pdev->dev, "%s driver initialization finished.\n", HCLGE_DRIVER_NAME); + hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + return 0; err_mdiobus_unreg: @@ -9534,7 +9580,7 @@ out: static void hclge_stats_clear(struct hclge_dev *hdev) { - memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats)); + memset(&hdev->mac_stats, 0, sizeof(hdev->mac_stats)); } static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable) @@ -9895,7 +9941,6 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) mutex_destroy(&hdev->vport_lock); hclge_uninit_vport_mac_table(hdev); hclge_uninit_vport_vlan_table(hdev); - mutex_destroy(&hdev->vport_cfg_mutex); ae_dev->priv = NULL; } @@ -10611,6 +10656,12 @@ static int hclge_init(void) { pr_info("%s is initializing\n", HCLGE_NAME); + hclge_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGE_NAME); + if (!hclge_wq) { + pr_err("%s: failed to create workqueue\n", HCLGE_NAME); + return -ENOMEM; + } + hnae3_register_ae_algo(&ae_algo); return 0; @@ -10619,6 +10670,7 @@ static int hclge_init(void) static void hclge_exit(void) { hnae3_unregister_ae_algo(&ae_algo); + destroy_workqueue(hclge_wq); } module_init(hclge_init); module_exit(hclge_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index ebb4c6e9aed3..4e5cfda16eb7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -208,13 +208,14 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_NIC_REGISTERED, HCLGE_STATE_ROCE_REGISTERED, HCLGE_STATE_SERVICE_INITED, - HCLGE_STATE_SERVICE_SCHED, HCLGE_STATE_RST_SERVICE_SCHED, HCLGE_STATE_RST_HANDLING, HCLGE_STATE_MBX_SERVICE_SCHED, HCLGE_STATE_MBX_HANDLING, HCLGE_STATE_STATISTICS_UPDATING, HCLGE_STATE_CMD_DISABLE, + HCLGE_STATE_LINK_UPDATING, + HCLGE_STATE_RST_FAIL, HCLGE_STATE_MAX }; @@ -454,11 +455,7 @@ struct hclge_mac_stats { u64 mac_rx_ctrl_pkt_num; }; -#define HCLGE_STATS_TIMER_INTERVAL (60 * 5) -struct hclge_hw_stats { - struct hclge_mac_stats mac_stats; - u32 stats_timer; -}; +#define HCLGE_STATS_TIMER_INTERVAL 300UL struct hclge_vlan_type_cfg { u16 rx_ot_fst_vlan_type; @@ -549,7 +546,7 @@ struct key_info { /* assigned by firmware, the real filter number for each pf may be less */ #define MAX_FD_FILTER_NUM 4096 -#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL 5 +#define HCLGE_ARFS_EXPIRE_INTERVAL 5UL enum HCLGE_FD_ACTIVE_RULE_TYPE { HCLGE_FD_RULE_NONE, @@ -712,7 +709,7 @@ struct hclge_dev { struct hnae3_ae_dev *ae_dev; struct hclge_hw hw; struct hclge_misc_vector misc_vector; - struct hclge_hw_stats hw_stats; + struct hclge_mac_stats mac_stats; unsigned long state; unsigned long flr_state; unsigned long last_reset_time; @@ -774,8 +771,6 @@ struct hclge_dev { unsigned long service_timer_previous; struct timer_list reset_timer; struct delayed_work service_task; - struct work_struct rst_service_task; - struct work_struct mbx_service_task; bool cur_promisc; int num_alloc_vfs; /* Actual number of VFs allocated */ @@ -811,7 +806,8 @@ struct hclge_dev { struct hlist_head fd_rule_list; spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */ u16 hclge_fd_rule_num; - u16 fd_arfs_expire_timer; + unsigned long serv_processed_cnt; + unsigned long last_serv_processed; unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)]; enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type; u8 fd_en; @@ -825,8 +821,6 @@ struct hclge_dev { u16 share_umv_size; struct mutex umv_mutex; /* protect share_umv_size */ - struct mutex vport_cfg_mutex; /* Protect stored vf table */ - DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats, HCLGE_MAC_TNL_LOG_SIZE); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 0b433ebe6a2d..f905dd3386b3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -635,7 +635,6 @@ static void hclge_handle_link_change_event(struct hclge_dev *hdev, #define LINK_STATUS_OFFSET 1 #define LINK_FAIL_CODE_OFFSET 2 - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); hclge_task_schedule(hdev, 0); if (!req->msg[LINK_STATUS_OFFSET]) @@ -798,13 +797,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev) hclge_get_link_mode(vport, req); break; case HCLGE_MBX_GET_VF_FLR_STATUS: - mutex_lock(&hdev->vport_cfg_mutex); hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_UC); hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_MC); hclge_rm_vport_all_vlan_table(vport, true); - mutex_unlock(&hdev->vport_cfg_mutex); break; case HCLGE_MBX_GET_MEDIA_TYPE: ret = hclge_get_vf_media_type(vport, req); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index af2245e3bb95..f38d236ebf4f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -443,7 +443,7 @@ void hclgevf_cmd_uninit(struct hclgevf_dev *hdev) { spin_lock_bh(&hdev->hw.cmq.csq.lock); spin_lock(&hdev->hw.cmq.crq.lock); - clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); hclgevf_cmd_uninit_regs(&hdev->hw); spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 25d78a5aaa34..c33b8027f801 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -16,6 +16,8 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev); static struct hnae3_ae_algo ae_algovf; +static struct workqueue_struct *hclgevf_wq; + static const struct pci_device_id ae_algovf_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), 0}, @@ -440,6 +442,9 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) struct hnae3_client *rclient; struct hnae3_client *client; + if (test_and_set_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state)) + return; + client = handle->client; rclient = hdev->roce_client; @@ -452,6 +457,8 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) rclient->ops->link_status_change(rhandle, !!link_state); hdev->hw.mac.link = link_state; } + + clear_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state); } static void hclgevf_update_link_mode(struct hclgevf_dev *hdev) @@ -1591,6 +1598,7 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev) set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); hclgevf_reset_task_schedule(hdev); } else { + set_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); hclgevf_dump_rst_info(hdev); } } @@ -1652,6 +1660,7 @@ static int hclgevf_reset(struct hclgevf_dev *hdev) ae_dev->reset_type = HNAE3_NONE_RESET; hdev->rst_stats.rst_done_cnt++; hdev->rst_stats.rst_fail_cnt = 0; + clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); return ret; err_reset_lock: @@ -1767,62 +1776,39 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev) void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) && - !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) { - set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); - schedule_work(&hdev->rst_service_task); - } + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, + &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); } void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) && - !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) { - set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); - schedule_work(&hdev->mbx_service_task); - } + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, + &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); } -static void hclgevf_task_schedule(struct hclgevf_dev *hdev) +static void hclgevf_task_schedule(struct hclgevf_dev *hdev, + unsigned long delay) { - if (!test_bit(HCLGEVF_STATE_DOWN, &hdev->state) && - !test_and_set_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state)) - schedule_work(&hdev->service_task); + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, delay); } -static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev) -{ - /* if we have any pending mailbox event then schedule the mbx task */ - if (hdev->mbx_event_pending) - hclgevf_mbx_task_schedule(hdev); - - if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) - hclgevf_reset_task_schedule(hdev); -} - -static void hclgevf_service_timer(struct timer_list *t) -{ - struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer); - - mod_timer(&hdev->service_timer, jiffies + - HCLGEVF_GENERAL_TASK_INTERVAL * HZ); - - hdev->stats_timer++; - hclgevf_task_schedule(hdev); -} - -static void hclgevf_reset_service_task(struct work_struct *work) +static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) { #define HCLGEVF_MAX_RESET_ATTEMPTS_CNT 3 - struct hclgevf_dev *hdev = - container_of(work, struct hclgevf_dev, rst_service_task); int ret; - if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + if (!test_and_clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) return; - clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); + if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + return; if (test_and_clear_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) { @@ -1885,39 +1871,24 @@ static void hclgevf_reset_service_task(struct work_struct *work) clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); } -static void hclgevf_mailbox_service_task(struct work_struct *work) +static void hclgevf_mailbox_service_task(struct hclgevf_dev *hdev) { - struct hclgevf_dev *hdev; - - hdev = container_of(work, struct hclgevf_dev, mbx_service_task); + if (!test_and_clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state)) + return; if (test_and_set_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) return; - clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); - hclgevf_mbx_async_handler(hdev); clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state); } -static void hclgevf_keep_alive_timer(struct timer_list *t) +static void hclgevf_keep_alive(struct hclgevf_dev *hdev) { - struct hclgevf_dev *hdev = from_timer(hdev, t, keep_alive_timer); - - schedule_work(&hdev->keep_alive_task); - mod_timer(&hdev->keep_alive_timer, jiffies + - HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); -} - -static void hclgevf_keep_alive_task(struct work_struct *work) -{ - struct hclgevf_dev *hdev; u8 respmsg; int ret; - hdev = container_of(work, struct hclgevf_dev, keep_alive_task); - if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) return; @@ -1928,19 +1899,32 @@ static void hclgevf_keep_alive_task(struct work_struct *work) "VF sends keep alive cmd failed(=%d)\n", ret); } -static void hclgevf_service_task(struct work_struct *work) +static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev) { - struct hnae3_handle *handle; - struct hclgevf_dev *hdev; + unsigned long delta = round_jiffies_relative(HZ); + struct hnae3_handle *handle = &hdev->nic; - hdev = container_of(work, struct hclgevf_dev, service_task); - handle = &hdev->nic; + if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { + delta = jiffies - hdev->last_serv_processed; - if (hdev->stats_timer >= HCLGEVF_STATS_TIMER_INTERVAL) { - hclgevf_tqps_update_stats(handle); - hdev->stats_timer = 0; + if (delta < round_jiffies_relative(HZ)) { + delta = round_jiffies_relative(HZ) - delta; + goto out; + } + } + + hdev->serv_processed_cnt++; + if (!(hdev->serv_processed_cnt % HCLGEVF_KEEP_ALIVE_TASK_INTERVAL)) + hclgevf_keep_alive(hdev); + + if (test_bit(HCLGEVF_STATE_DOWN, &hdev->state)) { + hdev->last_serv_processed = jiffies; + goto out; } + if (!(hdev->serv_processed_cnt % HCLGEVF_STATS_TIMER_INTERVAL)) + hclgevf_tqps_update_stats(handle); + /* request the link status from the PF. PF would be able to tell VF * about such updates in future so we might remove this later */ @@ -1950,9 +1934,27 @@ static void hclgevf_service_task(struct work_struct *work) hclgevf_sync_vlan_filter(hdev); - hclgevf_deferred_task_schedule(hdev); + hdev->last_serv_processed = jiffies; - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); +out: + hclgevf_task_schedule(hdev, delta); +} + +static void hclgevf_service_task(struct work_struct *work) +{ + struct hclgevf_dev *hdev = container_of(work, struct hclgevf_dev, + service_task.work); + + hclgevf_reset_service_task(hdev); + hclgevf_mailbox_service_task(hdev); + hclgevf_periodic_service_task(hdev); + + /* Handle reset and mbx again in case periodical task delays the + * handling by calling hclgevf_task_schedule() in + * hclgevf_periodic_service_task() + */ + hclgevf_reset_service_task(hdev); + hclgevf_mailbox_service_task(hdev); } static void hclgevf_clear_event_cause(struct hclgevf_dev *hdev, u32 regclr) @@ -2189,16 +2191,31 @@ static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) false); } +static void hclgevf_flush_link_update(struct hclgevf_dev *hdev) +{ +#define HCLGEVF_FLUSH_LINK_TIMEOUT 100000 + + unsigned long last = hdev->serv_processed_cnt; + int i = 0; + + while (test_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state) && + i++ < HCLGEVF_FLUSH_LINK_TIMEOUT && + last == hdev->serv_processed_cnt) + usleep_range(1, 1); +} + static void hclgevf_set_timer_task(struct hnae3_handle *handle, bool enable) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); if (enable) { - mod_timer(&hdev->service_timer, jiffies + HZ); + hclgevf_task_schedule(hdev, 0); } else { - del_timer_sync(&hdev->service_timer); - cancel_work_sync(&hdev->service_task); - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); + set_bit(HCLGEVF_STATE_DOWN, &hdev->state); + + /* flush memory to make sure DOWN is seen by service task */ + smp_mb__before_atomic(); + hclgevf_flush_link_update(hdev); } } @@ -2245,16 +2262,12 @@ static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive) static int hclgevf_client_start(struct hnae3_handle *handle) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); int ret; ret = hclgevf_set_alive(handle, true); if (ret) return ret; - mod_timer(&hdev->keep_alive_timer, jiffies + - HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); - return 0; } @@ -2267,25 +2280,15 @@ static void hclgevf_client_stop(struct hnae3_handle *handle) if (ret) dev_warn(&hdev->pdev->dev, "%s failed %d\n", __func__, ret); - - del_timer_sync(&hdev->keep_alive_timer); - cancel_work_sync(&hdev->keep_alive_task); } static void hclgevf_state_init(struct hclgevf_dev *hdev) { - /* setup tasks for the MBX */ - INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task); clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state); + clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); - /* setup tasks for service timer */ - timer_setup(&hdev->service_timer, hclgevf_service_timer, 0); - - INIT_WORK(&hdev->service_task, hclgevf_service_task); - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); - - INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task); + INIT_DELAYED_WORK(&hdev->service_task, hclgevf_service_task); mutex_init(&hdev->mbx_resp.mbx_mutex); @@ -2298,18 +2301,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev) set_bit(HCLGEVF_STATE_DOWN, &hdev->state); set_bit(HCLGEVF_STATE_REMOVING, &hdev->state); - if (hdev->keep_alive_timer.function) - del_timer_sync(&hdev->keep_alive_timer); - if (hdev->keep_alive_task.func) - cancel_work_sync(&hdev->keep_alive_task); - if (hdev->service_timer.function) - del_timer_sync(&hdev->service_timer); - if (hdev->service_task.func) - cancel_work_sync(&hdev->service_task); - if (hdev->mbx_service_task.func) - cancel_work_sync(&hdev->mbx_service_task); - if (hdev->rst_service_task.func) - cancel_work_sync(&hdev->rst_service_task); + if (hdev->service_task.work.func) + cancel_delayed_work_sync(&hdev->service_task); mutex_destroy(&hdev->mbx_resp.mbx_mutex); } @@ -2807,6 +2800,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) dev_info(&hdev->pdev->dev, "finished initializing %s driver\n", HCLGEVF_DRIVER_NAME); + hclgevf_task_schedule(hdev, round_jiffies_relative(HZ)); + return 0; err_config: @@ -2838,7 +2833,6 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) { struct pci_dev *pdev = ae_dev->pdev; - struct hclgevf_dev *hdev; int ret; ret = hclgevf_alloc_hdev(ae_dev); @@ -2853,10 +2847,6 @@ static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - hdev = ae_dev->priv; - timer_setup(&hdev->keep_alive_timer, hclgevf_keep_alive_timer, 0); - INIT_WORK(&hdev->keep_alive_task, hclgevf_keep_alive_task); - return 0; } @@ -3213,6 +3203,12 @@ static int hclgevf_init(void) { pr_info("%s is initializing\n", HCLGEVF_NAME); + hclgevf_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGEVF_NAME); + if (!hclgevf_wq) { + pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME); + return -ENOMEM; + } + hnae3_register_ae_algo(&ae_algovf); return 0; @@ -3221,6 +3217,7 @@ static int hclgevf_init(void) static void hclgevf_exit(void) { hnae3_unregister_ae_algo(&ae_algovf); + destroy_workqueue(hclgevf_wq); } module_init(hclgevf_init); module_exit(hclgevf_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 2f4c81bf4169..003114f6db6c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -142,12 +142,13 @@ enum hclgevf_states { HCLGEVF_STATE_REMOVING, HCLGEVF_STATE_NIC_REGISTERED, /* task states */ - HCLGEVF_STATE_SERVICE_SCHED, HCLGEVF_STATE_RST_SERVICE_SCHED, HCLGEVF_STATE_RST_HANDLING, HCLGEVF_STATE_MBX_SERVICE_SCHED, HCLGEVF_STATE_MBX_HANDLING, HCLGEVF_STATE_CMD_DISABLE, + HCLGEVF_STATE_LINK_UPDATING, + HCLGEVF_STATE_RST_FAIL, }; struct hclgevf_mac { @@ -283,12 +284,7 @@ struct hclgevf_dev { struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */ struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */ - struct timer_list service_timer; - struct timer_list keep_alive_timer; - struct work_struct service_task; - struct work_struct keep_alive_task; - struct work_struct rst_service_task; - struct work_struct mbx_service_task; + struct delayed_work service_task; struct hclgevf_tqp *htqp; @@ -298,7 +294,8 @@ struct hclgevf_dev { struct hnae3_client *nic_client; struct hnae3_client *roce_client; u32 flag; - u32 stats_timer; + unsigned long serv_processed_cnt; + unsigned long last_serv_processed; }; static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 2411ad270c98..02a14f5e7fe3 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -766,7 +766,7 @@ static void hinic_set_rx_mode(struct net_device *netdev) queue_work(nic_dev->workq, &rx_mode_work->work); } -static void hinic_tx_timeout(struct net_device *netdev) +static void hinic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct hinic_dev *nic_dev = netdev_priv(netdev); diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index 92929750f832..bef676d93339 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -363,7 +363,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); -static void i596_tx_timeout (struct net_device *dev); +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue); static void print_eth(unsigned char *buf, char *str); static void set_multicast_list(struct net_device *dev); @@ -1019,7 +1019,7 @@ err_irq_dev: return res; } -static void i596_tx_timeout (struct net_device *dev) +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct i596_private *lp = dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c index bb3b8adbe4f0..a0bfb509e002 100644 --- a/drivers/net/ethernet/i825xx/ether1.c +++ b/drivers/net/ethernet/i825xx/ether1.c @@ -66,7 +66,7 @@ static netdev_tx_t ether1_sendpacket(struct sk_buff *skb, static irqreturn_t ether1_interrupt(int irq, void *dev_id); static int ether1_close(struct net_device *dev); static void ether1_setmulticastlist(struct net_device *dev); -static void ether1_timeout(struct net_device *dev); +static void ether1_timeout(struct net_device *dev, unsigned int txqueue); /* ------------------------------------------------------------------------- */ @@ -650,7 +650,7 @@ ether1_open (struct net_device *dev) } static void -ether1_timeout(struct net_device *dev) +ether1_timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_WARNING "%s: transmit timeout, network cable problem?\n", dev->name); diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index f9742af7f142..b03757e169e4 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -351,7 +351,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); -static void i596_tx_timeout (struct net_device *dev); +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue); static void print_eth(unsigned char *buf, char *str); static void set_multicast_list(struct net_device *dev); static inline void ca(struct net_device *dev); @@ -936,7 +936,7 @@ out_remove_rx_bufs: return -EAGAIN; } -static void i596_tx_timeout (struct net_device *dev) +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct i596_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 1a86184d44c0..4564ee02c95f 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -125,7 +125,7 @@ static netdev_tx_t sun3_82586_send_packet(struct sk_buff *, struct net_device *); static struct net_device_stats *sun3_82586_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void sun3_82586_timeout(struct net_device *dev); +static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue); #if 0 static void sun3_82586_dump(struct net_device *,void *); #endif @@ -965,7 +965,7 @@ static void startrecv586(struct net_device *dev) WAIT_4_SCB_CMD_RUC(); /* wait for accept cmd. (no timeout!!) */ } -static void sun3_82586_timeout(struct net_device *dev) +static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue) { struct priv *p = netdev_priv(dev); #ifndef NO_NOPCOMMANDS diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 13e30eba5349..0273fb7a9d01 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2786,7 +2786,7 @@ out: return; } -static void ehea_tx_watchdog(struct net_device *dev) +static void ehea_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct ehea_port *port = netdev_priv(dev); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 2e40425d8a34..b7fc17756c51 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -776,7 +776,7 @@ static void emac_reset_work(struct work_struct *work) mutex_unlock(&dev->link_lock); } -static void emac_tx_timeout(struct net_device *ndev) +static void emac_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct emac_instance *dev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c90080781924..94b9d8913b66 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2282,7 +2282,7 @@ err: return -ret; } -static void ibmvnic_tx_timeout(struct net_device *dev) +static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ibmvnic_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index a65d5a9ba7db..1b8d015ebfb0 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2316,7 +2316,7 @@ static void e100_down(struct nic *nic) e100_rx_clean_list(nic); } -static void e100_tx_timeout(struct net_device *netdev) +static void e100_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct nic *nic = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index aca97b084003..2bced34c19ba 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -134,7 +134,7 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); static void e1000_enter_82542_rst(struct e1000_adapter *adapter); static void e1000_leave_82542_rst(struct e1000_adapter *adapter); -static void e1000_tx_timeout(struct net_device *dev); +static void e1000_tx_timeout(struct net_device *dev, unsigned int txqueue); static void e1000_reset_task(struct work_struct *work); static void e1000_smartspeed(struct e1000_adapter *adapter); static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, @@ -3488,7 +3488,7 @@ exit: * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void e1000_tx_timeout(struct net_device *netdev) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index fe7997c18a10..4c220600ea9a 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5929,7 +5929,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void e1000_tx_timeout(struct net_device *netdev) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 68baee04dc58..ba2566e2123d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -697,7 +697,7 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) * fm10k_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void fm10k_tx_timeout(struct net_device *netdev) +static void fm10k_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct fm10k_intfc *interface = netdev_priv(netdev); bool real_tx_hang = false; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1ccabeafa44c..4c9ac6c80eb8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -301,7 +301,7 @@ void i40e_service_event_schedule(struct i40e_pf *pf) * device is munged, not just the one netdev port, so go for the full * reset. **/ -static void i40e_tx_timeout(struct net_device *netdev) +static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 821987da5698..0a8824871618 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -159,7 +159,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) * iavf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void iavf_tx_timeout(struct net_device *netdev) +static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct iavf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 69bff085acf7..4d5220c9c721 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5060,7 +5060,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, * ice_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void ice_tx_timeout(struct net_device *netdev) +static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_ring *tx_ring = NULL; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 98346eb064d5..d11e64a58ed1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -146,7 +146,7 @@ static int igb_poll(struct napi_struct *, int); static bool igb_clean_tx_irq(struct igb_q_vector *, int); static int igb_clean_rx_irq(struct igb_q_vector *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); -static void igb_tx_timeout(struct net_device *); +static void igb_tx_timeout(struct net_device *, unsigned int txqueue); static void igb_reset_task(struct work_struct *); static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); @@ -6184,7 +6184,7 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void igb_tx_timeout(struct net_device *netdev) +static void igb_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 6003dc3ff5fd..5b1800c3ba82 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2375,7 +2375,7 @@ static netdev_tx_t igbvf_xmit_frame(struct sk_buff *skb, * igbvf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void igbvf_tx_timeout(struct net_device *netdev) +static void igbvf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct igbvf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 3d8c051dd327..b64e91ea3465 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -70,7 +70,7 @@ static int ixgb_clean(struct napi_struct *, int); static bool ixgb_clean_rx_irq(struct ixgb_adapter *, int *, int); static void ixgb_alloc_rx_buffers(struct ixgb_adapter *, int); -static void ixgb_tx_timeout(struct net_device *dev); +static void ixgb_tx_timeout(struct net_device *dev, unsigned int txqueue); static void ixgb_tx_timeout_task(struct work_struct *work); static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter); @@ -1538,7 +1538,7 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) **/ static void -ixgb_tx_timeout(struct net_device *netdev) +ixgb_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgb_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c index 171cdc552961..5b1cf49df3d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -166,7 +166,9 @@ static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp, ixgbe_dbg_netdev_ops_buf[len] = '\0'; if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) { - adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev); + /* TX Queue number below is wrong, but ixgbe does not use it */ + adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev, + UINT_MAX); e_dev_info("tx_timeout called\n"); } else { e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 25c097cd8100..8129ea2e94a8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6158,7 +6158,7 @@ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter) * ixgbe_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void ixgbe_tx_timeout(struct net_device *netdev) +static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgbe_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 076f2da36f27..fa286694ac2c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -250,7 +250,7 @@ static void ixgbevf_tx_timeout_reset(struct ixgbevf_adapter *adapter) * ixgbevf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void ixgbevf_tx_timeout(struct net_device *netdev) +static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 25aa400e2e3c..2e4975572e9f 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2337,7 +2337,7 @@ jme_change_mtu(struct net_device *netdev, int new_mtu) } static void -jme_tx_timeout(struct net_device *netdev) +jme_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct jme_adapter *jme = netdev_priv(netdev); diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index ae195f8adff5..f98d9d627c71 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -917,7 +917,7 @@ static void korina_restart_task(struct work_struct *work) enable_irq(lp->rx_irq); } -static void korina_tx_timeout(struct net_device *dev) +static void korina_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct korina_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 6e73ffe6f928..028e3e6222e9 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -594,7 +594,7 @@ err_hw: } static void -ltq_etop_tx_timeout(struct net_device *dev) +ltq_etop_tx_timeout(struct net_device *dev, unsigned int txqueue) { int err; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index d5b644131cff..c43820597218 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2590,7 +2590,7 @@ static void tx_timeout_task(struct work_struct *ugly) } } -static void mv643xx_eth_tx_timeout(struct net_device *dev) +static void mv643xx_eth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mv643xx_eth_private *mp = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 62dc2f362a16..28eba99888c3 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4792,6 +4792,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, 10000baseER_Full); phylink_set(mask, 10000baseKR_Full); } + if (state->interface != PHY_INTERFACE_MODE_NA) + break; /* Fall-through */ case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: @@ -4802,13 +4804,23 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, 10baseT_Full); phylink_set(mask, 100baseT_Half); phylink_set(mask, 100baseT_Full); + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + if (state->interface != PHY_INTERFACE_MODE_NA) + break; /* Fall-through */ case PHY_INTERFACE_MODE_1000BASEX: case PHY_INTERFACE_MODE_2500BASEX: - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); - phylink_set(mask, 2500baseT_Full); - phylink_set(mask, 2500baseX_Full); + if (port->comphy || + state->interface != PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + } + if (port->comphy || + state->interface == PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 2500baseT_Full); + phylink_set(mask, 2500baseX_Full); + } break; default: goto empty_set; @@ -4817,6 +4829,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config, bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); bitmap_and(state->advertising, state->advertising, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); + + phylink_helper_basex_speed(state); return; empty_set: @@ -5411,6 +5425,16 @@ static int mvpp2_port_probe(struct platform_device *pdev, port->phylink = NULL; } + /* Cycle the comphy to power it down, saving 270mW per port - + * don't worry about an error powering it up. When the comphy + * driver does this, we can remove this code. + */ + if (port->comphy) { + err = mvpp22_comphy_init(port); + if (err == 0) + phy_power_off(port->comphy); + } + err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register netdev\n"); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 3fb7ee3d4d13..1a6877902dd6 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -742,7 +742,7 @@ txq_reclaim_end: return released; } -static void pxa168_eth_tx_timeout(struct net_device *dev) +static void pxa168_eth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct pxa168_eth_private *pep = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 095f6c71b4fa..8ca15958e752 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2884,7 +2884,7 @@ static void skge_tx_clean(struct net_device *dev) skge->tx_ring.to_clean = e; } -static void skge_tx_timeout(struct net_device *dev) +static void skge_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct skge_port *skge = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 5f56ee83e3b1..acd1cba987fb 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2358,7 +2358,7 @@ static void sky2_qlink_intr(struct sky2_hw *hw) /* Transmit timeout is only called if we are running, carrier is up * and tx queue is full (stopped). */ -static void sky2_tx_timeout(struct net_device *dev) +static void sky2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sky2_port *sky2 = netdev_priv(dev); struct sky2_hw *hw = sky2->hw; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 527ad2aadcca..8c6cfd15481c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2081,7 +2081,7 @@ static void mtk_dma_free(struct mtk_eth *eth) kfree(eth->scratch_head); } -static void mtk_tx_timeout(struct net_device *dev) +static void mtk_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7af75b63245f..43dcbd8214c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1363,24 +1363,18 @@ static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv) } } -static void mlx4_en_tx_timeout(struct net_device *dev) +static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int i; + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][txqueue]; if (netif_msg_timer(priv)) en_warn(priv, "Tx timeout called on port:%d\n", priv->port); - for (i = 0; i < priv->tx_ring_num[TX]; i++) { - struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i]; - - if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) - continue; - en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", - i, tx_ring->qpn, tx_ring->sp_cqn, - tx_ring->cons, tx_ring->prod); - } + en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", + txqueue, tx_ring->qpn, tx_ring->sp_cqn, + tx_ring->cons, tx_ring->prod); priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Scheduling watchdog\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4980e80a5e85..68f1c8cb302b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4332,7 +4332,7 @@ unlock: rtnl_unlock(); } -static void mlx5e_tx_timeout(struct net_device *dev) +static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx5e_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index b70afa310ad2..416676c35b1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -200,8 +200,6 @@ static void mlx5_lag_fib_update(struct work_struct *work) rtnl_lock(); switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: mlx5_lag_fib_route_event(ldev, fib_work->event, fib_work->fen_info.fi); @@ -259,8 +257,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, switch (event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5294a1622643..86a2d575ae73 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3477,10 +3477,10 @@ MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8); enum mlxsw_reg_qeec_hr { - MLXSW_REG_QEEC_HIERARCY_PORT, - MLXSW_REG_QEEC_HIERARCY_GROUP, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_PORT, + MLXSW_REG_QEEC_HR_GROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, + MLXSW_REG_QEEC_HR_TC, }; /* reg_qeec_element_hierarchy @@ -3618,8 +3618,7 @@ static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port, { MLXSW_REG_ZERO(qeec, payload); mlxsw_reg_qeec_local_port_set(payload, local_port); - mlxsw_reg_qeec_element_hierarchy_set(payload, - MLXSW_REG_QEEC_HIERARCY_PORT); + mlxsw_reg_qeec_element_hierarchy_set(payload, MLXSW_REG_QEEC_HR_PORT); mlxsw_reg_qeec_ptps_set(payload, ptps); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 556dca328bb5..ea632042e609 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1796,6 +1796,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_PRIO: return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_ETS: + return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } @@ -3602,26 +3604,25 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * one subgroup, which are all member in the same group. */ err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false, - 0); + MLXSW_REG_QEEC_HR_GROUP, 0, 0, false, 0); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); if (err) return err; } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, i, i, + MLXSW_REG_QEEC_HR_TC, i, i, false, 0); if (err) return err; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, true, 100); if (err) @@ -3633,13 +3634,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * for the initial configuration. */ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, + MLXSW_REG_QEEC_HR_PORT, 0, 0, MLXSW_REG_QEEC_MAS_DIS); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, MLXSW_REG_QEEC_MAS_DIS); if (err) @@ -3647,14 +3648,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i, i, MLXSW_REG_QEEC_MAS_DIS); if (err) return err; err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, MLXSW_REG_QEEC_MAS_DIS); if (err) @@ -3664,7 +3665,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) /* Configure the min shaper for multicast TCs. */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, MLXSW_REG_QEEC_MIS_MIN); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 347bec9d1ecf..948ef4720d40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -852,6 +852,8 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_red_qopt_offload *p); int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p); /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 21296fa7f7fb..fe3bbba90659 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -160,7 +160,7 @@ static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, u8 weight = ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); if (err) { netdev_err(dev, "Failed to link subgroup ETS element %d to group\n", @@ -198,7 +198,7 @@ err_port_ets_set: u8 weight = my_ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); } return err; @@ -507,7 +507,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, maxrate->tc_maxrate[i]); if (err) { @@ -523,7 +523,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, err_port_ets_maxrate_set: for (i--; i >= 0; i--) mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, my_maxrate->tc_maxrate[i]); return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 68cc6737d45c..81a2c087f534 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -18,6 +18,7 @@ enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, MLXSW_SP_QDISC_RED, MLXSW_SP_QDISC_PRIO, + MLXSW_SP_QDISC_ETS, }; struct mlxsw_sp_qdisc_ops { @@ -471,14 +472,16 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) { int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, &mlxsw_sp_port->tclass_qdiscs[i]); mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; @@ -488,36 +491,58 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct tc_prio_qopt_offload_params *p = params; + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} - if (p->bands > IEEE_8021QAZ_MAX_TCS) +static int +__mlxsw_sp_qdisc_ets_check_params(unsigned int nbands) +{ + if (nbands > IEEE_8021QAZ_MAX_TCS) return -EOPNOTSUPP; return 0; } static int -mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) { struct tc_prio_qopt_offload_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + unsigned int nbands, + const unsigned int *quanta, + const unsigned int *weights, + const u8 *priomap) +{ struct mlxsw_sp_qdisc *child_qdisc; int tclass, i, band, backlog; u8 old_priomap; int err; - for (band = 0; band < p->bands; band++) { + for (band = 0; band < nbands; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; old_priomap = child_qdisc->prio_bitmap; child_qdisc->prio_bitmap = 0; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, !!quanta[band], + weights[band]); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (p->priomap[i] == band) { + if (priomap[i] == band) { child_qdisc->prio_bitmap |= BIT(i); if (BIT(i) & old_priomap) continue; @@ -540,21 +565,46 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, false, 0); } return 0; } +static int +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + zeroes, zeroes, p->priomap); +} + +static void +__mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) +{ + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + qstats->backlog -= backlog; +} + static void mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_prio_qopt_offload_params *p = params; - u64 backlog; - backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_qdisc->stats_base.backlog); - p->qstats->backlog -= backlog; + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); } static int @@ -631,31 +681,97 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, }; -/* Grafting is not supported in mlxsw. It will result in un-offloading of the - * grafted qdisc as well as the qdisc in the qdisc new location. - * (However, if the graft is to the location where the qdisc is already at, it - * will be ignored completely and won't cause un-offloading). +static int +mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + p->quanta, p->weights, p->priomap); +} + +static void +mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); +} + +static int +mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { + .type = MLXSW_SP_QDISC_ETS, + .check_params = mlxsw_sp_qdisc_ets_check_params, + .replace = mlxsw_sp_qdisc_ets_replace, + .unoffload = mlxsw_sp_qdisc_ets_unoffload, + .destroy = mlxsw_sp_qdisc_ets_destroy, + .get_stats = mlxsw_sp_qdisc_get_prio_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, +}; + +/* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting + * graph is free of cycles). These operations do not change the parent handle + * though, which means it can be incomplete (if there is more than one class + * where the Qdisc in question is grafted) or outright wrong (if the Qdisc was + * linked to a different class and then removed from the original class). + * + * E.g. consider this sequence of operations: + * + * # tc qdisc add dev swp1 root handle 1: prio + * # tc qdisc add dev swp1 parent 1:3 handle 13: red limit 1000000 avpkt 10000 + * RED: set bandwidth to 10Mbit + * # tc qdisc link dev swp1 handle 13: parent 1:2 + * + * At this point, both 1:2 and 1:3 have the same RED Qdisc instance as their + * child. But RED will still only claim that 1:3 is its parent. If it's removed + * from that band, its only parent will be 1:2, but it will continue to claim + * that it is in fact 1:3. + * + * The notification for child Qdisc replace (e.g. TC_RED_REPLACE) comes before + * the notification for parent graft (e.g. TC_PRIO_GRAFT). We take the replace + * notification to offload the child Qdisc, based on its parent handle, and use + * the graft operation to validate that the class where the child is actually + * grafted corresponds to the parent handle. If the two don't match, we + * unoffload the child. */ static int -mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - struct tc_prio_qopt_offload_graft_params *p) +__mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u8 band, u32 child_handle) { - int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band); + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); struct mlxsw_sp_qdisc *old_qdisc; - /* Check if the grafted qdisc is already in its "new" location. If so - - * nothing needs to be done. - */ - if (p->band < IEEE_8021QAZ_MAX_TCS && - mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle) + if (band < IEEE_8021QAZ_MAX_TCS && + mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == child_handle) return 0; /* See if the grafted qdisc is already offloaded on any tclass. If so, * unoffload it. */ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, - p->child_handle); + child_handle); if (old_qdisc) mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); @@ -664,6 +780,15 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static int +mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_prio_qopt_offload_graft_params *p) +{ + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->band, p->child_handle); +} + int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p) { @@ -697,6 +822,40 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, } } +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_ETS_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_ets, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_ETS)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_ETS_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_ETS_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_ETS_GRAFT: + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->graft_params.band, + p->graft_params.child_handle); + default: + return -EOPNOTSUPP; + } +} + int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 30bfe3880faf..bba1c8215d06 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3845,7 +3845,7 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, key.fib_nh = fib_nh; nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); - if (WARN_ON_ONCE(!nh)) + if (!nh) return; switch (event) { @@ -4780,95 +4780,6 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(mlxsw_sp, vr); } -static struct mlxsw_sp_fib4_entry * -mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib4_entry *fib4_entry; - - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if (fib4_entry->tb_id > new4_entry->tb_id) - continue; - if (fib4_entry->tb_id != new4_entry->tb_id) - break; - if (fib4_entry->tos > new4_entry->tos) - continue; - if (fib4_entry->prio >= new4_entry->prio || - fib4_entry->tos < new4_entry->tos) - return fib4_entry; - } - - return NULL; -} - -static int -mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, - struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib_node *fib_node; - - if (WARN_ON(!fib4_entry)) - return -EINVAL; - - fib_node = fib4_entry->common.fib_node; - list_for_each_entry_from(fib4_entry, &fib_node->entry_list, - common.list) { - if (fib4_entry->tb_id != new4_entry->tb_id || - fib4_entry->tos != new4_entry->tos || - fib4_entry->prio != new4_entry->prio) - break; - } - - list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); - return 0; -} - -static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, - bool replace, bool append) -{ - struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *fib4_entry; - - fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); - - if (append) - return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); - if (replace && WARN_ON(!fib4_entry)) - return -EINVAL; - - /* Insert new entry before replaced one, so that we can later - * remove the second. - */ - if (fib4_entry) { - list_add_tail(&new4_entry->common.list, - &fib4_entry->common.list); - } else { - struct mlxsw_sp_fib4_entry *last; - - list_for_each_entry(last, &fib_node->entry_list, common.list) { - if (new4_entry->tb_id > last->tb_id) - break; - fib4_entry = last; - } - - if (fib4_entry) - list_add(&new4_entry->common.list, - &fib4_entry->common.list); - else - list_add(&new4_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) -{ - list_del(&fib4_entry->common.list); -} - static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { @@ -4912,14 +4823,12 @@ static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace, bool append) + struct mlxsw_sp_fib4_entry *fib4_entry) { + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; int err; - err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); - if (err) - return err; + list_add(&fib4_entry->common.list, &fib_node->entry_list); err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); if (err) @@ -4928,7 +4837,7 @@ static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, return 0; err_fib_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib4_entry); + list_del(&fib4_entry->common.list); return err; } @@ -4937,20 +4846,19 @@ mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib4_entry *fib4_entry) { mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); - mlxsw_sp_fib4_node_list_remove(fib4_entry); + list_del(&fib4_entry->common.list); if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common); } static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace) + struct mlxsw_sp_fib4_entry *fib4_entry) { struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; struct mlxsw_sp_fib4_entry *replaced; - if (!replace) + if (list_is_singular(&fib_node->entry_list)) return; /* We inserted the new entry before replaced one */ @@ -4962,9 +4870,8 @@ static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info, - bool replace, bool append) +mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; @@ -4989,14 +4896,13 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, - append); + err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto err_fib4_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); + mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry); return 0; @@ -6094,7 +6000,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace, append; int err; /* Protect internal structures from changes */ @@ -6102,13 +6007,9 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - append = fib_work->event == FIB_EVENT_ENTRY_APPEND; - err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, - replace, append); + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib4_replace(mlxsw_sp, + &fib_work->fen_info); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); @@ -6211,8 +6112,6 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); @@ -6343,9 +6242,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp); return notifier_from_errno(err); - case FIB_EVENT_ENTRY_ADD: - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: if (router->aborted) { NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route"); return notifier_from_errno(-EINVAL); diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index da329ca115cc..f3f6dfe3eddc 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -1103,7 +1103,7 @@ static void ks8842_tx_timeout_work(struct work_struct *work) __ks8842_start_new_rx_dma(netdev); } -static void ks8842_tx_timeout(struct net_device *netdev) +static void ks8842_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ks8842_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index e102e1560ac7..d1444ba36e10 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4896,7 +4896,7 @@ unlock: * triggered to free up resources so that the transmit routine can continue * sending out packets. The hardware is reset to correct the problem. */ -static void netdev_tx_timeout(struct net_device *dev) +static void netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { static unsigned long last_reset; diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 0567e4f387a5..09cdc2f2e7ff 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1325,7 +1325,7 @@ static irqreturn_t enc28j60_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static void enc28j60_tx_timeout(struct net_device *ndev) +static void enc28j60_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct enc28j60_net *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 52c41d11f565..39925e4bf2ec 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -892,7 +892,7 @@ static netdev_tx_t encx24j600_tx(struct sk_buff *skb, struct net_device *dev) } /* Deal with a transmit timeout */ -static void encx24j600_tx_timeout(struct net_device *dev) +static void encx24j600_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct encx24j600_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 1a2634cbbb69..d21d706b83a7 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -612,7 +612,7 @@ static void undo_cable_magic(struct net_device *dev); static void check_link(struct net_device *dev); static void netdev_timer(struct timer_list *t); static void dump_ring(struct net_device *dev); -static void ns_tx_timeout(struct net_device *dev); +static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue); static int alloc_ring(struct net_device *dev); static void refill_rx(struct net_device *dev); static void init_ring(struct net_device *dev); @@ -1881,7 +1881,7 @@ static void dump_ring(struct net_device *dev) } } -static void ns_tx_timeout(struct net_device *dev) +static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem * ioaddr = ns_ioaddr(dev); diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 6af9a7eee114..be5f62f06785 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1549,7 +1549,7 @@ static int ns83820_stop(struct net_device *ndev) return 0; } -static void ns83820_tx_timeout(struct net_device *ndev) +static void ns83820_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ns83820 *dev = PRIV(ndev); u32 tx_done_idx; @@ -1603,7 +1603,7 @@ static void ns83820_tx_watch(struct timer_list *t) ndev->name, dev->tx_done_idx, dev->tx_free_idx, atomic_read(&dev->nr_tx_skbs)); - ns83820_tx_timeout(ndev); + ns83820_tx_timeout(ndev, UINT_MAX); } mod_timer(&dev->tx_watchdog, jiffies + 2*HZ); diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index b339125b2f09..fdebc8598b22 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -161,7 +161,7 @@ static int sonic_close(struct net_device *dev) return 0; } -static void sonic_tx_timeout(struct net_device *dev) +static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sonic_local *lp = netdev_priv(dev); int i; diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 2b27f7049acb..f1544481aac1 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -336,7 +336,7 @@ static int sonic_close(struct net_device *dev); static struct net_device_stats *sonic_get_stats(struct net_device *dev); static void sonic_multicast_list(struct net_device *dev); static int sonic_init(struct net_device *dev); -static void sonic_tx_timeout(struct net_device *dev); +static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue); static void sonic_msg_init(struct net_device *dev); /* Internal inlines for reading/writing DMA buffers. Note that bus diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index e0b2bf327905..0ec6b8e8b549 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7238,7 +7238,7 @@ out_unlock: * void */ -static void s2io_tx_watchdog(struct net_device *dev) +static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct s2io_nic *sp = netdev_priv(dev); struct swStat *swstats = &sp->mac_control.stats_info->sw_stat; diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h index 0a921f30f98f..6fa3159a977f 100644 --- a/drivers/net/ethernet/neterion/s2io.h +++ b/drivers/net/ethernet/neterion/s2io.h @@ -1065,7 +1065,7 @@ static void s2io_txpic_intr_handle(struct s2io_nic *sp); static void tx_intr_handler(struct fifo_info *fifo_data); static void s2io_handle_errors(void * dev_id); -static void s2io_tx_watchdog(struct net_device *dev); +static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue); static void s2io_set_multicast(struct net_device *dev); static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp); static void s2io_link(struct s2io_nic * sp, int link); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 1d334f2e0a56..9b63574b6202 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3273,7 +3273,7 @@ static int vxge_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * This function is triggered if the Tx Queue is stopped * for a pre-defined amount of time when the Interface is still up. */ -static void vxge_tx_watchdog(struct net_device *dev) +static void vxge_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct vxgedev *vdev; diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c index 9f8a1f69c0c4..23ebddfb9532 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/cls.c +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -176,10 +176,8 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, u8 mask, val; int err; - if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) { - err = -EOPNOTSUPP; + if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) goto err_delete; - } tos_off = proto == htons(ETH_P_IP) ? 16 : 20; @@ -200,18 +198,14 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, if ((iter->val & cmask) == (val & cmask) && iter->band != knode->res->classid) { NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter"); - err = -EOPNOTSUPP; goto err_delete; } } if (!match) { match = kzalloc(sizeof(*match), GFP_KERNEL); - if (!match) { - err = -ENOMEM; - goto err_delete; - } - + if (!match) + return -ENOMEM; list_add(&match->list, &alink->dscp_map); } match->handle = knode->handle; @@ -227,7 +221,7 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, err_delete: nfp_abm_u32_knode_delete(alink, knode); - return err; + return -EOPNOTSUPP; } static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type, diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h index a460c75522be..d81d450be50e 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.h +++ b/drivers/net/ethernet/netronome/nfp/ccm.h @@ -26,6 +26,7 @@ enum nfp_ccm_type { NFP_CCM_TYPE_CRYPTO_ADD = 10, NFP_CCM_TYPE_CRYPTO_DEL = 11, NFP_CCM_TYPE_CRYPTO_UPDATE = 12, + NFP_CCM_TYPE_CRYPTO_RESYNC = 13, __NFP_CCM_TYPE_MAX, }; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h index 60372ddf69f0..bffe58bb2f27 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h +++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h @@ -4,6 +4,10 @@ #ifndef NFP_CRYPTO_H #define NFP_CRYPTO_H 1 +struct net_device; +struct nfp_net; +struct nfp_net_tls_resync_req; + struct nfp_net_tls_offload_ctx { __be32 fw_handle[2]; @@ -17,11 +21,22 @@ struct nfp_net_tls_offload_ctx { #ifdef CONFIG_TLS_DEVICE int nfp_net_tls_init(struct nfp_net *nn); +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len); #else static inline int nfp_net_tls_init(struct nfp_net *nn) { return 0; } + +static inline int +nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + return -EOPNOTSUPP; +} #endif #endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h index 67413d946c4a..8d1458896bcb 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/fw.h +++ b/drivers/net/ethernet/netronome/nfp/crypto/fw.h @@ -9,6 +9,14 @@ #define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC 0 #define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC 1 +struct nfp_net_tls_resync_req { + __be32 fw_handle[2]; + __be32 tcp_seq; + u8 l3_offset; + u8 l4_offset; + u8 resv[2]; +}; + struct nfp_crypto_reply_simple { struct nfp_ccm_hdr hdr; __be32 error; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c index 96a96b35c0ca..7c50e3dfb9d5 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -5,6 +5,7 @@ #include <linux/ipv6.h> #include <linux/skbuff.h> #include <linux/string.h> +#include <net/inet6_hashtables.h> #include <net/tls.h> #include "../ccm.h" @@ -391,8 +392,9 @@ nfp_net_tls_add(struct net_device *netdev, struct sock *sk, if (direction == TLS_OFFLOAD_CTX_DIR_TX) return 0; - tls_offload_rx_resync_set_type(sk, - TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + if (!nn->tlv_caps.tls_resync_ss) + tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + return 0; err_fw_remove: @@ -424,6 +426,7 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, struct nfp_net *nn = netdev_priv(netdev); struct nfp_net_tls_offload_ctx *ntls; struct nfp_crypto_req_update *req; + enum nfp_ccm_type type; struct sk_buff *skb; gfp_t flags; int err; @@ -442,15 +445,18 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, req->tcp_seq = cpu_to_be32(seq); memcpy(req->rec_no, rcd_sn, sizeof(req->rec_no)); + type = NFP_CCM_TYPE_CRYPTO_UPDATE; if (direction == TLS_OFFLOAD_CTX_DIR_TX) { - err = nfp_net_tls_communicate_simple(nn, skb, "sync", - NFP_CCM_TYPE_CRYPTO_UPDATE); + err = nfp_net_tls_communicate_simple(nn, skb, "sync", type); if (err) return err; ntls->next_seq = seq; } else { - nfp_ccm_mbox_post(nn, skb, NFP_CCM_TYPE_CRYPTO_UPDATE, + if (nn->tlv_caps.tls_resync_ss) + type = NFP_CCM_TYPE_CRYPTO_RESYNC; + nfp_ccm_mbox_post(nn, skb, type, sizeof(struct nfp_crypto_reply_simple)); + atomic_inc(&nn->ktls_rx_resync_sent); } return 0; @@ -462,6 +468,79 @@ static const struct tlsdev_ops nfp_net_tls_ops = { .tls_dev_resync = nfp_net_tls_resync, }; +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + struct ipv6hdr *ipv6h; + struct tcphdr *th; + struct iphdr *iph; + struct sock *sk; + __be32 tcp_seq; + int err; + + iph = pkt + req->l3_offset; + ipv6h = pkt + req->l3_offset; + th = pkt + req->l4_offset; + + if ((u8 *)&th[1] > (u8 *)pkt + pkt_len) { + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu pkt_len: %u)\n", + req->l3_offset, req->l4_offset, pkt_len); + err = -EINVAL; + goto err_cnt_ign; + } + + switch (iph->version) { + case 4: + sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, + iph->saddr, th->source, iph->daddr, + th->dest, netdev->ifindex); + break; +#if IS_ENABLED(CONFIG_IPV6) + case 6: + sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, + &ipv6h->saddr, th->source, + &ipv6h->daddr, ntohs(th->dest), + netdev->ifindex, 0); + break; +#endif + default: + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu ipver: %u)\n", + req->l3_offset, req->l4_offset, iph->version); + err = -EINVAL; + goto err_cnt_ign; + } + + err = 0; + if (!sk) + goto err_cnt_ign; + if (!tls_is_sk_rx_device_offloaded(sk) || + sk->sk_shutdown & RCV_SHUTDOWN) + goto err_put_sock; + + ntls = tls_driver_ctx(sk, TLS_OFFLOAD_CTX_DIR_RX); + /* some FW versions can't report the handle and report 0s */ + if (memchr_inv(&req->fw_handle, 0, sizeof(req->fw_handle)) && + memcmp(&req->fw_handle, &ntls->fw_handle, sizeof(ntls->fw_handle))) + goto err_put_sock; + + /* copy to ensure alignment */ + memcpy(&tcp_seq, &req->tcp_seq, sizeof(tcp_seq)); + tls_offload_rx_resync_request(sk, tcp_seq); + atomic_inc(&nn->ktls_rx_resync_req); + + sock_gen_put(sk); + return 0; + +err_put_sock: + sock_gen_put(sk); +err_cnt_ign: + atomic_inc(&nn->ktls_rx_resync_ign); + return err; +} + static int nfp_net_tls_reset(struct nfp_net *nn) { struct nfp_crypto_req_reset *req; diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 1b019fdfcd97..c06600fb47ff 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -22,8 +22,9 @@ #define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) #define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) #define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) -#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS IP_TUNNEL_INFO_TX -#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ +#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS (IP_TUNNEL_INFO_TX | \ + IP_TUNNEL_INFO_IPV6) +#define NFP_FL_SUPPORTED_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ NFP_FL_TUNNEL_KEY | \ NFP_FL_TUNNEL_GENEVE_OPT) @@ -394,19 +395,26 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, } static int -nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, - const struct flow_action_entry *act, - struct nfp_fl_pre_tunnel *pre_tun, - enum nfp_flower_tun_type tun_type, - struct net_device *netdev, struct netlink_ext_ack *extack) +nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, + const struct flow_action_entry *act, + struct nfp_fl_pre_tunnel *pre_tun, + enum nfp_flower_tun_type tun_type, + struct net_device *netdev, struct netlink_ext_ack *extack) { - size_t act_size = sizeof(struct nfp_fl_set_ipv4_tun); const struct ip_tunnel_info *ip_tun = act->tunnel; + bool ipv6 = ip_tunnel_info_af(ip_tun) == AF_INET6; + size_t act_size = sizeof(struct nfp_fl_set_tun); struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; + if (!IS_ENABLED(CONFIG_IPV6) && ipv6) + return -EOPNOTSUPP; + + if (ipv6 && !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) + return -EOPNOTSUPP; + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || NFP_FL_TUNNEL_KEY != TUNNEL_KEY || NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); @@ -417,19 +425,35 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, return -EOPNOTSUPP; } - set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; /* Set tunnel type and pre-tunnel index. */ tmp_set_ip_tun_type_index |= - FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, tun_type) | - FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx); + FIELD_PREP(NFP_FL_TUNNEL_TYPE, tun_type) | + FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx); set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; if (ip_tun->key.ttl) { set_tun->ttl = ip_tun->key.ttl; +#ifdef CONFIG_IPV6 + } else if (ipv6) { + struct net *net = dev_net(netdev); + struct flowi6 flow = {}; + struct dst_entry *dst; + + flow.daddr = ip_tun->key.u.ipv6.dst; + flow.flowi4_proto = IPPROTO_UDP; + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow, NULL); + if (!IS_ERR(dst)) { + set_tun->ttl = ip6_dst_hoplimit(dst); + dst_release(dst); + } else { + set_tun->ttl = net->ipv6.devconf_all->hop_limit; + } +#endif } else { struct net *net = dev_net(netdev); struct flowi4 flow = {}; @@ -455,7 +479,7 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, set_tun->tos = ip_tun->key.tos; if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || - ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) { + ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload"); return -EOPNOTSUPP; } @@ -467,7 +491,12 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, } /* Complete pre_tunnel action. */ - pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + if (ipv6) { + pre_tun->flags |= cpu_to_be16(NFP_FL_PRE_TUN_IPV6); + pre_tun->ipv6_dst = ip_tun->key.u.ipv6.dst; + } else { + pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + } return 0; } @@ -956,8 +985,8 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, struct nfp_flower_pedit_acts *set_act, bool *pkt_host, struct netlink_ext_ack *extack, int act_idx) { - struct nfp_fl_set_ipv4_tun *set_tun; struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_tun *set_tun; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_push_mpls *psh_m; struct nfp_fl_pop_vlan *pop_v; @@ -1032,7 +1061,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, * If none, the packet falls back before applying other actions. */ if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + - sizeof(struct nfp_fl_set_ipv4_tun) > NFP_FL_MAX_A_SIZ) { + sizeof(struct nfp_fl_set_tun) > NFP_FL_MAX_A_SIZ) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap"); return -EOPNOTSUPP; } @@ -1046,11 +1075,11 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, return err; set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_tun(app, set_tun, act, pre_tun, - *tun_type, netdev, extack); + err = nfp_fl_set_tun(app, set_tun, act, pre_tun, *tun_type, + netdev, extack); if (err) return err; - *a_len += sizeof(struct nfp_fl_set_ipv4_tun); + *a_len += sizeof(struct nfp_fl_set_tun); } break; case FLOW_ACTION_TUNNEL_DECAP: diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 05981b54eaab..a595ddb92bff 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -270,11 +270,17 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) } goto err_default; case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: - nfp_tunnel_request_route(app, skb); + nfp_tunnel_request_route_v4(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6: + nfp_tunnel_request_route_v6(app, skb); break; case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: nfp_tunnel_keep_alive(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6: + nfp_tunnel_keep_alive_v6(app, skb); + break; case NFP_FLOWER_CMSG_TYPE_QOS_STATS: nfp_flower_stats_rlim_reply(app, skb); break; @@ -361,7 +367,8 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) nfp_flower_process_mtu_ack(app, skb)) { /* Handle MTU acks outside wq to prevent RTNL conflict. */ dev_consume_skb_any(skb); - } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH) { + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6) { /* Acks from the NFP that the route is added - ignore. */ dev_consume_skb_any(skb); } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 7eb2ec8969c3..9b50d76bbc09 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -26,6 +26,7 @@ #define NFP_FLOWER_LAYER2_GRE BIT(0) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) +#define NFP_FLOWER_LAYER2_TUN_IPV6 BIT(7) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) #define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) @@ -63,6 +64,7 @@ #define NFP_FL_MAX_GENEVE_OPT_ACT 32 #define NFP_FL_MAX_GENEVE_OPT_CNT 64 #define NFP_FL_MAX_GENEVE_OPT_KEY 32 +#define NFP_FL_MAX_GENEVE_OPT_KEY_V6 8 /* Action opcodes */ #define NFP_FL_ACTION_OPCODE_OUTPUT 0 @@ -70,7 +72,7 @@ #define NFP_FL_ACTION_OPCODE_POP_VLAN 2 #define NFP_FL_ACTION_OPCODE_PUSH_MPLS 3 #define NFP_FL_ACTION_OPCODE_POP_MPLS 4 -#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_SET_TUNNEL 6 #define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 #define NFP_FL_ACTION_OPCODE_SET_MPLS 8 #define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 @@ -99,8 +101,8 @@ /* Tunnel ports */ #define NFP_FL_PORT_TYPE_TUN 0x50000000 -#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) -#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) +#define NFP_FL_TUNNEL_TYPE GENMASK(7, 4) +#define NFP_FL_PRE_TUN_INDEX GENMASK(2, 0) #define NFP_FLOWER_WORKQ_MAX_SKBS 30000 @@ -206,13 +208,16 @@ struct nfp_fl_pre_lag { struct nfp_fl_pre_tunnel { struct nfp_fl_act_head head; - __be16 reserved; - __be32 ipv4_dst; - /* reserved for use with IPv6 addresses */ - __be32 extra[3]; + __be16 flags; + union { + __be32 ipv4_dst; + struct in6_addr ipv6_dst; + }; }; -struct nfp_fl_set_ipv4_tun { +#define NFP_FL_PRE_TUN_IPV6 BIT(0) + +struct nfp_fl_set_tun { struct nfp_fl_act_head head; __be16 reserved; __be64 tun_id __packed; @@ -387,6 +392,11 @@ struct nfp_flower_tun_ipv4 { __be32 dst; }; +struct nfp_flower_tun_ipv6 { + struct in6_addr src; + struct in6_addr dst; +}; + struct nfp_flower_tun_ip_ext { u8 tos; u8 ttl; @@ -416,6 +426,42 @@ struct nfp_flower_ipv4_udp_tun { __be32 tun_id; }; +/* Flow Frame IPv6 UDP TUNNEL --> Tunnel details (11W/44B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_udp_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 reserved1; + struct nfp_flower_tun_ip_ext ip_ext; + __be32 reserved2; + __be32 tun_id; +}; + /* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B) * ----------------------------------------------------------------- * 3 2 1 @@ -445,6 +491,46 @@ struct nfp_flower_ipv4_gre_tun { __be32 reserved2; }; +/* Flow Frame GRE TUNNEL V6 --> Tunnel details (12W/48B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_gre_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + struct nfp_flower_geneve_options { u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; }; @@ -485,6 +571,10 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_QOS_DEL = 19, NFP_FLOWER_CMSG_TYPE_QOS_STATS = 20, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE = 21, + NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6 = 22, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6 = 23, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 = 24, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6 = 25, NFP_FLOWER_CMSG_TYPE_MAX = 32, }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 31d94592a7c0..ddd7b7f5f5ab 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -43,6 +43,7 @@ struct nfp_app; #define NFP_FL_FEATS_VF_RLIM BIT(4) #define NFP_FL_FEATS_FLOW_MOD BIT(5) #define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) +#define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_FLOW_MERGE BIT(30) #define NFP_FL_FEATS_LAG BIT(31) @@ -62,18 +63,26 @@ struct nfp_fl_stats_id { * struct nfp_fl_tunnel_offloads - priv data for tunnel offloads * @offloaded_macs: Hashtable of the offloaded MAC addresses * @ipv4_off_list: List of IPv4 addresses to offload - * @neigh_off_list: List of neighbour offloads + * @ipv6_off_list: List of IPv6 addresses to offload + * @neigh_off_list_v4: List of IPv4 neighbour offloads + * @neigh_off_list_v6: List of IPv6 neighbour offloads * @ipv4_off_lock: Lock for the IPv4 address list - * @neigh_off_lock: Lock for the neighbour address list + * @ipv6_off_lock: Lock for the IPv6 address list + * @neigh_off_lock_v4: Lock for the IPv4 neighbour address list + * @neigh_off_lock_v6: Lock for the IPv6 neighbour address list * @mac_off_ids: IDA to manage id assignment for offloaded MACs * @neigh_nb: Notifier to monitor neighbour state */ struct nfp_fl_tunnel_offloads { struct rhashtable offloaded_macs; struct list_head ipv4_off_list; - struct list_head neigh_off_list; + struct list_head ipv6_off_list; + struct list_head neigh_off_list_v4; + struct list_head neigh_off_list_v6; struct mutex ipv4_off_lock; - spinlock_t neigh_off_lock; + struct mutex ipv6_off_lock; + spinlock_t neigh_off_lock_v4; + spinlock_t neigh_off_lock_v6; struct ida mac_off_ids; struct notifier_block neigh_nb; }; @@ -273,12 +282,25 @@ struct nfp_fl_stats { u64 used; }; +/** + * struct nfp_ipv6_addr_entry - cached IPv6 addresses + * @ipv6_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv6_addr_entry { + struct in6_addr ipv6_addr; + int ref_count; + struct list_head list; +}; + struct nfp_fl_payload { struct nfp_fl_rule_metadata meta; unsigned long tc_flower_cookie; struct rhash_head fl_node; struct rcu_head rcu; __be32 nfp_tun_ipv4_addr; + struct nfp_ipv6_addr_entry *nfp_tun_ipv6; struct net_device *ingress_dev; char *unmasked_data; char *mask_data; @@ -396,8 +418,14 @@ int nfp_tunnel_mac_event_handler(struct nfp_app *app, unsigned long event, void *ptr); void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); -void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry); +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6); +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb); void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb); void nfp_flower_lag_init(struct nfp_fl_lag *lag); void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag); int nfp_flower_lag_reset(struct nfp_fl_lag *lag); diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 9cc3ba17ff69..546bc01d507d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -10,9 +10,8 @@ static void nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, struct nfp_flower_meta_tci *msk, - struct flow_cls_offload *flow, u8 key_type) + struct flow_rule *rule, u8 key_type) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u16 tmp_tci; memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); @@ -77,11 +76,8 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, static void nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, - struct nfp_flower_mac_mpls *msk, - struct flow_cls_offload *flow) + struct nfp_flower_mac_mpls *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); @@ -130,10 +126,8 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, static void nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, struct nfp_flower_tp_ports *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_tp_ports)); memset(msk, 0, sizeof(struct nfp_flower_tp_ports)); @@ -150,11 +144,8 @@ nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, static void nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, - struct nfp_flower_ip_ext *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ip_ext *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -224,10 +215,8 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, static void nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, - struct nfp_flower_ipv4 *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ipv4 *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_match_ipv4_addrs match; memset(ext, 0, sizeof(struct nfp_flower_ipv4)); @@ -241,16 +230,13 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, msk->ipv4_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } static void nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, - struct nfp_flower_ipv6 *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ipv6 *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_ipv6)); memset(msk, 0, sizeof(struct nfp_flower_ipv6)); @@ -264,16 +250,15 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, msk->ipv6_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } static int -nfp_flower_compile_geneve_opt(void *ext, void *msk, - struct flow_cls_offload *flow) +nfp_flower_compile_geneve_opt(void *ext, void *msk, struct flow_rule *rule) { struct flow_match_enc_opts match; - flow_rule_match_enc_opts(flow->rule, &match); + flow_rule_match_enc_opts(rule, &match); memcpy(ext, match.key->data, match.key->len); memcpy(msk, match.mask->data, match.mask->len); @@ -283,10 +268,8 @@ nfp_flower_compile_geneve_opt(void *ext, void *msk, static void nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, struct nfp_flower_tun_ipv4 *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; @@ -299,12 +282,26 @@ nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, } static void +nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext, + struct nfp_flower_tun_ipv6 *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + ext->src = match.key->src; + ext->dst = match.key->dst; + msk->src = match.mask->src; + msk->dst = match.mask->dst; + } +} + +static void nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, struct nfp_flower_tun_ip_ext *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { struct flow_match_ip match; @@ -317,57 +314,97 @@ nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, } static void -nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, - struct nfp_flower_ipv4_gre_tun *msk, - struct flow_cls_offload *flow) +nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk, + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - - memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + u32 vni; - /* NVGRE is the only supported GRE tunnel type */ - ext->ethertype = cpu_to_be16(ETH_P_TEB); - msk->ethertype = cpu_to_be16(~0); + flow_rule_match_enc_keyid(rule, &match); + vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key = cpu_to_be32(vni); + vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key_msk = cpu_to_be32(vni); + } +} +static void +nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags, + __be16 *flags_msk, struct flow_rule *rule) +{ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { struct flow_match_enc_keyid match; flow_rule_match_enc_keyid(rule, &match); - ext->tun_key = match.key->keyid; - msk->tun_key = match.mask->keyid; + *key = match.key->keyid; + *key_msk = match.mask->keyid; - ext->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); - msk->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); } +} + +static void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); - nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); } static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, struct nfp_flower_ipv4_udp_tun *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid match; - u32 temp_vni; + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} - flow_rule_match_enc_keyid(rule, &match); - temp_vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; - ext->tun_id = cpu_to_be32(temp_vni); - temp_vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; - msk->tun_id = cpu_to_be32(temp_vni); - } +static void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); + + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} + +static void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); - nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); } int nfp_flower_compile_flow_match(struct nfp_app *app, @@ -378,6 +415,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, enum nfp_flower_tun_type tun_type, struct netlink_ext_ack *extack) { + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u32 port_id; int err; u8 *ext; @@ -393,7 +431,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext, (struct nfp_flower_meta_tci *)msk, - flow, key_ls->key_layer); + rule, key_ls->key_layer); ext += sizeof(struct nfp_flower_meta_tci); msk += sizeof(struct nfp_flower_meta_tci); @@ -425,7 +463,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext, (struct nfp_flower_mac_mpls *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_mac_mpls); msk += sizeof(struct nfp_flower_mac_mpls); } @@ -433,7 +471,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) { nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext, (struct nfp_flower_tp_ports *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_tp_ports); msk += sizeof(struct nfp_flower_tp_ports); } @@ -441,7 +479,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) { nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext, (struct nfp_flower_ipv4 *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_ipv4); msk += sizeof(struct nfp_flower_ipv4); } @@ -449,43 +487,83 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) { nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext, (struct nfp_flower_ipv6 *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_ipv6); msk += sizeof(struct nfp_flower_ipv6); } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) { - __be32 tun_dst; - - nfp_flower_compile_ipv4_gre_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; - ext += sizeof(struct nfp_flower_ipv4_gre_tun); - msk += sizeof(struct nfp_flower_ipv4_gre_tun); - - /* Store the tunnel destination in the rule data. - * This must be present and be an exact match. - */ - nfp_flow->nfp_tun_ipv4_addr = tun_dst; - nfp_tunnel_add_ipv4_off(app, tun_dst); + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_gre_tun((void *)ext, + (void *)msk, rule); + gre_match = (struct nfp_flower_ipv6_gre_tun *)ext; + dst = &gre_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_gre_tun); + msk += sizeof(struct nfp_flower_ipv6_gre_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_gre_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_gre_tun); + msk += sizeof(struct nfp_flower_ipv4_gre_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } } if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { - __be32 tun_dst; - - nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; - ext += sizeof(struct nfp_flower_ipv4_udp_tun); - msk += sizeof(struct nfp_flower_ipv4_udp_tun); - - /* Store the tunnel destination in the rule data. - * This must be present and be an exact match. - */ - nfp_flow->nfp_tun_ipv4_addr = tun_dst; - nfp_tunnel_add_ipv4_off(app, tun_dst); + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_udp_tun((void *)ext, + (void *)msk, rule); + udp_match = (struct nfp_flower_ipv6_udp_tun *)ext; + dst = &udp_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_udp_tun); + msk += sizeof(struct nfp_flower_ipv6_udp_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_udp_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_udp_tun); + msk += sizeof(struct nfp_flower_ipv4_udp_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { - err = nfp_flower_compile_geneve_opt(ext, msk, flow); + err = nfp_flower_compile_geneve_opt(ext, msk, rule); if (err) return err; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 987ae221f6be..7ca5c1becfcf 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -54,6 +54,10 @@ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) + #define NFP_FLOWER_MERGE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ NFP_FLOWER_LAYER_MAC | \ @@ -64,7 +68,8 @@ #define NFP_FLOWER_PRE_TUN_RULE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ NFP_FLOWER_LAYER_MAC | \ - NFP_FLOWER_LAYER_IPV4) + NFP_FLOWER_LAYER_IPV4 | \ + NFP_FLOWER_LAYER_IPV6) struct nfp_flower_merge_check { union { @@ -146,10 +151,11 @@ static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f) static int nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, - u32 *key_layer_two, int *key_size, + u32 *key_layer_two, int *key_size, bool ipv6, struct netlink_ext_ack *extack) { - if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) { + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY || + (ipv6 && enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY_V6)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length"); return -EOPNOTSUPP; } @@ -167,7 +173,7 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, struct flow_dissector_key_enc_opts *enc_op, u32 *key_layer_two, u8 *key_layer, int *key_size, struct nfp_flower_priv *priv, - enum nfp_flower_tun_type *tun_type, + enum nfp_flower_tun_type *tun_type, bool ipv6, struct netlink_ext_ack *extack) { int err; @@ -176,7 +182,15 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, case htons(IANA_VXLAN_UDP_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; *key_layer |= NFP_FLOWER_LAYER_VXLAN; - *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (ipv6) { + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } if (enc_op) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels"); @@ -192,7 +206,13 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, *key_layer |= NFP_FLOWER_LAYER_EXT_META; *key_size += sizeof(struct nfp_flower_ext_meta); *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; - *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (ipv6) { + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } if (!enc_op) break; @@ -200,8 +220,8 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload"); return -EOPNOTSUPP; } - err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, - key_size, extack); + err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, key_size, + ipv6, extack); if (err) return err; break; @@ -237,6 +257,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, /* If any tun dissector is used then the required set must be used. */ if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R && (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported"); @@ -268,8 +290,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_match_enc_opts enc_op = { NULL, NULL }; struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; struct flow_match_control enc_ctl; struct flow_match_ports enc_ports; + bool ipv6_tun = false; flow_rule_match_enc_control(rule, &enc_ctl); @@ -277,38 +301,62 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported"); return -EOPNOTSUPP; } - if (enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only IPv4 tunnels are supported"); + + ipv6_tun = enc_ctl.key->addr_type == + FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (ipv6_tun && + !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: firmware does not support IPv6 tunnels"); return -EOPNOTSUPP; } - /* These fields are already verified as used. */ - flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); - if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + if (!ipv6_tun && + enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel address type not IPv4 or IPv6"); return -EOPNOTSUPP; } + if (ipv6_tun) { + flow_rule_match_enc_ipv6_addrs(rule, &ipv6_addrs); + if (memchr_inv(&ipv6_addrs.mask->dst, 0xff, + sizeof(ipv6_addrs.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv6 destination address is supported"); + return -EOPNOTSUPP; + } + } else { + flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); + if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + return -EOPNOTSUPP; + } + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) flow_rule_match_enc_opts(rule, &enc_op); - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { /* check if GRE, which has no enc_ports */ - if (netif_is_gretap(netdev)) { - *tun_type = NFP_FL_TUNNEL_GRE; - key_layer |= NFP_FLOWER_LAYER_EXT_META; - key_size += sizeof(struct nfp_flower_ext_meta); - key_layer_two |= NFP_FLOWER_LAYER2_GRE; - key_size += - sizeof(struct nfp_flower_ipv4_gre_tun); + if (!netif_is_gretap(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + return -EOPNOTSUPP; + } - if (enc_op.key) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); - return -EOPNOTSUPP; - } + *tun_type = NFP_FL_TUNNEL_GRE; + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_layer_two |= NFP_FLOWER_LAYER2_GRE; + + if (ipv6_tun) { + key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + key_size += + sizeof(struct nfp_flower_ipv6_udp_tun); } else { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + key_size += + sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (enc_op.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); return -EOPNOTSUPP; } } else { @@ -323,7 +371,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, &key_layer_two, &key_layer, &key_size, priv, - tun_type, extack); + tun_type, ipv6_tun, + extack); if (err) return err; @@ -491,6 +540,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) goto err_free_mask; flow_pay->nfp_tun_ipv4_addr = 0; + flow_pay->nfp_tun_ipv6 = NULL; flow_pay->meta.flags = 0; INIT_LIST_HEAD(&flow_pay->linked_flows); flow_pay->in_hw = false; @@ -517,10 +567,12 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, struct nfp_fl_set_ip4_addrs *ipv4_add; struct nfp_fl_set_ipv6_addr *ipv6_add; struct nfp_fl_push_vlan *push_vlan; + struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_set_tport *tport; struct nfp_fl_set_eth *eth; struct nfp_fl_act_head *a; unsigned int act_off = 0; + bool ipv6_tun = false; u8 act_id = 0; u8 *ports; int i; @@ -542,14 +594,18 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, case NFP_FL_ACTION_OPCODE_POP_VLAN: merge->tci = cpu_to_be16(0); break; - case NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL: + case NFP_FL_ACTION_OPCODE_SET_TUNNEL: /* New tunnel header means l2 to l4 can be matched. */ eth_broadcast_addr(&merge->l2.mac_dst[0]); eth_broadcast_addr(&merge->l2.mac_src[0]); memset(&merge->l4, 0xff, sizeof(struct nfp_flower_tp_ports)); - memset(&merge->ipv4, 0xff, - sizeof(struct nfp_flower_ipv4)); + if (ipv6_tun) + memset(&merge->ipv6, 0xff, + sizeof(struct nfp_flower_ipv6)); + else + memset(&merge->ipv4, 0xff, + sizeof(struct nfp_flower_ipv4)); break; case NFP_FL_ACTION_OPCODE_SET_ETHERNET: eth = (struct nfp_fl_set_eth *)a; @@ -597,6 +653,10 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, ports[i] |= tport->tp_port_mask[i]; break; case NFP_FL_ACTION_OPCODE_PRE_TUNNEL: + pre_tun = (struct nfp_fl_pre_tunnel *)a; + ipv6_tun = be16_to_cpu(pre_tun->flags) & + NFP_FL_PRE_TUN_IPV6; + break; case NFP_FL_ACTION_OPCODE_PRE_LAG: case NFP_FL_ACTION_OPCODE_PUSH_GENEVE: break; @@ -765,15 +825,15 @@ nfp_fl_verify_post_tun_acts(char *acts, int len, struct nfp_fl_push_vlan **vlan) static int nfp_fl_push_vlan_after_tun(char *acts, int len, struct nfp_fl_push_vlan *vlan) { - struct nfp_fl_set_ipv4_tun *tun; + struct nfp_fl_set_tun *tun; struct nfp_fl_act_head *a; unsigned int act_off = 0; while (act_off < len) { a = (struct nfp_fl_act_head *)&acts[act_off]; - if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL) { - tun = (struct nfp_fl_set_ipv4_tun *)a; + if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_TUNNEL) { + tun = (struct nfp_fl_set_tun *)a; tun->outer_vlan_tpid = vlan->vlan_tpid; tun->outer_vlan_tci = vlan->vlan_tci; @@ -1058,15 +1118,22 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } - if (key_layer & NFP_FLOWER_LAYER_IPV4) { + if (key_layer & NFP_FLOWER_LAYER_IPV4 || + key_layer & NFP_FLOWER_LAYER_IPV6) { + /* Flags and proto fields have same offset in IPv4 and IPv6. */ int ip_flags = offsetof(struct nfp_flower_ipv4, ip_ext.flags); int ip_proto = offsetof(struct nfp_flower_ipv4, ip_ext.proto); + int size; int i; + size = key_layer & NFP_FLOWER_LAYER_IPV4 ? + sizeof(struct nfp_flower_ipv4) : + sizeof(struct nfp_flower_ipv6); + mask += sizeof(struct nfp_flower_mac_mpls); /* Ensure proto and flags are the only IP layer fields. */ - for (i = 0; i < sizeof(struct nfp_flower_ipv4); i++) + for (i = 0; i < size; i++) if (mask[i] && i != ip_flags && i != ip_proto) { NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header"); return -EOPNOTSUPP; @@ -1195,6 +1262,8 @@ err_remove_rhash: err_release_metadata: nfp_modify_flow_metadata(app, flow_pay); err_destroy_flow: + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); kfree(flow_pay->action_data); kfree(flow_pay->mask_data); kfree(flow_pay->unmasked_data); @@ -1311,6 +1380,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, if (nfp_flow->nfp_tun_ipv4_addr) nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr); + if (nfp_flow->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, nfp_flow->nfp_tun_ipv6); + if (!nfp_flow->in_hw) { err = 0; goto err_free_merge_flow; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 2600ce476d6b..2df3deedf9fd 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -55,6 +55,25 @@ struct nfp_tun_active_tuns { }; /** + * struct nfp_tun_active_tuns_v6 - periodic message of active IPv6 tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @tun_info.ipv6: dest IPv6 address of active route + * @tun_info.egress_port: port the encapsulated packet egressed + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns_v6 { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info_v6 { + struct in6_addr ipv6; + __be32 egress_port; + } tun_info[]; +}; + +/** * struct nfp_tun_neigh - neighbour/route entry on the NFP * @dst_ipv4: destination IPv4 address * @src_ipv4: source IPv4 address @@ -71,6 +90,22 @@ struct nfp_tun_neigh { }; /** + * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP + * @dst_ipv6: destination IPv6 address + * @src_ipv6: source IPv6 address + * @dst_addr: destination MAC address + * @src_addr: source MAC address + * @port_id: NFP port to output packet on - associated with source IPv6 + */ +struct nfp_tun_neigh_v6 { + struct in6_addr dst_ipv6; + struct in6_addr src_ipv6; + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup * @ingress_port: ingress port of packet that signalled request * @ipv4_addr: destination ipv4 address for route @@ -83,13 +118,23 @@ struct nfp_tun_req_route_ipv4 { }; /** - * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP - * @ipv4_addr: destination of route + * struct nfp_tun_req_route_ipv6 - NFP requests an IPv6 route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv6_addr: destination ipv6 address for route + */ +struct nfp_tun_req_route_ipv6 { + __be32 ingress_port; + struct in6_addr ipv6_addr; +}; + +/** + * struct nfp_offloaded_route - routes that are offloaded to the NFP * @list: list pointer + * @ip_add: destination of route - can be IPv4 or IPv6 */ -struct nfp_ipv4_route_entry { - __be32 ipv4_addr; +struct nfp_offloaded_route { struct list_head list; + u8 ip_add[]; }; #define NFP_FL_IPV4_ADDRS_MAX 32 @@ -116,6 +161,18 @@ struct nfp_ipv4_addr_entry { struct list_head list; }; +#define NFP_FL_IPV6_ADDRS_MAX 4 + +/** + * struct nfp_tun_ipv6_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv6_addr: array of IPV6_ADDRS_MAX 128 bit IPv6 addresses + */ +struct nfp_tun_ipv6_addr { + __be32 count; + struct in6_addr ipv6_addr[NFP_FL_IPV6_ADDRS_MAX]; +}; + #define NFP_TUN_MAC_OFFLOAD_DEL_FLAG 0x2 /** @@ -206,6 +263,49 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) rcu_read_unlock(); } +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct nfp_tun_active_tuns_v6 *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + void *ipv6_add; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "IPv6 tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != struct_size(payload, tun_info, count)) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + rcu_read_lock(); + for (i = 0; i < count; i++) { + ipv6_add = &payload->tun_info[i].ipv6; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_dev_get(app, port, NULL); + if (!netdev) + continue; + + n = neigh_lookup(&nd_tbl, ipv6_add, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + rcu_read_unlock(); +#endif +} + static int nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, gfp_t flag) @@ -224,71 +324,126 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, return 0; } -static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr) +static bool +__nfp_tun_has_route(struct list_head *route_list, spinlock_t *list_lock, + void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { + spin_unlock_bh(list_lock); return true; } - } - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_unlock_bh(list_lock); return false; } -static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr) +static int +__nfp_tun_add_route_to_cache(struct list_head *route_list, + spinlock_t *list_lock, void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { - spin_unlock_bh(&priv->tun.neigh_off_lock); - return; + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { + spin_unlock_bh(list_lock); + return 0; } - } - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + + entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC); if (!entry) { - spin_unlock_bh(&priv->tun.neigh_off_lock); - nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); - return; + spin_unlock_bh(list_lock); + return -ENOMEM; } - entry->ipv4_addr = ipv4_addr; - list_add_tail(&entry->list, &priv->tun.neigh_off_list); - spin_unlock_bh(&priv->tun.neigh_off_lock); + memcpy(entry->ip_add, add, add_len); + list_add_tail(&entry->list, route_list); + spin_unlock_bh(list_lock); + + return 0; } -static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) +static void +__nfp_tun_del_route_from_cache(struct list_head *route_list, + spinlock_t *list_lock, void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { list_del(&entry->list); kfree(entry); break; } - } - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_unlock_bh(list_lock); +} + +static bool nfp_tun_has_route_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + return __nfp_tun_has_route(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static bool +nfp_tun_has_route_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + return __nfp_tun_has_route(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); +} + +static void +nfp_tun_add_route_to_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static void +nfp_tun_add_route_to_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); } static void -nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, - struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) +nfp_tun_del_route_from_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static void +nfp_tun_del_route_from_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); +} + +static void +nfp_tun_write_neigh_v4(struct net_device *netdev, struct nfp_app *app, + struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) { struct nfp_tun_neigh payload; u32 port_id; @@ -302,7 +457,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, /* If entry has expired send dst IP with all other fields 0. */ if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { - nfp_tun_del_route_from_cache(app, payload.dst_ipv4); + nfp_tun_del_route_from_cache_v4(app, &payload.dst_ipv4); /* Trigger ARP to verify invalid neighbour state. */ neigh_event_send(neigh, NULL); goto send_msg; @@ -314,7 +469,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, neigh_ha_snapshot(payload.dst_addr, neigh, netdev); payload.port_id = cpu_to_be32(port_id); /* Add destination of new route to NFP cache. */ - nfp_tun_add_route_to_cache(app, payload.dst_ipv4); + nfp_tun_add_route_to_cache_v4(app, &payload.dst_ipv4); send_msg: nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, @@ -322,16 +477,54 @@ send_msg: (unsigned char *)&payload, flag); } +static void +nfp_tun_write_neigh_v6(struct net_device *netdev, struct nfp_app *app, + struct flowi6 *flow, struct neighbour *neigh, gfp_t flag) +{ + struct nfp_tun_neigh_v6 payload; + u32 port_id; + + port_id = nfp_flower_get_port_id_from_netdev(app, netdev); + if (!port_id) + return; + + memset(&payload, 0, sizeof(struct nfp_tun_neigh_v6)); + payload.dst_ipv6 = flow->daddr; + + /* If entry has expired send dst IP with all other fields 0. */ + if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { + nfp_tun_del_route_from_cache_v6(app, &payload.dst_ipv6); + /* Trigger probe to verify invalid neighbour state. */ + neigh_event_send(neigh, NULL); + goto send_msg; + } + + /* Have a valid neighbour so populate rest of entry. */ + payload.src_ipv6 = flow->saddr; + ether_addr_copy(payload.src_addr, netdev->dev_addr); + neigh_ha_snapshot(payload.dst_addr, neigh, netdev); + payload.port_id = cpu_to_be32(port_id); + /* Add destination of new route to NFP cache. */ + nfp_tun_add_route_to_cache_v6(app, &payload.dst_ipv6); + +send_msg: + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, + sizeof(struct nfp_tun_neigh_v6), + (unsigned char *)&payload, flag); +} + static int nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, void *ptr) { struct nfp_flower_priv *app_priv; struct netevent_redirect *redir; - struct flowi4 flow = {}; + struct flowi4 flow4 = {}; + struct flowi6 flow6 = {}; struct neighbour *n; struct nfp_app *app; struct rtable *rt; + bool ipv6 = false; int err; switch (event) { @@ -346,7 +539,13 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; } - flow.daddr = *(__be32 *)n->primary_key; + if (n->tbl->family == AF_INET6) + ipv6 = true; + + if (ipv6) + flow6.daddr = *(struct in6_addr *)n->primary_key; + else + flow4.daddr = *(__be32 *)n->primary_key; app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); app = app_priv->app; @@ -356,28 +555,46 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; /* Only concerned with changes to routes already added to NFP. */ - if (!nfp_tun_has_route(app, flow.daddr)) + if ((ipv6 && !nfp_tun_has_route_v6(app, &flow6.daddr)) || + (!ipv6 && !nfp_tun_has_route_v4(app, &flow4.daddr))) return NOTIFY_DONE; #if IS_ENABLED(CONFIG_INET) - /* Do a route lookup to populate flow data. */ - rt = ip_route_output_key(dev_net(n->dev), &flow); - err = PTR_ERR_OR_ZERO(rt); - if (err) + if (ipv6) { +#if IS_ENABLED(CONFIG_IPV6) + struct dst_entry *dst; + + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(n->dev), NULL, + &flow6, NULL); + if (IS_ERR(dst)) + return NOTIFY_DONE; + + dst_release(dst); + flow6.flowi6_proto = IPPROTO_UDP; + nfp_tun_write_neigh_v6(n->dev, app, &flow6, n, GFP_ATOMIC); +#else return NOTIFY_DONE; +#endif /* CONFIG_IPV6 */ + } else { + /* Do a route lookup to populate flow data. */ + rt = ip_route_output_key(dev_net(n->dev), &flow4); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; - ip_rt_put(rt); + ip_rt_put(rt); + + flow4.flowi4_proto = IPPROTO_UDP; + nfp_tun_write_neigh_v4(n->dev, app, &flow4, n, GFP_ATOMIC); + } #else return NOTIFY_DONE; -#endif - - flow.flowi4_proto = IPPROTO_UDP; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); +#endif /* CONFIG_INET */ return NOTIFY_OK; } -void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) { struct nfp_tun_req_route_ipv4 *payload; struct net_device *netdev; @@ -411,7 +628,7 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); + nfp_tun_write_neigh_v4(n->dev, app, &flow, n, GFP_ATOMIC); neigh_release(n); rcu_read_unlock(); return; @@ -421,6 +638,48 @@ fail_rcu_unlock: nfp_flower_cmsg_warn(app, "Requested route not found.\n"); } +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv6 *payload; + struct net_device *netdev; + struct flowi6 flow = {}; + struct dst_entry *dst; + struct neighbour *n; + + payload = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); + if (!netdev) + goto fail_rcu_unlock; + + flow.daddr = payload->ipv6_addr; + flow.flowi6_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(netdev), NULL, &flow, + NULL); + if (IS_ERR(dst)) + goto fail_rcu_unlock; +#else + goto fail_rcu_unlock; +#endif + + n = dst_neigh_lookup(dst, &flow.daddr); + dst_release(dst); + if (!n) + goto fail_rcu_unlock; + + nfp_tun_write_neigh_v6(n->dev, app, &flow, n, GFP_ATOMIC); + neigh_release(n); + rcu_read_unlock(); + return; + +fail_rcu_unlock: + rcu_read_unlock(); + nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n"); +} + static void nfp_tun_write_ipv4_list(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; @@ -502,6 +761,78 @@ void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) nfp_tun_write_ipv4_list(app); } +static void nfp_tun_write_ipv6_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + struct nfp_tun_ipv6_addr payload; + int count = 0; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv6_addr)); + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) { + if (count >= NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "Too many IPv6 tunnel endpoint addresses, some cannot be offloaded.\n"); + break; + } + payload.ipv6_addr[count++] = entry->ipv6_addr; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + payload.count = cpu_to_be32(count); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6, + sizeof(struct nfp_tun_ipv6_addr), + &payload, GFP_KERNEL); +} + +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) + if (!memcmp(&entry->ipv6_addr, ipv6, sizeof(*ipv6))) { + entry->ref_count++; + mutex_unlock(&priv->tun.ipv6_off_lock); + return entry; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->tun.ipv6_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return NULL; + } + entry->ipv6_addr = *ipv6; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->tun.ipv6_off_list); + mutex_unlock(&priv->tun.ipv6_off_lock); + + nfp_tun_write_ipv6_list(app); + + return entry; +} + +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry) +{ + struct nfp_flower_priv *priv = app->priv; + bool freed = false; + + mutex_lock(&priv->tun.ipv6_off_lock); + if (!--entry->ref_count) { + list_del(&entry->list); + kfree(entry); + freed = true; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + + if (freed) + nfp_tun_write_ipv6_list(app); +} + static int __nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del) { @@ -1013,13 +1344,17 @@ int nfp_tunnel_config_start(struct nfp_app *app) ida_init(&priv->tun.mac_off_ids); - /* Initialise priv data for IPv4 offloading. */ + /* Initialise priv data for IPv4/v6 offloading. */ mutex_init(&priv->tun.ipv4_off_lock); INIT_LIST_HEAD(&priv->tun.ipv4_off_list); + mutex_init(&priv->tun.ipv6_off_lock); + INIT_LIST_HEAD(&priv->tun.ipv6_off_list); /* Initialise priv data for neighbour offloading. */ - spin_lock_init(&priv->tun.neigh_off_lock); - INIT_LIST_HEAD(&priv->tun.neigh_off_list); + spin_lock_init(&priv->tun.neigh_off_lock_v4); + INIT_LIST_HEAD(&priv->tun.neigh_off_list_v4); + spin_lock_init(&priv->tun.neigh_off_lock_v6); + INIT_LIST_HEAD(&priv->tun.neigh_off_list_v6); priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler; err = register_netevent_notifier(&priv->tun.neigh_nb); @@ -1034,9 +1369,11 @@ int nfp_tunnel_config_start(struct nfp_app *app) void nfp_tunnel_config_stop(struct nfp_app *app) { + struct nfp_offloaded_route *route_entry, *temp; struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *route_entry; struct nfp_ipv4_addr_entry *ip_entry; + struct nfp_tun_neigh_v6 ipv6_route; + struct nfp_tun_neigh ipv4_route; struct list_head *ptr, *storage; unregister_netevent_notifier(&priv->tun.neigh_nb); @@ -1050,12 +1387,35 @@ void nfp_tunnel_config_stop(struct nfp_app *app) kfree(ip_entry); } - /* Free any memory that may be occupied by the route list. */ - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, - list); + mutex_destroy(&priv->tun.ipv6_off_lock); + + /* Free memory in the route list and remove entries from fw cache. */ + list_for_each_entry_safe(route_entry, temp, + &priv->tun.neigh_off_list_v4, list) { + memset(&ipv4_route, 0, sizeof(ipv4_route)); + memcpy(&ipv4_route.dst_ipv4, &route_entry->ip_add, + sizeof(ipv4_route.dst_ipv4)); list_del(&route_entry->list); kfree(route_entry); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&ipv4_route, + GFP_KERNEL); + } + + list_for_each_entry_safe(route_entry, temp, + &priv->tun.neigh_off_list_v6, list) { + memset(&ipv6_route, 0, sizeof(ipv6_route)); + memcpy(&ipv6_route.dst_ipv6, &route_entry->ip_add, + sizeof(ipv6_route.dst_ipv6)); + list_del(&route_entry->list); + kfree(route_entry); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&ipv6_route, + GFP_KERNEL); } /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 250f510b1d21..ff4438478ea9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -586,6 +586,9 @@ struct nfp_net_dp { * @ktls_conn_id_gen: Trivial generator for kTLS connection ids (for TX) * @ktls_no_space: Counter of firmware rejecting kTLS connection due to * lack of space + * @ktls_rx_resync_req: Counter of TLS RX resync requested + * @ktls_rx_resync_ign: Counter of TLS RX resync requests ignored + * @ktls_rx_resync_sent: Counter of TLS RX resync completed * @mbox_cmsg: Common Control Message via vNIC mailbox state * @mbox_cmsg.queue: CCM mbox queue of pending messages * @mbox_cmsg.wq: CCM mbox wait queue of waiting processes @@ -674,6 +677,9 @@ struct nfp_net { atomic64_t ktls_conn_id_gen; atomic_t ktls_no_space; + atomic_t ktls_rx_resync_req; + atomic_t ktls_rx_resync_ign; + atomic_t ktls_rx_resync_sent; struct { struct sk_buff_head queue; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index bcdcd6de7dea..9bfb3b077bc1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -47,6 +47,7 @@ #include "nfp_net_sriov.h" #include "nfp_port.h" #include "crypto/crypto.h" +#include "crypto/fw.h" /** * nfp_net_get_fw_version() - Read and parse the FW version @@ -1321,17 +1322,11 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) netdev_tx_reset_queue(nd_q); } -static void nfp_net_tx_timeout(struct net_device *netdev) +static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct nfp_net *nn = netdev_priv(netdev); - int i; - for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) { - if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) - continue; - nn_warn(nn, "TX timeout on ring: %d\n", i); - } - nn_warn(nn, "TX watchdog timeout\n"); + nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue); } /* Receive processing @@ -1667,9 +1662,9 @@ nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, &rx_hash->hash); } -static void * +static bool nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, - void *data, int meta_len) + void *data, void *pkt, unsigned int pkt_len, int meta_len) { u32 meta_info; @@ -1699,14 +1694,20 @@ nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, (__force __wsum)__get_unaligned_cpu32(data); data += 4; break; + case NFP_NET_META_RESYNC_INFO: + if (nfp_net_tls_rx_resync_req(netdev, data, pkt, + pkt_len)) + return NULL; + data += sizeof(struct nfp_net_tls_resync_req); + break; default: - return NULL; + return true; } meta_info >>= NFP_NET_META_FIELD_SIZE; } - return data; + return data != pkt; } static void @@ -1891,12 +1892,10 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_set_hash_desc(dp->netdev, &meta, rxbuf->frag + meta_off, rxd); } else if (meta_len) { - void *end; - - end = nfp_net_parse_meta(dp->netdev, &meta, - rxbuf->frag + meta_off, - meta_len); - if (unlikely(end != rxbuf->frag + pkt_off)) { + if (unlikely(nfp_net_parse_meta(dp->netdev, &meta, + rxbuf->frag + meta_off, + rxbuf->frag + pkt_off, + pkt_len, meta_len))) { nn_dp_warn(dp, "invalid RX packet metadata\n"); nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c index d835c14b7257..c3a763134e79 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c @@ -17,6 +17,30 @@ static void nfp_net_tlv_caps_reset(struct nfp_net_tlv_caps *caps) caps->mbox_len = NFP_NET_CFG_MBOX_VAL_MAX_SZ; } +static bool +nfp_net_tls_parse_crypto_ops(struct device *dev, struct nfp_net_tlv_caps *caps, + u8 __iomem *ctrl_mem, u8 __iomem *data, + unsigned int length, unsigned int offset, + bool rx_stream_scan) +{ + /* Ignore the legacy TLV if new one was already parsed */ + if (caps->tls_resync_ss && !rx_stream_scan) + return true; + + if (length < 32) { + dev_err(dev, + "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", + length, offset); + return false; + } + + caps->crypto_ops = readl(data); + caps->crypto_enable_off = data - ctrl_mem + 16; + caps->tls_resync_ss = rx_stream_scan; + + return true; +} + int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, struct nfp_net_tlv_caps *caps) { @@ -104,15 +128,25 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, caps->mbox_cmsg_types = readl(data); break; case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: - if (length < 32) { - dev_err(dev, - "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", - length, offset); + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + false)) return -EINVAL; + break; + case NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + if ((data - ctrl_mem) % 8) { + dev_warn(dev, "VNIC STATS TLV misaligned, ignoring offset:%u len:%u\n", + offset, length); + break; } - - caps->crypto_ops = readl(data); - caps->crypto_enable_off = data - ctrl_mem + 16; + caps->vnic_stats_off = data - ctrl_mem; + caps->vnic_stats_cnt = length / 10; + break; + case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + true)) + return -EINVAL; break; default: if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr)) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index ee6b24e4eacd..3d61a8cb60b0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -45,6 +45,7 @@ #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ #define NFP_NET_META_CONN_HANDLE 7 +#define NFP_NET_META_RESYNC_INFO 8 /* RX resync info request */ #define NFP_META_PORT_ID_CTRL ~0U @@ -479,6 +480,22 @@ * 8 words, bitmaps of supported and enabled crypto operations. * First 16B (4 words) contains a bitmap of supported crypto operations, * and next 16B contain the enabled operations. + * This capability is made obsolete by ones with better sync methods. + * + * %NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + * Variable, per-vNIC statistics, data should be 8B aligned (FW should insert + * zero-length RESERVED TLV to pad). + * TLV data has two sections. First is an array of statistics' IDs (2B each). + * Second 8B statistics themselves. Statistics are 8B aligned, meaning there + * may be a padding between sections. + * Number of statistics can be determined as floor(tlv.length / (2 + 8)). + * This TLV overwrites %NFP_NET_CFG_STATS_* values (statistics in this TLV + * duplicate the old ones, so driver should be careful not to unnecessarily + * render both). + * + * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + * Same as %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS, but crypto TLS does stream scan + * RX sync, rather than kernel-assisted sync. */ #define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0 #define NFP_NET_CFG_TLV_TYPE_RESERVED 1 @@ -490,6 +507,8 @@ #define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7 #define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES 10 #define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS 11 /* see crypto/fw.h */ +#define NFP_NET_CFG_TLV_TYPE_VNIC_STATS 12 +#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN 13 struct device; @@ -502,6 +521,9 @@ struct device; * @mbox_cmsg_types: cmsgs which can be passed through the mailbox * @crypto_ops: supported crypto operations * @crypto_enable_off: offset of crypto ops enable region + * @vnic_stats_off: offset of vNIC stats area + * @vnic_stats_cnt: number of vNIC stats + * @tls_resync_ss: TLS resync will be performed via stream scan */ struct nfp_net_tlv_caps { u32 me_freq_mhz; @@ -511,6 +533,9 @@ struct nfp_net_tlv_caps { u32 mbox_cmsg_types; u32 crypto_ops; unsigned int crypto_enable_off; + unsigned int vnic_stats_off; + unsigned int vnic_stats_cnt; + unsigned int tls_resync_ss:1; }; int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1b840ee47339..d648e32c0520 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -148,11 +148,33 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { { "tx_pause_frames_class7", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS7, }, }; +static const char nfp_tlv_stat_names[][ETH_GSTRING_LEN] = { + [1] = "dev_rx_discards", + [2] = "dev_rx_errors", + [3] = "dev_rx_bytes", + [4] = "dev_rx_uc_bytes", + [5] = "dev_rx_mc_bytes", + [6] = "dev_rx_bc_bytes", + [7] = "dev_rx_pkts", + [8] = "dev_rx_mc_pkts", + [9] = "dev_rx_bc_pkts", + + [10] = "dev_tx_discards", + [11] = "dev_tx_errors", + [12] = "dev_tx_bytes", + [13] = "dev_tx_uc_bytes", + [14] = "dev_tx_mc_bytes", + [15] = "dev_tx_bc_bytes", + [16] = "dev_tx_pkts", + [17] = "dev_tx_mc_pkts", + [18] = "dev_tx_bc_pkts", +}; + #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 #define NN_RVEC_GATHER_STATS 13 #define NN_RVEC_PER_Q_STATS 3 -#define NN_CTRL_PATH_STATS 1 +#define NN_CTRL_PATH_STATS 4 #define SFP_SFF_REV_COMPLIANCE 1 @@ -454,6 +476,9 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) data = nfp_pr_et(data, "tx_tls_drop_no_sync_data"); data = nfp_pr_et(data, "hw_tls_no_space"); + data = nfp_pr_et(data, "rx_tls_resync_req_ok"); + data = nfp_pr_et(data, "rx_tls_resync_req_ign"); + data = nfp_pr_et(data, "rx_tls_resync_sent"); return data; } @@ -502,6 +527,9 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) *data++ = gathered_stats[j]; *data++ = atomic_read(&nn->ktls_no_space); + *data++ = atomic_read(&nn->ktls_rx_resync_req); + *data++ = atomic_read(&nn->ktls_rx_resync_ign); + *data++ = atomic_read(&nn->ktls_rx_resync_sent); return data; } @@ -560,6 +588,65 @@ nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, unsigned int num_vecs) return data; } +static unsigned int nfp_vnic_get_tlv_stats_count(struct nfp_net *nn) +{ + return nn->tlv_caps.vnic_stats_cnt + nn->max_r_vecs * 4; +} + +static u8 *nfp_vnic_get_tlv_stats_strings(struct nfp_net *nn, u8 *data) +{ + unsigned int i, id; + u8 __iomem *mem; + u64 id_word = 0; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) { + if (!(i % 4)) + id_word = readq(mem + i * 2); + + id = (u16)id_word; + id_word >>= 16; + + if (id < ARRAY_SIZE(nfp_tlv_stat_names) && + nfp_tlv_stat_names[id][0]) { + memcpy(data, nfp_tlv_stat_names[id], ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } else { + data = nfp_pr_et(data, "dev_unknown_stat%u", id); + } + } + + for (i = 0; i < nn->max_r_vecs; i++) { + data = nfp_pr_et(data, "rxq_%u_pkts", i); + data = nfp_pr_et(data, "rxq_%u_bytes", i); + data = nfp_pr_et(data, "txq_%u_pkts", i); + data = nfp_pr_et(data, "txq_%u_bytes", i); + } + + return data; +} + +static u64 *nfp_vnic_get_tlv_stats(struct nfp_net *nn, u64 *data) +{ + u8 __iomem *mem; + unsigned int i; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + mem += roundup(2 * nn->tlv_caps.vnic_stats_cnt, 8); + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) + *data++ = readq(mem + i * 8); + + mem = nn->dp.ctrl_bar; + for (i = 0; i < nn->max_r_vecs; i++) { + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8); + } + + return data; +} + static unsigned int nfp_mac_get_stats_count(struct net_device *netdev) { struct nfp_port *port; @@ -609,8 +696,12 @@ static void nfp_net_get_strings(struct net_device *netdev, switch (stringset) { case ETH_SS_STATS: data = nfp_vnic_get_sw_stats_strings(netdev, data); - data = nfp_vnic_get_hw_stats_strings(data, nn->max_r_vecs, - false); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats_strings(data, + nn->max_r_vecs, + false); + else + data = nfp_vnic_get_tlv_stats_strings(nn, data); data = nfp_mac_get_stats_strings(netdev, data); data = nfp_app_port_get_stats_strings(nn->port, data); break; @@ -624,7 +715,11 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats, struct nfp_net *nn = netdev_priv(netdev); data = nfp_vnic_get_sw_stats(netdev, data); - data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, nn->max_r_vecs); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, + nn->max_r_vecs); + else + data = nfp_vnic_get_tlv_stats(nn, data); data = nfp_mac_get_stats(netdev, data); data = nfp_app_port_get_stats(nn->port, data); } @@ -632,13 +727,18 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats, static int nfp_net_get_sset_count(struct net_device *netdev, int sset) { struct nfp_net *nn = netdev_priv(netdev); + unsigned int cnt; switch (sset) { case ETH_SS_STATS: - return nfp_vnic_get_sw_stats_count(netdev) + - nfp_vnic_get_hw_stats_count(nn->max_r_vecs) + - nfp_mac_get_stats_count(netdev) + - nfp_app_port_get_stats_count(nn->port); + cnt = nfp_vnic_get_sw_stats_count(netdev); + if (!nn->tlv_caps.vnic_stats_off) + cnt += nfp_vnic_get_hw_stats_count(nn->max_r_vecs); + else + cnt += nfp_vnic_get_tlv_stats_count(nn); + cnt += nfp_mac_get_stats_count(netdev); + cnt += nfp_app_port_get_stats_count(nn->port); + return cnt; default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 6b54cb3b681d..2fc10a36afa4 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2739,7 +2739,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit) * nv_tx_timeout: dev->tx_timeout function * Called with netif_tx_lock held. */ -static void nv_tx_timeout(struct net_device *dev) +static void nv_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 18e6d87c607b..73ec195fbc30 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2271,7 +2271,7 @@ static int pch_gbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) * pch_gbe_tx_timeout - Respond to a Tx Hang * @netdev: Network interface device structure */ -static void pch_gbe_tx_timeout(struct net_device *netdev) +static void pch_gbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index eee883a2aa8d..70816d2e2990 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -548,7 +548,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int hamachi_open(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void hamachi_timer(struct timer_list *t); -static void hamachi_tx_timeout(struct net_device *dev); +static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue); static void hamachi_init_ring(struct net_device *dev); static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -1042,7 +1042,7 @@ static void hamachi_timer(struct timer_list *t) add_timer(&hmp->timer); } -static void hamachi_tx_timeout(struct net_device *dev) +static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct hamachi_private *hmp = netdev_priv(dev); diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 5113ee647090..520779f05e1a 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -344,7 +344,7 @@ static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int yellowfin_open(struct net_device *dev); static void yellowfin_timer(struct timer_list *t); -static void yellowfin_tx_timeout(struct net_device *dev); +static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue); static int yellowfin_init_ring(struct net_device *dev); static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -677,7 +677,7 @@ static void yellowfin_timer(struct timer_list *t) add_timer(&yp->timer); } -static void yellowfin_tx_timeout(struct net_device *dev) +static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct yellowfin_private *yp = netdev_priv(dev); void __iomem *ioaddr = yp->base; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index ef8258713369..a76108a9c7db 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1285,7 +1285,7 @@ static void ionic_tx_timeout_work(struct work_struct *ws) rtnl_unlock(); } -static void ionic_tx_timeout(struct net_device *netdev) +static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ionic_lif *lif = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index c692a41e4548..8067ea04d455 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -49,7 +49,7 @@ static int netxen_nic_open(struct net_device *netdev); static int netxen_nic_close(struct net_device *netdev); static netdev_tx_t netxen_nic_xmit_frame(struct sk_buff *, struct net_device *); -static void netxen_tx_timeout(struct net_device *netdev); +static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue); static void netxen_tx_timeout_task(struct work_struct *work); static void netxen_fw_poll_work(struct work_struct *work); static void netxen_schedule_work(struct netxen_adapter *adapter, @@ -2222,7 +2222,7 @@ static void netxen_nic_handle_phy_intr(struct netxen_adapter *adapter) netxen_advert_link_change(adapter, linkup); } -static void netxen_tx_timeout(struct net_device *netdev) +static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct netxen_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 7e0b795230b2..900bc603e30a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -331,8 +331,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, u8 sb_index = p_hwfn->p_eq->eq_sb_index; struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; u8 page_cnt, i; + int rc; /* update initial eq producer */ qed_eq_prod_update(p_hwfn, @@ -447,7 +447,7 @@ int qed_sp_pf_update(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); @@ -471,7 +471,7 @@ int qed_sp_pf_update_ufp(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EOPNOTSUPP; + int rc; if (p_hwfn->ufp_info.pri_type == QED_UFP_PRI_UNKNOWN) { DP_INFO(p_hwfn, "Invalid priority type %d\n", @@ -509,7 +509,7 @@ int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn, { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; if (IS_VF(p_hwfn->cdev)) return qed_vf_pf_tunnel_param_update(p_hwfn, p_tunn); @@ -546,7 +546,7 @@ int qed_sp_pf_stop(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index b4b8ba00ee01..bb864765c761 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3602,7 +3602,7 @@ static int ql3xxx_set_mac_address(struct net_device *ndev, void *p) return 0; } -static void ql3xxx_tx_timeout(struct net_device *ndev) +static void ql3xxx_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ql3_adapter *qdev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index c07438db30ba..9dd6cb36f366 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -56,7 +56,7 @@ static int qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent); static void qlcnic_remove(struct pci_dev *pdev); static int qlcnic_open(struct net_device *netdev); static int qlcnic_close(struct net_device *netdev); -static void qlcnic_tx_timeout(struct net_device *netdev); +static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue); static void qlcnic_attach_work(struct work_struct *work); static void qlcnic_fwinit_work(struct work_struct *work); @@ -3068,7 +3068,7 @@ static void qlcnic_dump_rings(struct qlcnic_adapter *adapter) } -static void qlcnic_tx_timeout(struct net_device *netdev) +static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct qlcnic_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 98f92268cbaa..522fad4cb2cd 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -282,7 +282,7 @@ static int emac_close(struct net_device *netdev) } /* Respond to a TX hang */ -static void emac_tx_timeout(struct net_device *netdev) +static void emac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct emac_adapter *adpt = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index baac016f3ec0..5a3b65a6eb4f 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -785,7 +785,7 @@ qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev) } static void -qcaspi_netdev_tx_timeout(struct net_device *dev) +qcaspi_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qcaspi *qca = netdev_priv(dev); diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 0981068504fa..375a844cd27c 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -248,7 +248,7 @@ out: return NETDEV_TX_OK; } -static void qcauart_netdev_tx_timeout(struct net_device *dev) +static void qcauart_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qcauart *qca = netdev_priv(dev); diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 274e5b4bc4ac..c23cb61bbd30 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -410,7 +410,7 @@ static void r6040_init_mac_regs(struct net_device *dev) iowrite16(TM2TX, ioaddr + MTPR); } -static void r6040_tx_timeout(struct net_device *dev) +static void r6040_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct r6040_private *priv = netdev_priv(dev); void __iomem *ioaddr = priv->base; diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 4f910c4f67b0..60d342f82fb3 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1235,7 +1235,7 @@ static int cp_close (struct net_device *dev) return 0; } -static void cp_tx_timeout(struct net_device *dev) +static void cp_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cp_private *cp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 55d01266e615..5caeb8368eab 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -642,7 +642,7 @@ static int mdio_read (struct net_device *dev, int phy_id, int location); static void mdio_write (struct net_device *dev, int phy_id, int location, int val); static void rtl8139_start_thread(struct rtl8139_private *tp); -static void rtl8139_tx_timeout (struct net_device *dev); +static void rtl8139_tx_timeout (struct net_device *dev, unsigned int txqueue); static void rtl8139_init_ring (struct net_device *dev); static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev); @@ -1700,7 +1700,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) spin_unlock_bh(&tp->rx_lock); } -static void rtl8139_tx_timeout (struct net_device *dev) +static void rtl8139_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rtl8139_private *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index 58e0ca9093d3..9e3b35c97e63 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -204,7 +204,7 @@ static void net_rx(struct net_device *dev); static void read_block(long ioaddr, int length, unsigned char *buffer, int data_mode); static int net_close(struct net_device *dev); static void set_rx_mode(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); /* A list of all installed ATP devices, for removing the driver module. */ @@ -533,7 +533,7 @@ static void write_packet(long ioaddr, int length, unsigned char *packet, int pad outb(Ctrl_HNibWrite | Ctrl_SelData | Ctrl_IRQEN, ioaddr + PAR_CONTROL); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { long ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 67a4d5d45e3a..a887b685d070 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5435,7 +5435,7 @@ static void rtl_reset_work(struct rtl8169_private *tp) netif_wake_queue(dev); } -static void rtl8169_tx_timeout(struct net_device *dev) +static void rtl8169_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rtl8169_private *tp = netdev_priv(dev); @@ -6825,6 +6825,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) int chipset, region; int jumbo_max, rc; + /* Some tools for creating an initramfs don't consider softdeps, then + * r8169.ko may be in initramfs, but realtek.ko not. Then the generic + * PHY driver is used that doesn't work with most chip versions. + */ + if (!driver_find("RTL8201CP Ethernet", &mdio_bus_type)) { + dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n"); + return -ENOENT; + } + dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp)); if (!dev) return -ENOMEM; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 4b13a184bfc7..067ad25553b9 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1425,7 +1425,7 @@ out_napi_off: } /* Timeout function for Ethernet AVB */ -static void ravb_tx_timeout(struct net_device *ndev) +static void ravb_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ravb_private *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e19b49c4013e..cdd8ab2eb910 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2478,7 +2478,7 @@ out_napi_off: } /* Timeout function */ -static void sh_eth_tx_timeout(struct net_device *ndev) +static void sh_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct sh_eth_private *mdp = netdev_priv(ndev); struct sh_eth_rxdesc *rxdesc; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index bc4f951315da..7585cd2270ba 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2159,7 +2159,7 @@ static void rocker_router_fib_event_work(struct work_struct *work) /* Protect internal structures from changes */ rtnl_lock(); switch (fib_work->event) { - case FIB_EVENT_ENTRY_ADD: + case FIB_EVENT_ENTRY_REPLACE: err = rocker_world_fib4_add(rocker, &fib_work->fen_info); if (err) rocker_world_fib4_abort(rocker); @@ -2201,7 +2201,7 @@ static int rocker_router_fib_event(struct notifier_block *nb, fib_work->event = event; switch (event) { - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_DEL: if (info->family == AF_INET) { struct fib_entry_notifier_info *fen_info = ptr; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index c56fcbb37066..cd6e0de48248 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1572,7 +1572,7 @@ static int sxgbe_poll(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. */ -static void sxgbe_tx_timeout(struct net_device *dev) +static void sxgbe_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sxgbe_priv_data *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 632a7c85964d..128ee7cda1ed 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -79,7 +79,7 @@ static netdev_tx_t ether3_sendpacket(struct sk_buff *skb, static irqreturn_t ether3_interrupt (int irq, void *dev_id); static int ether3_close (struct net_device *dev); static void ether3_setmulticastlist (struct net_device *dev); -static void ether3_timeout(struct net_device *dev); +static void ether3_timeout(struct net_device *dev, unsigned int txqueue); #define BUS_16 2 #define BUS_8 1 @@ -450,7 +450,7 @@ static void ether3_setmulticastlist(struct net_device *dev) ether3_outw(priv(dev)->regs.config1 | CFG1_LOCBUFMEM, REG_CONFIG1); } -static void ether3_timeout(struct net_device *dev) +static void ether3_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long flags; diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 276c7cae7cee..8507ff242014 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -645,7 +645,7 @@ sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void timeout(struct net_device *dev) +static void timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_NOTICE "%s: transmit timed out, resetting\n", dev->name); sgiseeq_reset(dev); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 992c773620ec..f358709fab67 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2395,7 +2395,7 @@ static void efx_net_stats(struct net_device *net_dev, } /* Context: netif_tx_lock held, BHs disabled. */ -static void efx_watchdog(struct net_device *net_dev) +static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct efx_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index eecc348b1c32..bee4cd9d7135 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2108,7 +2108,7 @@ static void ef4_net_stats(struct net_device *net_dev, } /* Context: netif_tx_lock held, BHs disabled. */ -static void ef4_watchdog(struct net_device *net_dev) +static void ef4_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct ef4_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index d242906ae233..06637b03deed 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -114,7 +114,7 @@ struct ioc3_private { static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void ioc3_set_multicast_list(struct net_device *dev); static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void ioc3_timeout(struct net_device *dev); +static void ioc3_timeout(struct net_device *dev, unsigned int txqueue); static inline unsigned int ioc3_hash(const unsigned char *addr); static void ioc3_start(struct ioc3_private *ip); static inline void ioc3_stop(struct ioc3_private *ip); @@ -1479,7 +1479,7 @@ drop_packet: return NETDEV_TX_OK; } -static void ioc3_timeout(struct net_device *dev) +static void ioc3_timeout(struct net_device *dev, unsigned int txqueue) { struct ioc3_private *ip = netdev_priv(dev); diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 539bc5db989c..0c396ecd3389 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -90,7 +90,7 @@ struct meth_private { spinlock_t meth_lock; }; -static void meth_tx_timeout(struct net_device *dev); +static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t meth_interrupt(int irq, void *dev_id); /* global, initialized in ip32-setup.c */ @@ -727,7 +727,7 @@ static netdev_tx_t meth_tx(struct sk_buff *skb, struct net_device *dev) /* * Deal with a transmit timeout. */ -static void meth_tx_timeout(struct net_device *dev) +static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct meth_private *priv = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c index c7641a236eb8..cb043eb1bdc1 100644 --- a/drivers/net/ethernet/silan/sc92031.c +++ b/drivers/net/ethernet/silan/sc92031.c @@ -1078,7 +1078,7 @@ static void sc92031_set_multicast_list(struct net_device *dev) spin_unlock_bh(&priv->lock); } -static void sc92031_tx_timeout(struct net_device *dev) +static void sc92031_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sc92031_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 5b351beb78cb..5a4b6e3ab38f 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1538,7 +1538,7 @@ err_out_0: goto out; } -static void sis190_tx_timeout(struct net_device *dev) +static void sis190_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sis190_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 85eaccbbbac1..81ed7589e33c 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -222,7 +222,7 @@ static int mdio_read(struct net_device *net_dev, int phy_id, int location); static void mdio_write(struct net_device *net_dev, int phy_id, int location, int val); static void sis900_timer(struct timer_list *t); static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_phy); -static void sis900_tx_timeout(struct net_device *net_dev); +static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue); static void sis900_init_tx_ring(struct net_device *net_dev); static void sis900_init_rx_ring(struct net_device *net_dev); static netdev_tx_t sis900_start_xmit(struct sk_buff *skb, @@ -1537,7 +1537,7 @@ static void sis900_read_mode(struct net_device *net_dev, int *speed, int *duplex * disable interrupts and do some tasks */ -static void sis900_tx_timeout(struct net_device *net_dev) +static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { struct sis900_private *sis_priv = netdev_priv(net_dev); void __iomem *ioaddr = sis_priv->ioaddr; diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index be47d864f8b9..912760e8514c 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -291,7 +291,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int loc, int val); static void epic_restart(struct net_device *dev); static void epic_timer(struct timer_list *t); -static void epic_tx_timeout(struct net_device *dev); +static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue); static void epic_init_ring(struct net_device *dev); static netdev_tx_t epic_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -861,7 +861,7 @@ static void epic_timer(struct timer_list *t) add_timer(&ep->timer); } -static void epic_tx_timeout(struct net_device *dev) +static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct epic_private *ep = netdev_priv(dev); void __iomem *ioaddr = ep->ioaddr; diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 8d88e4083456..186c0bddbe5f 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -936,7 +936,7 @@ static void smc911x_phy_configure(struct work_struct *work) if (lp->ctl_rspeed != 100) my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF); - if (!lp->ctl_rfduplx) + if (!lp->ctl_rfduplx) my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL); /* Update our Auto-Neg Advertisement Register */ @@ -1245,7 +1245,7 @@ static void smc911x_poll_controller(struct net_device *dev) #endif /* Our watchdog timed out. Called by the networking layer */ -static void smc911x_timeout(struct net_device *dev) +static void smc911x_timeout(struct net_device *dev, unsigned int txqueue) { struct smc911x_local *lp = netdev_priv(dev); int status, mask; diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index d3bb2ba51f40..4b2330deed47 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -216,7 +216,7 @@ static int smc_open(struct net_device *dev); /* . Our watchdog timed out. Called by the networking layer */ -static void smc_timeout(struct net_device *dev); +static void smc_timeout(struct net_device *dev, unsigned int txqueue); /* . This is called by the kernel in response to 'ifconfig ethX down'. It @@ -1094,7 +1094,7 @@ static int smc_open(struct net_device *dev) .-------------------------------------------------------- */ -static void smc_timeout(struct net_device *dev) +static void smc_timeout(struct net_device *dev, unsigned int txqueue) { /* If we get here, some higher level has decided we are broken. There should really be a "kick me" function call instead. */ diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index a55f430f6a7b..f2a50eb3c1e0 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -271,7 +271,7 @@ static void smc91c92_release(struct pcmcia_device *link); static int smc_open(struct net_device *dev); static int smc_close(struct net_device *dev); static int smc_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static void smc_tx_timeout(struct net_device *dev); +static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue); static netdev_tx_t smc_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t smc_interrupt(int irq, void *dev_id); @@ -1178,7 +1178,7 @@ static void smc_hardware_send_packet(struct net_device * dev) /*====================================================================*/ -static void smc_tx_timeout(struct net_device *dev) +static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct smc_private *smc = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 3a6761131f4c..90410f9d3b1a 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -1321,7 +1321,7 @@ static void smc_poll_controller(struct net_device *dev) #endif /* Our watchdog timed out. Called by the networking layer */ -static void smc_timeout(struct net_device *dev) +static void smc_timeout(struct net_device *dev, unsigned int txqueue) { struct smc_local *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index b210e987a1db..09c72025ed3e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -363,6 +363,11 @@ struct dma_features { unsigned int dvlan; unsigned int l3l4fnum; unsigned int arpoffsel; + /* TSN Features */ + unsigned int estwid; + unsigned int estdep; + unsigned int estsel; + unsigned int fpesel; }; /* GMAC TX FIFO is 8K, Rx FIFO is 16K */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index dd9967aeda22..2342d497348e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -40,7 +40,7 @@ struct tegra_eqos { static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat) { - struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; u32 burst_map = 0; u32 bit_index = 0; u32 a_index = 0; @@ -52,9 +52,10 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, return -ENOMEM; } - plat_dat->axi->axi_lpi_en = of_property_read_bool(np, "snps,en-lpi"); - if (of_property_read_u32(np, "snps,write-requests", - &plat_dat->axi->axi_wr_osr_lmt)) { + plat_dat->axi->axi_lpi_en = device_property_read_bool(dev, + "snps,en-lpi"); + if (device_property_read_u32(dev, "snps,write-requests", + &plat_dat->axi->axi_wr_osr_lmt)) { /** * Since the register has a reset value of 1, if property * is missing, default to 1. @@ -68,8 +69,8 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, plat_dat->axi->axi_wr_osr_lmt--; } - if (of_property_read_u32(np, "snps,read-requests", - &plat_dat->axi->axi_rd_osr_lmt)) { + if (device_property_read_u32(dev, "snps,read-requests", + &plat_dat->axi->axi_rd_osr_lmt)) { /** * Since the register has a reset value of 1, if property * is missing, default to 1. @@ -82,7 +83,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, */ plat_dat->axi->axi_rd_osr_lmt--; } - of_property_read_u32(np, "snps,burst-map", &burst_map); + device_property_read_u32(dev, "snps,burst-map", &burst_map); /* converts burst-map bitmask to burst array */ for (bit_index = 0; bit_index < 7; bit_index++) { @@ -270,6 +271,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, struct plat_stmmacenet_data *data, struct stmmac_resources *res) { + struct device *dev = &pdev->dev; struct tegra_eqos *eqos; int err; @@ -282,6 +284,9 @@ static void *tegra_eqos_probe(struct platform_device *pdev, eqos->dev = &pdev->dev; eqos->regs = res->addr; + if (!is_of_node(dev->fwnode)) + goto bypass_clk_reset_gpio; + eqos->clk_master = devm_clk_get(&pdev->dev, "master_bus"); if (IS_ERR(eqos->clk_master)) { err = PTR_ERR(eqos->clk_master); @@ -354,6 +359,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, usleep_range(2000, 4000); +bypass_clk_reset_gpio: data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; @@ -421,7 +427,7 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) void *priv; int ret; - data = of_device_get_match_data(&pdev->dev); + data = device_get_match_data(&pdev->dev); memset(&stmmac_res, 0, sizeof(struct stmmac_resources)); @@ -478,7 +484,7 @@ static int dwc_eth_dwmac_remove(struct platform_device *pdev) const struct dwc_eth_dwmac_data *data; int err; - data = of_device_get_match_data(&pdev->dev); + data = device_get_match_data(&pdev->dev); err = stmmac_dvr_remove(&pdev->dev); if (err < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index bdb80421acac..9e4b83832938 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -55,6 +55,8 @@ struct mediatek_dwmac_plat_data { struct regmap *peri_regmap; struct device *dev; phy_interface_t phy_mode; + int num_clks_to_config; + bool rmii_clk_from_mac; bool rmii_rxc; }; @@ -73,21 +75,33 @@ struct mediatek_dwmac_variant { /* list of clocks required for mac */ static const char * const mt2712_dwmac_clk_l[] = { - "axi", "apb", "mac_main", "ptp_ref" + "axi", "apb", "mac_main", "ptp_ref", "rmii_internal" }; static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat) { + int rmii_clk_from_mac = plat->rmii_clk_from_mac ? RMII_CLK_SRC_INTERNAL : 0; int rmii_rxc = plat->rmii_rxc ? RMII_CLK_SRC_RXC : 0; u32 intf_val = 0; + /* The clock labeled as "rmii_internal" in mt2712_dwmac_clk_l is needed + * only in RMII(when MAC provides the reference clock), and useless for + * RGMII/MII/RMII(when PHY provides the reference clock). + * num_clks_to_config indicates the real number of clocks should be + * configured, equals to (plat->variant->num_clks - 1) in default for all the case, + * then +1 for rmii_clk_from_mac case. + */ + plat->num_clks_to_config = plat->variant->num_clks - 1; + /* select phy interface in top control domain */ switch (plat->phy_mode) { case PHY_INTERFACE_MODE_MII: intf_val |= PHY_INTF_MII; break; case PHY_INTERFACE_MODE_RMII: - intf_val |= (PHY_INTF_RMII | rmii_rxc); + if (plat->rmii_clk_from_mac) + plat->num_clks_to_config++; + intf_val |= (PHY_INTF_RMII | rmii_rxc | rmii_clk_from_mac); break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: @@ -173,35 +187,50 @@ static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat) delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); break; case PHY_INTERFACE_MODE_RMII: - /* the rmii reference clock is from external phy, - * and the property "rmii_rxc" indicates which pin(TXC/RXC) - * the reference clk is connected to. The reference clock is a - * received signal, so rx_delay/rx_inv are used to indicate - * the reference clock timing adjustment - */ - if (plat->rmii_rxc) { - /* the rmii reference clock from outside is connected - * to RXC pin, the reference clock will be adjusted - * by RXC delay macro circuit. - */ - delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay); - delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay); - delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); - } else { - /* the rmii reference clock from outside is connected - * to TXC pin, the reference clock will be adjusted - * by TXC delay macro circuit. + if (plat->rmii_clk_from_mac) { + /* case 1: mac provides the rmii reference clock, + * and the clock output to TXC pin. + * The egress timing can be adjusted by GTXC delay macro circuit. + * The ingress timing can be adjusted by TXC delay macro circuit. */ delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay); delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay); delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv); + + delay_val |= FIELD_PREP(ETH_DLY_GTXC_ENABLE, !!mac_delay->tx_delay); + delay_val |= FIELD_PREP(ETH_DLY_GTXC_STAGES, mac_delay->tx_delay); + delay_val |= FIELD_PREP(ETH_DLY_GTXC_INV, mac_delay->tx_inv); + } else { + /* case 2: the rmii reference clock is from external phy, + * and the property "rmii_rxc" indicates which pin(TXC/RXC) + * the reference clk is connected to. The reference clock is a + * received signal, so rx_delay/rx_inv are used to indicate + * the reference clock timing adjustment + */ + if (plat->rmii_rxc) { + /* the rmii reference clock from outside is connected + * to RXC pin, the reference clock will be adjusted + * by RXC delay macro circuit. + */ + delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); + } else { + /* the rmii reference clock from outside is connected + * to TXC pin, the reference clock will be adjusted + * by TXC delay macro circuit. + */ + delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv); + } + /* tx_inv will inverse the tx clock inside mac relateive to + * reference clock from external phy, + * and this bit is located in the same register with fine-tune + */ + if (mac_delay->tx_inv) + fine_val = ETH_RMII_DLY_TX_INV; } - /* tx_inv will inverse the tx clock inside mac relateive to - * reference clock from external phy, - * and this bit is located in the same register with fine-tune - */ - if (mac_delay->tx_inv) - fine_val = ETH_RMII_DLY_TX_INV; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: @@ -278,6 +307,7 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) mac_delay->tx_inv = of_property_read_bool(plat->np, "mediatek,txc-inverse"); mac_delay->rx_inv = of_property_read_bool(plat->np, "mediatek,rxc-inverse"); plat->rmii_rxc = of_property_read_bool(plat->np, "mediatek,rmii-rxc"); + plat->rmii_clk_from_mac = of_property_read_bool(plat->np, "mediatek,rmii-clk-from-mac"); return 0; } @@ -294,6 +324,8 @@ static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat) for (i = 0; i < num; i++) plat->clks[i].id = variant->clk_list[i]; + plat->num_clks_to_config = variant->num_clks; + return devm_clk_bulk_get(plat->dev, num, plat->clks); } @@ -321,7 +353,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) return ret; } - ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks); + ret = clk_bulk_prepare_enable(plat->num_clks_to_config, plat->clks); if (ret) { dev_err(plat->dev, "failed to enable clks, err = %d\n", ret); return ret; @@ -336,9 +368,8 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv) { struct mediatek_dwmac_plat_data *plat = priv; - const struct mediatek_dwmac_variant *variant = plat->variant; - clk_bulk_disable_unprepare(variant->num_clks, plat->clks); + clk_bulk_disable_unprepare(plat->num_clks_to_config, plat->clks); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 1c8d84ed8410..6f834302fda3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -335,14 +335,30 @@ static void sun8i_dwmac_dump_mac_regs(struct mac_device_info *hw, } } -static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan) +static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN); + u32 value = readl(ioaddr + EMAC_INT_EN); + + if (rx) + value |= EMAC_RX_INT; + if (tx) + value |= EMAC_TX_INT; + + writel(value, ioaddr + EMAC_INT_EN); } -static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan) +static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(0, ioaddr + EMAC_INT_EN); + u32 value = readl(ioaddr + EMAC_INT_EN); + + if (rx) + value &= ~EMAC_RX_INT; + if (tx) + value &= ~EMAC_TX_INT; + + writel(value, ioaddr + EMAC_INT_EN); } static void sun8i_dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 2dc70d104161..2e6b60a476c6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -64,6 +64,8 @@ #define GMAC_RXQCTRL_MCBCQEN_SHIFT 20 #define GMAC_RXQCTRL_TACPQE BIT(21) #define GMAC_RXQCTRL_TACPQE_SHIFT 21 +#define GMAC_RXQCTRL_FPRQ GENMASK(26, 24) +#define GMAC_RXQCTRL_FPRQ_SHIFT 24 /* MAC Packet Filtering */ #define GMAC_PACKET_FILTER_PR BIT(0) @@ -176,6 +178,8 @@ enum power_event { #define GMAC_CONFIG_SARC GENMASK(30, 28) #define GMAC_CONFIG_SARC_SHIFT 28 #define GMAC_CONFIG_IPC BIT(27) +#define GMAC_CONFIG_IPG GENMASK(26, 24) +#define GMAC_CONFIG_IPG_SHIFT 24 #define GMAC_CONFIG_2K BIT(22) #define GMAC_CONFIG_ACS BIT(20) #define GMAC_CONFIG_BE BIT(18) @@ -183,6 +187,7 @@ enum power_event { #define GMAC_CONFIG_JE BIT(16) #define GMAC_CONFIG_PS BIT(15) #define GMAC_CONFIG_FES BIT(14) +#define GMAC_CONFIG_FES_SHIFT 14 #define GMAC_CONFIG_DM BIT(13) #define GMAC_CONFIG_LM BIT(12) #define GMAC_CONFIG_DCRS BIT(9) @@ -190,6 +195,9 @@ enum power_event { #define GMAC_CONFIG_RE BIT(0) /* MAC extended config */ +#define GMAC_CONFIG_EIPG GENMASK(29, 25) +#define GMAC_CONFIG_EIPG_SHIFT 25 +#define GMAC_CONFIG_EIPG_EN BIT(24) #define GMAC_CONFIG_HDSMS GENMASK(22, 20) #define GMAC_CONFIG_HDSMS_SHIFT 20 #define GMAC_CONFIG_HDSMS_256 (0x2 << GMAC_CONFIG_HDSMS_SHIFT) @@ -231,6 +239,10 @@ enum power_event { /* MAC HW features3 bitmap */ #define GMAC_HW_FEAT_ASP GENMASK(29, 28) +#define GMAC_HW_FEAT_FPESEL BIT(26) +#define GMAC_HW_FEAT_ESTWID GENMASK(21, 20) +#define GMAC_HW_FEAT_ESTDEP GENMASK(19, 17) +#define GMAC_HW_FEAT_ESTSEL BIT(16) #define GMAC_HW_FEAT_FRPES GENMASK(14, 13) #define GMAC_HW_FEAT_FRPBS GENMASK(12, 11) #define GMAC_HW_FEAT_FRPSEL BIT(10) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 40ca00e596dd..f0c0ea616032 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -984,6 +984,8 @@ const struct stmmac_ops dwmac410_ops = { .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, + .est_configure = dwmac5_est_configure, + .fpe_configure = dwmac5_fpe_configure, }; const struct stmmac_ops dwmac510_ops = { @@ -1027,6 +1029,8 @@ const struct stmmac_ops dwmac510_ops = { .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, + .est_configure = dwmac5_est_configure, + .fpe_configure = dwmac5_fpe_configure, }; int dwmac4_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index c15409030710..213d44482ffa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -404,6 +404,10 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, /* 5.10 Features */ dma_cap->asp = (hw_cap & GMAC_HW_FEAT_ASP) >> 28; + dma_cap->fpesel = (hw_cap & GMAC_HW_FEAT_FPESEL) >> 26; + dma_cap->estwid = (hw_cap & GMAC_HW_FEAT_ESTWID) >> 20; + dma_cap->estdep = (hw_cap & GMAC_HW_FEAT_ESTDEP) >> 17; + dma_cap->estsel = (hw_cap & GMAC_HW_FEAT_ESTSEL) >> 16; dma_cap->frpes = (hw_cap & GMAC_HW_FEAT_FRPES) >> 13; dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11; dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index 589931795847..bcb6d5190f3d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -168,6 +168,8 @@ /* DMA default interrupt mask for 4.00 */ #define DMA_CHAN_INTR_DEFAULT_MASK (DMA_CHAN_INTR_NORMAL | \ DMA_CHAN_INTR_ABNORMAL) +#define DMA_CHAN_INTR_DEFAULT_RX (DMA_CHAN_INTR_ENA_RIE) +#define DMA_CHAN_INTR_DEFAULT_TX (DMA_CHAN_INTR_ENA_TIE) #define DMA_CHAN_INTR_NORMAL_4_10 (DMA_CHAN_INTR_ENA_NIE_4_10 | \ DMA_CHAN_INTR_ENA_RIE | \ @@ -178,6 +180,8 @@ /* DMA default interrupt mask for 4.10a */ #define DMA_CHAN_INTR_DEFAULT_MASK_4_10 (DMA_CHAN_INTR_NORMAL_4_10 | \ DMA_CHAN_INTR_ABNORMAL_4_10) +#define DMA_CHAN_INTR_DEFAULT_RX_4_10 (DMA_CHAN_INTR_ENA_RIE) +#define DMA_CHAN_INTR_DEFAULT_TX_4_10 (DMA_CHAN_INTR_ENA_TIE) /* channel 0 specific fields */ #define DMA_CHAN0_DBG_STAT_TPS GENMASK(15, 12) @@ -186,9 +190,10 @@ #define DMA_CHAN0_DBG_STAT_RPS_SHIFT 8 int dwmac4_dma_reset(void __iomem *ioaddr); -void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan); +void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index f2a29a90e085..9becca280074 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -97,21 +97,52 @@ void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan) writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(chan)); } -void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_CHAN_INTR_DEFAULT_MASK, ioaddr + - DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value |= DMA_CHAN_INTR_DEFAULT_RX; + if (tx) + value |= DMA_CHAN_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } -void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10, - ioaddr + DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value |= DMA_CHAN_INTR_DEFAULT_RX_4_10; + if (tx) + value |= DMA_CHAN_INTR_DEFAULT_TX_4_10; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } -void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(0, ioaddr + DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value &= ~DMA_CHAN_INTR_DEFAULT_RX; + if (tx) + value &= ~DMA_CHAN_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); +} + +void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) +{ + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value &= ~DMA_CHAN_INTR_DEFAULT_RX_4_10; + if (tx) + value &= ~DMA_CHAN_INTR_DEFAULT_TX_4_10; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } int dwmac4_dma_interrupt(void __iomem *ioaddr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index e436fa160c7d..5d4a3c2458ea 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -550,3 +550,121 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, writel(val, ioaddr + MAC_PPS_CONTROL); return 0; } + +static int dwmac5_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) +{ + u32 ctrl; + + writel(val, ioaddr + MTL_EST_GCL_DATA); + + ctrl = (reg << ADDR_SHIFT); + ctrl |= gcl ? 0 : GCRR; + + writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL); + + ctrl |= SRWO; + writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL); + + return readl_poll_timeout(ioaddr + MTL_EST_GCL_CONTROL, + ctrl, !(ctrl & SRWO), 100, 5000); +} + +int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate) +{ + u32 speed, total_offset, offset, ctrl, ctr_low; + u32 extcfg = readl(ioaddr + GMAC_EXT_CONFIG); + u32 mac_cfg = readl(ioaddr + GMAC_CONFIG); + int i, ret = 0x0; + u64 total_ctr; + + if (extcfg & GMAC_CONFIG_EIPG_EN) { + offset = (extcfg & GMAC_CONFIG_EIPG) >> GMAC_CONFIG_EIPG_SHIFT; + offset = 104 + (offset * 8); + } else { + offset = (mac_cfg & GMAC_CONFIG_IPG) >> GMAC_CONFIG_IPG_SHIFT; + offset = 96 - (offset * 8); + } + + speed = mac_cfg & (GMAC_CONFIG_PS | GMAC_CONFIG_FES); + speed = speed >> GMAC_CONFIG_FES_SHIFT; + + switch (speed) { + case 0x0: + offset = offset * 1000; /* 1G */ + break; + case 0x1: + offset = offset * 400; /* 2.5G */ + break; + case 0x2: + offset = offset * 100000; /* 10M */ + break; + case 0x3: + offset = offset * 10000; /* 100M */ + break; + default: + return -EINVAL; + } + + offset = offset / 1000; + + ret |= dwmac5_est_write(ioaddr, BTR_LOW, cfg->btr[0], false); + ret |= dwmac5_est_write(ioaddr, BTR_HIGH, cfg->btr[1], false); + ret |= dwmac5_est_write(ioaddr, TER, cfg->ter, false); + ret |= dwmac5_est_write(ioaddr, LLR, cfg->gcl_size, false); + if (ret) + return ret; + + total_offset = 0; + for (i = 0; i < cfg->gcl_size; i++) { + ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i] + offset, true); + if (ret) + return ret; + + total_offset += offset; + } + + total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000; + total_ctr += total_offset; + + ctr_low = do_div(total_ctr, 1000000000); + + ret |= dwmac5_est_write(ioaddr, CTR_LOW, ctr_low, false); + ret |= dwmac5_est_write(ioaddr, CTR_HIGH, total_ctr, false); + if (ret) + return ret; + + ctrl = readl(ioaddr + MTL_EST_CONTROL); + ctrl &= ~PTOV; + ctrl |= ((1000000000 / ptp_rate) * 6) << PTOV_SHIFT; + if (cfg->enable) + ctrl |= EEST | SSWL; + else + ctrl &= ~EEST; + + writel(ctrl, ioaddr + MTL_EST_CONTROL); + return 0; +} + +void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable) +{ + u32 value; + + if (!enable) { + value = readl(ioaddr + MAC_FPE_CTRL_STS); + + value &= ~EFPE; + + writel(value, ioaddr + MAC_FPE_CTRL_STS); + } + + value = readl(ioaddr + GMAC_RXQ_CTRL1); + value &= ~GMAC_RXQCTRL_FPRQ; + value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT; + writel(value, ioaddr + GMAC_RXQ_CTRL1); + + value = readl(ioaddr + MAC_FPE_CTRL_STS); + value |= EFPE; + writel(value, ioaddr + MAC_FPE_CTRL_STS); +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 23fecf68f781..3e8faa96b4d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -11,6 +11,9 @@ #define PRTYEN BIT(1) #define TMOUTEN BIT(0) +#define MAC_FPE_CTRL_STS 0x00000234 +#define EFPE BIT(0) + #define MAC_PPS_CONTROL 0x00000b70 #define PPS_MAXIDX(x) ((((x) + 1) * 8) - 1) #define PPS_MINIDX(x) ((x) * 8) @@ -30,6 +33,23 @@ #define MAC_PPSx_INTERVAL(x) (0x00000b88 + ((x) * 0x10)) #define MAC_PPSx_WIDTH(x) (0x00000b8c + ((x) * 0x10)) +#define MTL_EST_CONTROL 0x00000c50 +#define PTOV GENMASK(31, 24) +#define PTOV_SHIFT 24 +#define SSWL BIT(1) +#define EEST BIT(0) +#define MTL_EST_GCL_CONTROL 0x00000c80 +#define BTR_LOW 0x0 +#define BTR_HIGH 0x1 +#define CTR_LOW 0x2 +#define CTR_HIGH 0x3 +#define TER 0x4 +#define LLR 0x5 +#define ADDR_SHIFT 8 +#define GCRR BIT(2) +#define SRWO BIT(0) +#define MTL_EST_GCL_DATA 0x00000c84 + #define MTL_RXP_CONTROL_STATUS 0x00000ca0 #define RXPI BIT(31) #define NPE GENMASK(23, 16) @@ -83,5 +103,9 @@ int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries, int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, struct stmmac_pps_cfg *cfg, bool enable, u32 sub_second_inc, u32 systime_flags); +int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate); +void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable); #endif /* __DWMAC5_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 292b880f3f9f..e5dbd0bc257e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -96,6 +96,8 @@ /* DMA default interrupt mask */ #define DMA_INTR_DEFAULT_MASK (DMA_INTR_NORMAL | DMA_INTR_ABNORMAL) +#define DMA_INTR_DEFAULT_RX (DMA_INTR_ENA_RIE) +#define DMA_INTR_DEFAULT_TX (DMA_INTR_ENA_TIE) /* DMA Status register defines */ #define DMA_STATUS_GLPII 0x40000000 /* GMAC LPI interrupt */ @@ -130,8 +132,8 @@ #define NUM_DWMAC1000_DMA_REGS 23 void dwmac_enable_dma_transmission(void __iomem *ioaddr); -void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan); +void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 1bc25aa86dbd..688d36095333 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -37,14 +37,28 @@ void dwmac_enable_dma_transmission(void __iomem *ioaddr) writel(1, ioaddr + DMA_XMT_POLL_DEMAND); } -void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_INTR_ENA); + + if (rx) + value |= DMA_INTR_DEFAULT_RX; + if (tx) + value |= DMA_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_INTR_ENA); } -void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(0, ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_INTR_ENA); + + if (rx) + value &= ~DMA_INTR_DEFAULT_RX; + if (tx) + value &= ~DMA_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_INTR_ENA); } void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 3b6e559aa0b9..174b903a8211 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -73,6 +73,9 @@ #define XGMAC_RXQ_CTRL0 0x000000a0 #define XGMAC_RXQEN(x) GENMASK((x) * 2 + 1, (x) * 2) #define XGMAC_RXQEN_SHIFT(x) ((x) * 2) +#define XGMAC_RXQ_CTRL1 0x000000a4 +#define XGMAC_RQ GENMASK(7, 4) +#define XGMAC_RQ_SHIFT 4 #define XGMAC_RXQ_CTRL2 0x000000a8 #define XGMAC_RXQ_CTRL3 0x000000ac #define XGMAC_PSRQ(x) GENMASK((x) * 8 + 7, (x) * 8) @@ -136,6 +139,10 @@ #define XGMAC_HWFEAT_TXQCNT GENMASK(9, 6) #define XGMAC_HWFEAT_RXQCNT GENMASK(3, 0) #define XGMAC_HW_FEATURE3 0x00000128 +#define XGMAC_HWFEAT_FPESEL BIT(26) +#define XGMAC_HWFEAT_ESTWID GENMASK(24, 23) +#define XGMAC_HWFEAT_ESTDEP GENMASK(22, 20) +#define XGMAC_HWFEAT_ESTSEL BIT(19) #define XGMAC_HWFEAT_ASP GENMASK(15, 14) #define XGMAC_HWFEAT_DVLAN BIT(13) #define XGMAC_HWFEAT_FRPES GENMASK(12, 11) @@ -148,6 +155,8 @@ #define XGMAC_MDIO_ADDR 0x00000200 #define XGMAC_MDIO_DATA 0x00000204 #define XGMAC_MDIO_C22P 0x00000220 +#define XGMAC_FPE_CTRL_STS 0x00000280 +#define XGMAC_EFPE BIT(0) #define XGMAC_ADDRx_HIGH(x) (0x00000300 + (x) * 0x8) #define XGMAC_ADDR_MAX 32 #define XGMAC_AE BIT(31) @@ -237,6 +246,22 @@ #define XGMAC_TC_PRTY_MAP1 0x00001044 #define XGMAC_PSTC(x) GENMASK((x) * 8 + 7, (x) * 8) #define XGMAC_PSTC_SHIFT(x) ((x) * 8) +#define XGMAC_MTL_EST_CONTROL 0x00001050 +#define XGMAC_PTOV GENMASK(31, 23) +#define XGMAC_PTOV_SHIFT 23 +#define XGMAC_SSWL BIT(1) +#define XGMAC_EEST BIT(0) +#define XGMAC_MTL_EST_GCL_CONTROL 0x00001080 +#define XGMAC_BTR_LOW 0x0 +#define XGMAC_BTR_HIGH 0x1 +#define XGMAC_CTR_LOW 0x2 +#define XGMAC_CTR_HIGH 0x3 +#define XGMAC_TER 0x4 +#define XGMAC_LLR 0x5 +#define XGMAC_ADDR_SHIFT 8 +#define XGMAC_GCRR BIT(2) +#define XGMAC_SRWO BIT(0) +#define XGMAC_MTL_EST_GCL_DATA 0x00001084 #define XGMAC_MTL_RXP_CONTROL_STATUS 0x000010a0 #define XGMAC_RXPI BIT(31) #define XGMAC_NPE GENMASK(23, 16) @@ -361,6 +386,8 @@ #define XGMAC_TIE BIT(0) #define XGMAC_DMA_INT_DEFAULT_EN (XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \ XGMAC_RIE | XGMAC_TIE) +#define XGMAC_DMA_INT_DEFAULT_RX (XGMAC_RBUE | XGMAC_RIE) +#define XGMAC_DMA_INT_DEFAULT_TX (XGMAC_TIE) #define XGMAC_DMA_CH_Rx_WATCHDOG(x) (0x0000313c + (0x80 * (x))) #define XGMAC_RWT GENMASK(7, 0) #define XGMAC_DMA_CH_STATUS(x) (0x00003160 + (0x80 * (x))) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 082f5ee9e525..307105e8dea0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1359,6 +1359,80 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, writel(value, ioaddr + XGMAC_RX_CONFIG); } +static int dwxgmac3_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) +{ + u32 ctrl; + + writel(val, ioaddr + XGMAC_MTL_EST_GCL_DATA); + + ctrl = (reg << XGMAC_ADDR_SHIFT); + ctrl |= gcl ? 0 : XGMAC_GCRR; + + writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL); + + ctrl |= XGMAC_SRWO; + writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL); + + return readl_poll_timeout_atomic(ioaddr + XGMAC_MTL_EST_GCL_CONTROL, + ctrl, !(ctrl & XGMAC_SRWO), 100, 5000); +} + +static int dwxgmac3_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate) +{ + int i, ret = 0x0; + u32 ctrl; + + ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_LOW, cfg->btr[0], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_HIGH, cfg->btr[1], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_TER, cfg->ter, false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_LLR, cfg->gcl_size, false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_LOW, cfg->ctr[0], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_HIGH, cfg->ctr[1], false); + if (ret) + return ret; + + for (i = 0; i < cfg->gcl_size; i++) { + ret = dwxgmac3_est_write(ioaddr, i, cfg->gcl[i], true); + if (ret) + return ret; + } + + ctrl = readl(ioaddr + XGMAC_MTL_EST_CONTROL); + ctrl &= ~XGMAC_PTOV; + ctrl |= ((1000000000 / ptp_rate) * 9) << XGMAC_PTOV_SHIFT; + if (cfg->enable) + ctrl |= XGMAC_EEST | XGMAC_SSWL; + else + ctrl &= ~XGMAC_EEST; + + writel(ctrl, ioaddr + XGMAC_MTL_EST_CONTROL); + return 0; +} + +static void dwxgmac3_fpe_configure(void __iomem *ioaddr, u32 num_txq, + u32 num_rxq, bool enable) +{ + u32 value; + + if (!enable) { + value = readl(ioaddr + XGMAC_FPE_CTRL_STS); + + value &= ~XGMAC_EFPE; + + writel(value, ioaddr + XGMAC_FPE_CTRL_STS); + } + + value = readl(ioaddr + XGMAC_RXQ_CTRL1); + value &= ~XGMAC_RQ; + value |= (num_rxq - 1) << XGMAC_RQ_SHIFT; + writel(value, ioaddr + XGMAC_RXQ_CTRL1); + + value = readl(ioaddr + XGMAC_FPE_CTRL_STS); + value |= XGMAC_EFPE; + writel(value, ioaddr + XGMAC_FPE_CTRL_STS); +} + const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, .set_mac = dwxgmac2_set_mac, @@ -1402,6 +1476,8 @@ const struct stmmac_ops dwxgmac210_ops = { .config_l3_filter = dwxgmac2_config_l3_filter, .config_l4_filter = dwxgmac2_config_l4_filter, .set_arp_offload = dwxgmac2_set_arp_offload, + .est_configure = dwxgmac3_est_configure, + .fpe_configure = dwxgmac3_fpe_configure, }; int dwxgmac2_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 22a7f0cc1b90..c1ca73ebb0e7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -248,14 +248,30 @@ static void dwxgmac2_dma_tx_mode(void __iomem *ioaddr, int mode, writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel)); } -static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan) +static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(XGMAC_DMA_INT_DEFAULT_EN, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + + if (rx) + value |= XGMAC_DMA_INT_DEFAULT_RX; + if (tx) + value |= XGMAC_DMA_INT_DEFAULT_TX; + + writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); } -static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan) +static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(0, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + + if (rx) + value &= ~XGMAC_DMA_INT_DEFAULT_RX; + if (tx) + value &= ~XGMAC_DMA_INT_DEFAULT_TX; + + writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); } static void dwxgmac2_dma_start_tx(void __iomem *ioaddr, u32 chan) @@ -413,6 +429,10 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature 3 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE3); + dma_cap->fpesel = (hw_cap & XGMAC_HWFEAT_FPESEL) >> 26; + dma_cap->estwid = (hw_cap & XGMAC_HWFEAT_ESTWID) >> 23; + dma_cap->estdep = (hw_cap & XGMAC_HWFEAT_ESTDEP) >> 20; + dma_cap->estsel = (hw_cap & XGMAC_HWFEAT_ESTSEL) >> 19; dma_cap->asp = (hw_cap & XGMAC_HWFEAT_ASP) >> 14; dma_cap->dvlan = (hw_cap & XGMAC_HWFEAT_DVLAN) >> 13; dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index aa5b917398fe..905a6f0edaca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -187,8 +187,10 @@ struct stmmac_dma_ops { void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr); void (*enable_dma_transmission) (void __iomem *ioaddr); - void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan); - void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan); + void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan, + bool rx, bool tx); + void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan, + bool rx, bool tx); void (*start_tx)(void __iomem *ioaddr, u32 chan); void (*stop_tx)(void __iomem *ioaddr, u32 chan); void (*start_rx)(void __iomem *ioaddr, u32 chan); @@ -274,6 +276,7 @@ struct stmmac_safety_stats; struct stmmac_tc_entry; struct stmmac_pps_cfg; struct stmmac_rss; +struct stmmac_est; /* Helpers to program the MAC core */ struct stmmac_ops { @@ -371,6 +374,10 @@ struct stmmac_ops { bool en, bool udp, bool sa, bool inv, u32 match); void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr); + int (*est_configure)(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate); + void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable); }; #define stmmac_core_init(__priv, __args...) \ @@ -457,6 +464,10 @@ struct stmmac_ops { stmmac_do_callback(__priv, mac, config_l4_filter, __args) #define stmmac_set_arp_offload(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, set_arp_offload, __args) +#define stmmac_est_configure(__priv, __args...) \ + stmmac_do_callback(__priv, mac, est_configure, __args) +#define stmmac_fpe_configure(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, fpe_configure, __args) /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -514,6 +525,7 @@ struct stmmac_priv; struct tc_cls_u32_offload; struct tc_cbs_qopt_offload; struct flow_cls_offload; +struct tc_taprio_qopt_offload; struct stmmac_tc_ops { int (*init)(struct stmmac_priv *priv); @@ -523,6 +535,8 @@ struct stmmac_tc_ops { struct tc_cbs_qopt_offload *qopt); int (*setup_cls)(struct stmmac_priv *priv, struct flow_cls_offload *cls); + int (*setup_taprio)(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt); }; #define stmmac_tc_init(__priv, __args...) \ @@ -533,6 +547,8 @@ struct stmmac_tc_ops { stmmac_do_callback(__priv, tc, setup_cbs, __args) #define stmmac_tc_setup_cls(__priv, __args...) \ stmmac_do_callback(__priv, tc, setup_cls, __args) +#define stmmac_tc_setup_taprio(__priv, __args...) \ + stmmac_do_callback(__priv, tc, setup_taprio, __args) struct stmmac_counters; diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 252cf48c5816..a57b0fa815ab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -119,6 +119,13 @@ #define MMC_RX_ICMP_GD_OCTETS 0x180 #define MMC_RX_ICMP_ERR_OCTETS 0x184 +#define MMC_TX_FPE_FRAG 0x1a8 +#define MMC_TX_HOLD_REQ 0x1ac +#define MMC_RX_PKT_ASSEMBLY_ERR 0x1c8 +#define MMC_RX_PKT_SMD_ERR 0x1cc +#define MMC_RX_PKT_ASSEMBLY_OK 0x1d0 +#define MMC_RX_FPE_FRAG 0x1d4 + /* XGMAC MMC Registers */ #define MMC_XGMAC_TX_OCTET_GB 0x14 #define MMC_XGMAC_TX_PKT_GB 0x1c @@ -315,6 +322,15 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) mmc->mmc_rx_tcp_err_octets += readl(mmcaddr + MMC_RX_TCP_ERR_OCTETS); mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS); mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS); + + mmc->mmc_tx_fpe_fragment_cntr += readl(mmcaddr + MMC_TX_FPE_FRAG); + mmc->mmc_tx_hold_req_cntr += readl(mmcaddr + MMC_TX_HOLD_REQ); + mmc->mmc_rx_packet_assembly_err_cntr += + readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_ERR); + mmc->mmc_rx_packet_smd_err_cntr += readl(mmcaddr + MMC_RX_PKT_SMD_ERR); + mmc->mmc_rx_packet_assembly_ok_cntr += + readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_OK); + mmc->mmc_rx_fpe_fragment_cntr += readl(mmcaddr + MMC_RX_FPE_FRAG); } const struct stmmac_mmc_ops dwmac_mmc_ops = { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index d993fc7e82c3..f98c5eefb382 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -88,6 +88,7 @@ struct stmmac_channel { struct napi_struct rx_napi ____cacheline_aligned_in_smp; struct napi_struct tx_napi ____cacheline_aligned_in_smp; struct stmmac_priv *priv_data; + spinlock_t lock; u32 index; }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bbc65bd332a8..18a959589cbc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1975,7 +1975,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) /* We still have pending packets, let's call for a new scheduling */ if (tx_q->dirty_tx != tx_q->cur_tx) - mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); + mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer)); __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); @@ -2069,17 +2069,25 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, &priv->xstats, chan); struct stmmac_channel *ch = &priv->channel[chan]; + unsigned long flags; if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) { if (napi_schedule_prep(&ch->rx_napi)) { - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); __napi_schedule_irqoff(&ch->rx_napi); - status |= handle_tx; } } - if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) - napi_schedule_irqoff(&ch->tx_napi); + if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) { + if (napi_schedule_prep(&ch->tx_napi)) { + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); + __napi_schedule_irqoff(&ch->tx_napi); + } + } return status; } @@ -2274,14 +2282,14 @@ static void stmmac_tx_timer(struct timer_list *t) ch = &priv->channel[tx_q->queue_index]; - /* - * If NAPI is already running we can miss some events. Let's rearm - * the timer and try again. - */ - if (likely(napi_schedule_prep(&ch->tx_napi))) + if (likely(napi_schedule_prep(&ch->tx_napi))) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, ch->index, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); __napi_schedule(&ch->tx_napi); - else - mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); + } } /** @@ -3751,8 +3759,14 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget) priv->xstats.napi_poll++; work_done = stmmac_rx(priv, budget, chan); - if (work_done < budget && napi_complete_done(napi, work_done)) - stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + if (work_done < budget && napi_complete_done(napi, work_done)) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); + } + return work_done; } @@ -3761,7 +3775,6 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) struct stmmac_channel *ch = container_of(napi, struct stmmac_channel, tx_napi); struct stmmac_priv *priv = ch->priv_data; - struct stmmac_tx_queue *tx_q; u32 chan = ch->index; int work_done; @@ -3770,15 +3783,12 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan); work_done = min(work_done, budget); - if (work_done < budget) - napi_complete_done(napi, work_done); + if (work_done < budget && napi_complete_done(napi, work_done)) { + unsigned long flags; - /* Force transmission restart */ - tx_q = &priv->tx_queue[chan]; - if (tx_q->cur_tx != tx_q->dirty_tx) { - stmmac_enable_dma_transmission(priv, priv->ioaddr); - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, - chan); + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); } return work_done; @@ -3792,7 +3802,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. */ -static void stmmac_tx_timeout(struct net_device *dev) +static void stmmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct stmmac_priv *priv = netdev_priv(dev); @@ -4066,6 +4076,8 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, priv, priv, true); case TC_SETUP_QDISC_CBS: return stmmac_tc_setup_cbs(priv, priv, type_data); + case TC_SETUP_QDISC_TAPRIO: + return stmmac_tc_setup_taprio(priv, priv, type_data); default: return -EOPNOTSUPP; } @@ -4238,9 +4250,38 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) priv->dma_cap.number_rx_channel); seq_printf(seq, "\tNumber of Additional TX channel: %d\n", priv->dma_cap.number_tx_channel); + seq_printf(seq, "\tNumber of Additional RX queues: %d\n", + priv->dma_cap.number_rx_queues); + seq_printf(seq, "\tNumber of Additional TX queues: %d\n", + priv->dma_cap.number_tx_queues); seq_printf(seq, "\tEnhanced descriptors: %s\n", (priv->dma_cap.enh_desc) ? "Y" : "N"); - + seq_printf(seq, "\tTX Fifo Size: %d\n", priv->dma_cap.tx_fifo_size); + seq_printf(seq, "\tRX Fifo Size: %d\n", priv->dma_cap.rx_fifo_size); + seq_printf(seq, "\tHash Table Size: %d\n", priv->dma_cap.hash_tb_sz); + seq_printf(seq, "\tTSO: %s\n", priv->dma_cap.tsoen ? "Y" : "N"); + seq_printf(seq, "\tNumber of PPS Outputs: %d\n", + priv->dma_cap.pps_out_num); + seq_printf(seq, "\tSafety Features: %s\n", + priv->dma_cap.asp ? "Y" : "N"); + seq_printf(seq, "\tFlexible RX Parser: %s\n", + priv->dma_cap.frpsel ? "Y" : "N"); + seq_printf(seq, "\tEnhanced Addressing: %d\n", + priv->dma_cap.addr64); + seq_printf(seq, "\tReceive Side Scaling: %s\n", + priv->dma_cap.rssen ? "Y" : "N"); + seq_printf(seq, "\tVLAN Hash Filtering: %s\n", + priv->dma_cap.vlhash ? "Y" : "N"); + seq_printf(seq, "\tSplit Header: %s\n", + priv->dma_cap.sphen ? "Y" : "N"); + seq_printf(seq, "\tVLAN TX Insertion: %s\n", + priv->dma_cap.vlins ? "Y" : "N"); + seq_printf(seq, "\tDouble VLAN: %s\n", + priv->dma_cap.dvlan ? "Y" : "N"); + seq_printf(seq, "\tNumber of L3/L4 Filters: %d\n", + priv->dma_cap.l3l4fnum); + seq_printf(seq, "\tARP Offloading: %s\n", + priv->dma_cap.arpoffsel ? "Y" : "N"); return 0; } DEFINE_SHOW_ATTRIBUTE(stmmac_dma_cap); @@ -4685,6 +4726,7 @@ int stmmac_dvr_probe(struct device *device, for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; + spin_lock_init(&ch->lock); ch->priv_data = priv; ch->index = queue; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 7d972e0fd2b0..6c4686b77516 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -591,9 +591,146 @@ static int tc_setup_cls(struct stmmac_priv *priv, return ret; } +static int tc_setup_taprio(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt) +{ + u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; + struct plat_stmmacenet_data *plat = priv->plat; + struct timespec64 time; + bool fpe = false; + int i, ret = 0; + u64 ctr; + + if (!priv->dma_cap.estsel) + return -EOPNOTSUPP; + + switch (wid) { + case 0x1: + wid = 16; + break; + case 0x2: + wid = 20; + break; + case 0x3: + wid = 24; + break; + default: + return -EOPNOTSUPP; + } + + switch (dep) { + case 0x1: + dep = 64; + break; + case 0x2: + dep = 128; + break; + case 0x3: + dep = 256; + break; + case 0x4: + dep = 512; + break; + case 0x5: + dep = 1024; + break; + default: + return -EOPNOTSUPP; + } + + if (!qopt->enable) + goto disable; + if (qopt->num_entries >= dep) + return -EINVAL; + if (!qopt->base_time) + return -ERANGE; + if (!qopt->cycle_time) + return -ERANGE; + + if (!plat->est) { + plat->est = devm_kzalloc(priv->device, sizeof(*plat->est), + GFP_KERNEL); + if (!plat->est) + return -ENOMEM; + } else { + memset(plat->est, 0, sizeof(*plat->est)); + } + + size = qopt->num_entries; + + priv->plat->est->gcl_size = size; + priv->plat->est->enable = qopt->enable; + + for (i = 0; i < size; i++) { + s64 delta_ns = qopt->entries[i].interval; + u32 gates = qopt->entries[i].gate_mask; + + if (delta_ns > GENMASK(wid, 0)) + return -ERANGE; + if (gates > GENMASK(31 - wid, 0)) + return -ERANGE; + + switch (qopt->entries[i].command) { + case TC_TAPRIO_CMD_SET_GATES: + if (fpe) + return -EINVAL; + break; + case TC_TAPRIO_CMD_SET_AND_HOLD: + gates |= BIT(0); + fpe = true; + break; + case TC_TAPRIO_CMD_SET_AND_RELEASE: + gates &= ~BIT(0); + fpe = true; + break; + default: + return -EOPNOTSUPP; + } + + priv->plat->est->gcl[i] = delta_ns | (gates << wid); + } + + /* Adjust for real system time */ + time = ktime_to_timespec64(qopt->base_time); + priv->plat->est->btr[0] = (u32)time.tv_nsec; + priv->plat->est->btr[1] = (u32)time.tv_sec; + + ctr = qopt->cycle_time; + priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); + priv->plat->est->ctr[1] = (u32)ctr; + + if (fpe && !priv->dma_cap.fpesel) + return -EOPNOTSUPP; + + ret = stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, fpe); + if (ret && fpe) { + netdev_err(priv->dev, "failed to enable Frame Preemption\n"); + return ret; + } + + ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + if (ret) { + netdev_err(priv->dev, "failed to configure EST\n"); + goto disable; + } + + netdev_info(priv->dev, "configured EST\n"); + return 0; + +disable: + priv->plat->est->enable = false; + stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + return ret; +} + const struct stmmac_tc_ops dwmac510_tc_ops = { .init = tc_init, .setup_cls_u32 = tc_setup_cls_u32, .setup_cbs = tc_setup_cbs, .setup_cls = tc_setup_cls, + .setup_taprio = tc_setup_taprio, }; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index c91876f8c536..6ec9163e232c 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -2666,7 +2666,7 @@ static void cas_netpoll(struct net_device *dev) } #endif -static void cas_tx_timeout(struct net_device *dev) +static void cas_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cas *cp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index f5fd1f3c07cc..9a5004f674c7 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6517,7 +6517,7 @@ static void niu_reset_task(struct work_struct *work) spin_unlock_irqrestore(&np->lock, flags); } -static void niu_tx_timeout(struct net_device *dev) +static void niu_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct niu *np = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index e9b757b03b56..c5add0b45eed 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -941,7 +941,7 @@ static int bigmac_close(struct net_device *dev) return 0; } -static void bigmac_tx_timeout(struct net_device *dev) +static void bigmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bigmac *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 3e7631160384..8358064fbd48 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -970,7 +970,7 @@ static void gem_poll_controller(struct net_device *dev) } #endif -static void gem_tx_timeout(struct net_device *dev) +static void gem_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gem *gp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index d007dfeba5c3..f0fe7bb2a750 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -2246,7 +2246,7 @@ static int happy_meal_close(struct net_device *dev) #define SXD(x) #endif -static void happy_meal_tx_timeout(struct net_device *dev) +static void happy_meal_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct happy_meal *hp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index 1468fa0a54e9..2102b95ec347 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -544,7 +544,7 @@ static void qe_tx_reclaim(struct sunqe *qep) qep->tx_old = elem; } -static void qe_tx_timeout(struct net_device *dev) +static void qe_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sunqe *qep = netdev_priv(dev); int tx_full; diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 8b94d9ad9e2b..a601a306f9a5 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1539,7 +1539,7 @@ out_dropped: } EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common); -void sunvnet_tx_timeout_common(struct net_device *dev) +void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue) { /* XXX Implement me XXX */ } diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index 2b808d2482d6..5416a3cb9e7d 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -135,7 +135,7 @@ int sunvnet_open_common(struct net_device *dev); int sunvnet_close_common(struct net_device *dev); void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp); int sunvnet_set_mac_addr_common(struct net_device *dev, void *p); -void sunvnet_tx_timeout_common(struct net_device *dev); +void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue); netdev_tx_t sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, struct vnet_port *(*vnet_tx_port) diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index a1f5a1e61040..07046a2370b3 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -689,7 +689,7 @@ static int xlgmac_close(struct net_device *netdev) return 0; } -static void xlgmac_tx_timeout(struct net_device *netdev) +static void xlgmac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct xlgmac_pdata *pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 3a655a4dc10e..5e1b8292cd3f 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -797,7 +797,7 @@ static irqreturn_t cpmac_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static void cpmac_tx_timeout(struct net_device *dev) +static void cpmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cpmac_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 707d5eb480ce..97a058ca60ac 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -272,7 +272,7 @@ void soft_reset(const char *module, void __iomem *reg) WARN(readl_relaxed(reg) & 1, "failed to soft-reset %s\n", module); } -void cpsw_ndo_tx_timeout(struct net_device *ndev) +void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index bc726356a72c..b8d7b924ee3d 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -449,7 +449,7 @@ int cpsw_rx_poll(struct napi_struct *napi_rx, int budget); void cpsw_rx_vlan_encap(struct sk_buff *skb); void soft_reset(const char *module, void __iomem *reg); void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv); -void cpsw_ndo_tx_timeout(struct net_device *ndev); +void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue); int cpsw_need_resplit(struct cpsw_common *cpsw); int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd); int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index ae27be85e363..75d4e16c692b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -983,7 +983,7 @@ fail_tx: * error and re-initialize the TX channel for hardware operation * */ -static void emac_dev_tx_timeout(struct net_device *ndev) +static void emac_dev_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct emac_priv *priv = netdev_priv(ndev); struct device *emac_dev = &ndev->dev; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 1b2702f74455..432645e86495 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1811,7 +1811,7 @@ out: return (ret == 0) ? 0 : err; } -static void netcp_ndo_tx_timeout(struct net_device *ndev) +static void netcp_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct netcp_intf *netcp = netdev_priv(ndev); unsigned int descs = knav_pool_count(netcp->tx_pool); diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 78f0f2d59e22..ad465202980a 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -161,7 +161,7 @@ static void tlan_set_multicast_list(struct net_device *); static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int tlan_probe1(struct pci_dev *pdev, long ioaddr, int irq, int rev, const struct pci_device_id *ent); -static void tlan_tx_timeout(struct net_device *dev); +static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue); static void tlan_tx_timeout_work(struct work_struct *work); static int tlan_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); @@ -997,7 +997,7 @@ static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * **************************************************************/ -static void tlan_tx_timeout(struct net_device *dev) +static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue) { TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Transmit timed out.\n", dev->name); @@ -1028,7 +1028,7 @@ static void tlan_tx_timeout_work(struct work_struct *work) struct tlan_priv *priv = container_of(work, struct tlan_priv, tlan_tqueue); - tlan_tx_timeout(priv->dev); + tlan_tx_timeout(priv->dev, UINT_MAX); } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 9d9f8acb7ee3..070dd6fa9401 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1405,7 +1405,7 @@ out: * * called, if tx hangs. Schedules a task that resets the interface */ -void gelic_net_tx_timeout(struct net_device *netdev) +void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct gelic_card *card; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 051033580f0a..805903dbddcc 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -359,7 +359,7 @@ int gelic_net_open(struct net_device *netdev); int gelic_net_stop(struct net_device *netdev); netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev); void gelic_net_set_multi(struct net_device *netdev); -void gelic_net_tx_timeout(struct net_device *netdev); +void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue); int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card); /* shared ethtool ops */ diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 538e70810d3d..6576271642c1 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2180,7 +2180,7 @@ out: * called, if tx hangs. Schedules a task that resets the interface */ static void -spider_net_tx_timeout(struct net_device *netdev) +spider_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct spider_net_card *card; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 12466a72cefc..708de826200e 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -483,7 +483,7 @@ static void tc35815_txdone(struct net_device *dev); static int tc35815_close(struct net_device *dev); static struct net_device_stats *tc35815_get_stats(struct net_device *dev); static void tc35815_set_multicast_list(struct net_device *dev); -static void tc35815_tx_timeout(struct net_device *dev); +static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue); static int tc35815_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); #ifdef CONFIG_NET_POLL_CONTROLLER static void tc35815_poll_controller(struct net_device *dev); @@ -1189,7 +1189,7 @@ static void tc35815_schedule_restart(struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); } -static void tc35815_tx_timeout(struct net_device *dev) +static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tc35815_regs __iomem *tr = (struct tc35815_regs __iomem *)dev->base_addr; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index ed12dbd156f0..803247d51fe9 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -506,7 +506,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int rhine_open(struct net_device *dev); static void rhine_reset_task(struct work_struct *work); static void rhine_slow_event_task(struct work_struct *work); -static void rhine_tx_timeout(struct net_device *dev); +static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue); static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t rhine_interrupt(int irq, void *dev_instance); @@ -1761,7 +1761,7 @@ out_unlock: mutex_unlock(&rp->task_lock); } -static void rhine_tx_timeout(struct net_device *dev) +static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index bede1ff289c5..c0d181a7f83a 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -790,7 +790,7 @@ static void w5100_restart_work(struct work_struct *work) w5100_restart(priv->ndev); } -static void w5100_tx_timeout(struct net_device *ndev) +static void w5100_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct w5100_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index 6ba2747779ce..46aae30c4636 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -341,7 +341,7 @@ static void w5300_get_regs(struct net_device *ndev, } } -static void w5300_tx_timeout(struct net_device *ndev) +static void w5300_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct w5300_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 0de52e70abcc..0c26f5bcc523 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -521,7 +521,7 @@ static int xemaclite_set_mac_address(struct net_device *dev, void *address) * * This function is called when Tx time out occurs for Emaclite device. */ -static void xemaclite_tx_timeout(struct net_device *dev) +static void xemaclite_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct net_local *lp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index fd5288ff53b5..480ab7251515 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -288,7 +288,7 @@ struct local_info { */ static netdev_tx_t do_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void xirc_tx_timeout(struct net_device *dev); +static void xirc_tx_timeout(struct net_device *dev, unsigned int txqueue); static void xirc2ps_tx_timeout_task(struct work_struct *work); static void set_addresses(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -1203,7 +1203,7 @@ xirc2ps_tx_timeout_task(struct work_struct *work) } static void -xirc_tx_timeout(struct net_device *dev) +xirc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct local_info *lp = netdev_priv(dev); dev->stats.tx_errors++; diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b517c1af9de0..309a74da2ec3 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -48,7 +48,7 @@ static void fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *); static int fjes_change_mtu(struct net_device *, int); static int fjes_vlan_rx_add_vid(struct net_device *, __be16 proto, u16); static int fjes_vlan_rx_kill_vid(struct net_device *, __be16 proto, u16); -static void fjes_tx_retry(struct net_device *); +static void fjes_tx_retry(struct net_device *, unsigned int txqueue); static int fjes_acpi_add(struct acpi_device *); static int fjes_acpi_remove(struct acpi_device *); @@ -792,7 +792,7 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return ret; } -static void fjes_tx_retry(struct net_device *netdev) +static void fjes_tx_retry(struct net_device *netdev, unsigned int txqueue) { struct netdev_queue *queue = netdev_get_tx_queue(netdev, 0); diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index df495b5595f5..e7413a643929 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -687,8 +687,6 @@ struct net_device *hdlcdrv_register(const struct hdlcdrv_ops *ops, struct hdlcdrv_state *s; int err; - BUG_ON(ops == NULL); - if (privsize < sizeof(struct hdlcdrv_state)) privsize = sizeof(struct hdlcdrv_state); diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 13540dee7364..4e02a4231fcb 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -177,10 +177,10 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, event == FIB_EVENT_RULE_ADD); break; + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - err = nsim_fib_event(data, info, - event == FIB_EVENT_ENTRY_ADD); + err = nsim_fib_event(data, info, event != FIB_EVENT_ENTRY_DEL); break; } diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 5848219005d7..2e016271e126 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -324,6 +324,12 @@ config BROADCOM_PHY Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, BCM5481, BCM54810 and BCM5482 PHYs. +config BCM84881_PHY + bool "Broadcom BCM84881 PHY" + depends on PHYLIB=y + ---help--- + Support the Broadcom BCM84881 PHY. + config CICADA_PHY tristate "Cicada PHYs" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index b433ec3bf9a6..d846b4dc1c68 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o obj-$(CONFIG_BROADCOM_PHY) += broadcom.o +obj-$(CONFIG_BCM84881_PHY) += bcm84881.o obj-$(CONFIG_CICADA_PHY) += cicada.o obj-$(CONFIG_CORTINA_PHY) += cortina.o obj-$(CONFIG_DAVICOM_PHY) += davicom.o diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c new file mode 100644 index 000000000000..db59911b9b3c --- /dev/null +++ b/drivers/net/phy/bcm84881.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0 +// Broadcom BCM84881 NBASE-T PHY driver, as found on a SFP+ module. +// Copyright (C) 2019 Russell King, Deep Blue Solutions Ltd. +// +// Like the Marvell 88x3310, the Broadcom 84881 changes its host-side +// interface according to the operating speed between 10GBASE-R, +// 2500BASE-X and SGMII (but unlike the 88x3310, without the control +// word). +// +// This driver only supports those aspects of the PHY that I'm able to +// observe and test with the SFP+ module, which is an incomplete subset +// of what this PHY is able to support. For example, I only assume it +// supports a single lane Serdes connection, but it may be that the PHY +// is able to support more than that. +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/phy.h> + +enum { + MDIO_AN_C22 = 0xffe0, +}; + +static int bcm84881_wait_init(struct phy_device *phydev) +{ + unsigned int tries = 20; + int ret, val; + + do { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1); + if (val < 0) { + ret = val; + break; + } + if (!(val & MDIO_CTRL1_RESET)) { + ret = 0; + break; + } + if (!--tries) { + ret = -ETIMEDOUT; + break; + } + msleep(100); + } while (1); + + if (ret) + phydev_err(phydev, "%s failed: %d\n", __func__, ret); + + return ret; +} + +static int bcm84881_config_init(struct phy_device *phydev) +{ + switch (phydev->interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_2500BASEX: + case PHY_INTERFACE_MODE_10GKR: + break; + default: + return -ENODEV; + } + return 0; +} + +static int bcm84881_probe(struct phy_device *phydev) +{ + /* This driver requires PMAPMD and AN blocks */ + const u32 mmd_mask = MDIO_DEVS_PMAPMD | MDIO_DEVS_AN; + + if (!phydev->is_c45 || + (phydev->c45_ids.devices_in_package & mmd_mask) != mmd_mask) + return -ENODEV; + + return 0; +} + +static int bcm84881_get_features(struct phy_device *phydev) +{ + int ret; + + ret = genphy_c45_pma_read_abilities(phydev); + if (ret) + return ret; + + /* Although the PHY sets bit 1.11.8, it does not support 10M modes */ + linkmode_clear_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + phydev->supported); + linkmode_clear_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + phydev->supported); + + return 0; +} + +static int bcm84881_config_aneg(struct phy_device *phydev) +{ + bool changed = false; + u32 adv; + int ret; + + /* Wait for the PHY to finish initialising, otherwise our + * advertisement may be overwritten. + */ + ret = bcm84881_wait_init(phydev); + if (ret) + return ret; + + /* We don't support manual MDI control */ + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + + /* disabled autoneg doesn't seem to work with this PHY */ + if (phydev->autoneg == AUTONEG_DISABLE) + return -EINVAL; + + ret = genphy_c45_an_config_aneg(phydev); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, + MDIO_AN_C22 + MII_CTRL1000, + ADVERTISE_1000FULL | ADVERTISE_1000HALF, + adv); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + return genphy_c45_check_and_restart_aneg(phydev, changed); +} + +static int bcm84881_aneg_done(struct phy_device *phydev) +{ + int bmsr, val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); + if (val < 0) + return val; + + bmsr = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_C22 + MII_BMSR); + if (bmsr < 0) + return val; + + return !!(val & MDIO_AN_STAT1_COMPLETE) && + !!(bmsr & BMSR_ANEGCOMPLETE); +} + +static int bcm84881_read_status(struct phy_device *phydev) +{ + unsigned int mode; + int bmsr, val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); + if (val < 0) + return val; + + if (val & MDIO_AN_CTRL1_RESTART) { + phydev->link = 0; + return 0; + } + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); + if (val < 0) + return val; + + bmsr = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_C22 + MII_BMSR); + if (bmsr < 0) + return val; + + phydev->autoneg_complete = !!(val & MDIO_AN_STAT1_COMPLETE) && + !!(bmsr & BMSR_ANEGCOMPLETE); + phydev->link = !!(val & MDIO_STAT1_LSTATUS) && + !!(bmsr & BMSR_LSTATUS); + if (phydev->autoneg == AUTONEG_ENABLE && !phydev->autoneg_complete) + phydev->link = false; + + if (!phydev->link) + return 0; + + linkmode_zero(phydev->lp_advertising); + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + phydev->mdix = 0; + + if (phydev->autoneg_complete) { + val = genphy_c45_read_lpa(phydev); + if (val < 0) + return val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, + MDIO_AN_C22 + MII_STAT1000); + if (val < 0) + return val; + + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, val); + + if (phydev->autoneg == AUTONEG_ENABLE) + phy_resolve_aneg_linkmode(phydev); + } + + if (phydev->autoneg == AUTONEG_DISABLE) { + /* disabled autoneg doesn't seem to work, so force the link + * down. + */ + phydev->link = 0; + return 0; + } + + /* Set the host link mode - we set the phy interface mode and + * the speed according to this register so that downshift works. + * We leave the duplex setting as per the resolution from the + * above. + */ + val = phy_read_mmd(phydev, MDIO_MMD_VEND1, 0x4011); + mode = (val & 0x1e) >> 1; + if (mode == 1 || mode == 2) + phydev->interface = PHY_INTERFACE_MODE_SGMII; + else if (mode == 3) + phydev->interface = PHY_INTERFACE_MODE_10GKR; + else if (mode == 4) + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + switch (mode & 7) { + case 1: + phydev->speed = SPEED_100; + break; + case 2: + phydev->speed = SPEED_1000; + break; + case 3: + phydev->speed = SPEED_10000; + break; + case 4: + phydev->speed = SPEED_2500; + break; + case 5: + phydev->speed = SPEED_5000; + break; + } + + return genphy_c45_read_mdix(phydev); +} + +static struct phy_driver bcm84881_drivers[] = { + { + .phy_id = 0xae025150, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM84881", + .config_init = bcm84881_config_init, + .probe = bcm84881_probe, + .get_features = bcm84881_get_features, + .config_aneg = bcm84881_config_aneg, + .aneg_done = bcm84881_aneg_done, + .read_status = bcm84881_read_status, + }, +}; + +module_phy_driver(bcm84881_drivers); + +/* FIXME: module auto-loading for Clause 45 PHYs seems non-functional */ +static struct mdio_device_id __maybe_unused bcm84881_tbl[] = { + { 0xae025150, 0xfffffff0 }, + { }, +}; +MODULE_AUTHOR("Russell King"); +MODULE_DESCRIPTION("Broadcom BCM84881 PHY driver"); +MODULE_DEVICE_TABLE(mdio, bcm84881_tbl); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 9cd9dcee4eb2..adda0d0eab80 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -93,9 +93,11 @@ #define DP83867_STRAP_STS2_CLK_SKEW_NONE BIT(2) /* PHY CTRL bits */ -#define DP83867_PHYCR_FIFO_DEPTH_SHIFT 14 +#define DP83867_PHYCR_TX_FIFO_DEPTH_SHIFT 14 +#define DP83867_PHYCR_RX_FIFO_DEPTH_SHIFT 12 #define DP83867_PHYCR_FIFO_DEPTH_MAX 0x03 -#define DP83867_PHYCR_FIFO_DEPTH_MASK GENMASK(15, 14) +#define DP83867_PHYCR_TX_FIFO_DEPTH_MASK GENMASK(15, 14) +#define DP83867_PHYCR_RX_FIFO_DEPTH_MASK GENMASK(13, 12) #define DP83867_PHYCR_RESERVED_MASK BIT(11) /* RGMIIDCTL bits */ @@ -131,7 +133,8 @@ enum { struct dp83867_private { u32 rx_id_delay; u32 tx_id_delay; - u32 fifo_depth; + u32 tx_fifo_depth; + u32 rx_fifo_depth; int io_impedance; int port_mirroring; bool rxctrl_strap_quirk; @@ -408,18 +411,32 @@ static int dp83867_of_init(struct phy_device *phydev) dp83867->port_mirroring = DP83867_PORT_MIRROING_DIS; ret = of_property_read_u32(of_node, "ti,fifo-depth", - &dp83867->fifo_depth); + &dp83867->tx_fifo_depth); if (ret) { - phydev_err(phydev, - "ti,fifo-depth property is required\n"); - return ret; + ret = of_property_read_u32(of_node, "tx-fifo-depth", + &dp83867->tx_fifo_depth); + if (ret) + dp83867->tx_fifo_depth = + DP83867_PHYCR_FIFO_DEPTH_4_B_NIB; } - if (dp83867->fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) { - phydev_err(phydev, - "ti,fifo-depth value %u out of range\n", - dp83867->fifo_depth); + + if (dp83867->tx_fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) { + phydev_err(phydev, "tx-fifo-depth value %u out of range\n", + dp83867->tx_fifo_depth); + return -EINVAL; + } + + ret = of_property_read_u32(of_node, "rx-fifo-depth", + &dp83867->rx_fifo_depth); + if (ret) + dp83867->rx_fifo_depth = DP83867_PHYCR_FIFO_DEPTH_4_B_NIB; + + if (dp83867->rx_fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) { + phydev_err(phydev, "rx-fifo-depth value %u out of range\n", + dp83867->rx_fifo_depth); return -EINVAL; } + return 0; } #else @@ -458,12 +475,31 @@ static int dp83867_config_init(struct phy_device *phydev) phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, BIT(7)); + if (phy_interface_is_rgmii(phydev) || + phydev->interface == PHY_INTERFACE_MODE_SGMII) { + val = phy_read(phydev, MII_DP83867_PHYCTRL); + if (val < 0) + return val; + + val &= ~DP83867_PHYCR_TX_FIFO_DEPTH_MASK; + val |= (dp83867->tx_fifo_depth << + DP83867_PHYCR_TX_FIFO_DEPTH_SHIFT); + + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + val &= ~DP83867_PHYCR_RX_FIFO_DEPTH_MASK; + val |= (dp83867->rx_fifo_depth << + DP83867_PHYCR_RX_FIFO_DEPTH_SHIFT); + } + + ret = phy_write(phydev, MII_DP83867_PHYCTRL, val); + if (ret) + return ret; + } + if (phy_interface_is_rgmii(phydev)) { val = phy_read(phydev, MII_DP83867_PHYCTRL); if (val < 0) return val; - val &= ~DP83867_PHYCR_FIFO_DEPTH_MASK; - val |= (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT); /* The code below checks if "port mirroring" N/A MODE4 has been * enabled during power on bootstrap. diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index 93021904c5e4..7996a4aea8d2 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -334,7 +334,7 @@ static int dp83869_configure_mode(struct phy_device *phydev, break; default: return -EINVAL; - }; + } return ret; } diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index 356bd6472f49..fec58ad69e02 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -190,27 +190,11 @@ static int lxt973a2_read_status(struct phy_device *phydev) phydev->duplex = DUPLEX_FULL; } - if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; - phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0; - } + phy_resolve_aneg_pause(phydev); } else { - int bmcr = phy_read(phydev, MII_BMCR); - - if (bmcr < 0) - return bmcr; - - if (bmcr & BMCR_FULLDPLX) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - - if (bmcr & BMCR_SPEED1000) - phydev->speed = SPEED_1000; - else if (bmcr & BMCR_SPEED100) - phydev->speed = SPEED_100; - else - phydev->speed = SPEED_10; + err = genphy_read_status_fixed(phydev); + if (err < 0) + return err; phydev->pause = phydev->asym_pause = 0; linkmode_zero(phydev->lp_advertising); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index b1fbd1937328..28e33ece4ce1 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -162,19 +162,9 @@ #define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */ #define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */ -#define LPA_FIBER_1000HALF 0x40 -#define LPA_FIBER_1000FULL 0x20 - #define LPA_PAUSE_FIBER 0x180 #define LPA_PAUSE_ASYM_FIBER 0x100 -#define ADVERTISE_FIBER_1000HALF 0x40 -#define ADVERTISE_FIBER_1000FULL 0x20 - -#define ADVERTISE_PAUSE_FIBER 0x180 -#define ADVERTISE_PAUSE_ASYM_FIBER 0x100 - -#define REGISTER_LINK_STATUS 0x400 #define NB_FIBER_STATS 1 MODULE_DESCRIPTION("Marvell PHY driver"); @@ -497,16 +487,15 @@ static inline u32 linkmode_adv_to_fiber_adv_t(unsigned long *advertise) u32 result = 0; if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, advertise)) - result |= ADVERTISE_FIBER_1000HALF; + result |= ADVERTISE_1000XHALF; if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, advertise)) - result |= ADVERTISE_FIBER_1000FULL; + result |= ADVERTISE_1000XFULL; if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertise) && linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertise)) - result |= LPA_PAUSE_ASYM_FIBER; + result |= ADVERTISE_1000XPSE_ASYM; else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertise)) - result |= (ADVERTISE_PAUSE_FIBER - & (~ADVERTISE_PAUSE_ASYM_FIBER)); + result |= ADVERTISE_1000XPAUSE; return result; } @@ -524,7 +513,7 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev) { int changed = 0; int err; - int adv, oldadv; + u16 adv; if (phydev->autoneg != AUTONEG_ENABLE) return genphy_setup_forced(phydev); @@ -533,44 +522,19 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev) linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); - /* Setup fiber advertisement */ - adv = phy_read(phydev, MII_ADVERTISE); - if (adv < 0) - return adv; - - oldadv = adv; - adv &= ~(ADVERTISE_FIBER_1000HALF | ADVERTISE_FIBER_1000FULL - | LPA_PAUSE_FIBER); - adv |= linkmode_adv_to_fiber_adv_t(phydev->advertising); - - if (adv != oldadv) { - err = phy_write(phydev, MII_ADVERTISE, adv); - if (err < 0) - return err; + adv = linkmode_adv_to_fiber_adv_t(phydev->advertising); + /* Setup fiber advertisement */ + err = phy_modify_changed(phydev, MII_ADVERTISE, + ADVERTISE_1000XHALF | ADVERTISE_1000XFULL | + ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM, + adv); + if (err < 0) + return err; + if (err > 0) changed = 1; - } - - if (changed == 0) { - /* Advertisement hasn't changed, but maybe aneg was never on to - * begin with? Or maybe phy was isolated? - */ - int ctl = phy_read(phydev, MII_BMCR); - - if (ctl < 0) - return ctl; - - if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) - changed = 1; /* do restart aneg */ - } - /* Only restart aneg if we are advertising something different - * than we were before. - */ - if (changed > 0) - changed = genphy_restart_aneg(phydev); - - return changed; + return genphy_check_and_restart_aneg(phydev, changed); } static int m88e1510_config_aneg(struct phy_device *phydev) @@ -1302,93 +1266,29 @@ static int m88e6390_config_aneg(struct phy_device *phydev) static void fiber_lpa_mod_linkmode_lpa_t(unsigned long *advertising, u32 lpa) { linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - advertising, lpa & LPA_FIBER_1000HALF); + advertising, lpa & LPA_1000XHALF); linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - advertising, lpa & LPA_FIBER_1000FULL); -} - -/** - * marvell_update_link - update link status in real time in @phydev - * @phydev: target phy_device struct - * - * Description: Update the value in phydev->link to reflect the - * current link value. - */ -static int marvell_update_link(struct phy_device *phydev, int fiber) -{ - int status; - - /* Use the generic register for copper link, or specific - * register for fiber case - */ - if (fiber) { - status = phy_read(phydev, MII_M1011_PHY_STATUS); - if (status < 0) - return status; - - if ((status & REGISTER_LINK_STATUS) == 0) - phydev->link = 0; - else - phydev->link = 1; - } else { - return genphy_update_link(phydev); - } - - return 0; + advertising, lpa & LPA_1000XFULL); } static int marvell_read_status_page_an(struct phy_device *phydev, - int fiber) + int fiber, int status) { - int status; int lpa; - int lpagb; - - status = phy_read(phydev, MII_M1011_PHY_STATUS); - if (status < 0) - return status; - - lpa = phy_read(phydev, MII_LPA); - if (lpa < 0) - return lpa; - - lpagb = phy_read(phydev, MII_STAT1000); - if (lpagb < 0) - return lpagb; - - if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - - status = status & MII_M1011_PHY_STATUS_SPD_MASK; - phydev->pause = 0; - phydev->asym_pause = 0; - - switch (status) { - case MII_M1011_PHY_STATUS_1000: - phydev->speed = SPEED_1000; - break; - - case MII_M1011_PHY_STATUS_100: - phydev->speed = SPEED_100; - break; - - default: - phydev->speed = SPEED_10; - break; - } + int err; if (!fiber) { - mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa); - mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, lpagb); + err = genphy_read_lpa(phydev); + if (err < 0) + return err; - if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; - phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0; - } + phy_resolve_aneg_pause(phydev); } else { + lpa = phy_read(phydev, MII_LPA); + if (lpa < 0) + return lpa; + /* The fiber link is only 1000M capable */ fiber_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); @@ -1405,31 +1305,25 @@ static int marvell_read_status_page_an(struct phy_device *phydev, } } } - return 0; -} -static int marvell_read_status_page_fixed(struct phy_device *phydev) -{ - int bmcr = phy_read(phydev, MII_BMCR); - - if (bmcr < 0) - return bmcr; - - if (bmcr & BMCR_FULLDPLX) + if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) phydev->duplex = DUPLEX_FULL; else phydev->duplex = DUPLEX_HALF; - if (bmcr & BMCR_SPEED1000) + switch (status & MII_M1011_PHY_STATUS_SPD_MASK) { + case MII_M1011_PHY_STATUS_1000: phydev->speed = SPEED_1000; - else if (bmcr & BMCR_SPEED100) + break; + + case MII_M1011_PHY_STATUS_100: phydev->speed = SPEED_100; - else - phydev->speed = SPEED_10; + break; - phydev->pause = 0; - phydev->asym_pause = 0; - linkmode_zero(phydev->lp_advertising); + default: + phydev->speed = SPEED_10; + break; + } return 0; } @@ -1444,25 +1338,38 @@ static int marvell_read_status_page_fixed(struct phy_device *phydev) */ static int marvell_read_status_page(struct phy_device *phydev, int page) { + int status; int fiber; int err; - /* Detect and update the link, but return if there - * was an error + status = phy_read(phydev, MII_M1011_PHY_STATUS); + if (status < 0) + return status; + + /* Use the generic register for copper link status, + * and the PHY status register for fiber link status. */ + if (page == MII_MARVELL_FIBER_PAGE) { + phydev->link = !!(status & MII_M1011_PHY_STATUS_LINK); + } else { + err = genphy_update_link(phydev); + if (err) + return err; + } + if (page == MII_MARVELL_FIBER_PAGE) fiber = 1; else fiber = 0; - err = marvell_update_link(phydev, fiber); - if (err) - return err; + linkmode_zero(phydev->lp_advertising); + phydev->pause = 0; + phydev->asym_pause = 0; if (phydev->autoneg == AUTONEG_ENABLE) - err = marvell_read_status_page_an(phydev, fiber); + err = marvell_read_status_page_an(phydev, fiber, status); else - err = marvell_read_status_page_fixed(phydev); + err = genphy_read_status_fixed(phydev); return err; } diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 1bf13017d288..512f27b0b5cd 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -214,7 +214,7 @@ static int mv3310_sfp_insert(void *upstream, const struct sfp_eeprom_id *id) phy_interface_t iface; sfp_parse_support(phydev->sfp_bus, id, support); - iface = sfp_select_interface(phydev->sfp_bus, id, support); + iface = sfp_select_interface(phydev->sfp_bus, support); if (iface != PHY_INTERFACE_MODE_10GKR) { dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n"); diff --git a/drivers/net/phy/mdio-i2c.c b/drivers/net/phy/mdio-i2c.c index 0dce67672548..0746e2cc39ae 100644 --- a/drivers/net/phy/mdio-i2c.c +++ b/drivers/net/phy/mdio-i2c.c @@ -33,17 +33,24 @@ static int i2c_mii_read(struct mii_bus *bus, int phy_id, int reg) { struct i2c_adapter *i2c = bus->priv; struct i2c_msg msgs[2]; - u8 data[2], dev_addr = reg; + u8 addr[3], data[2], *p; int bus_addr, ret; if (!i2c_mii_valid_phy_id(phy_id)) return 0xffff; + p = addr; + if (reg & MII_ADDR_C45) { + *p++ = 0x20 | ((reg >> 16) & 31); + *p++ = reg >> 8; + } + *p++ = reg; + bus_addr = i2c_mii_phy_addr(phy_id); msgs[0].addr = bus_addr; msgs[0].flags = 0; - msgs[0].len = 1; - msgs[0].buf = &dev_addr; + msgs[0].len = p - addr; + msgs[0].buf = addr; msgs[1].addr = bus_addr; msgs[1].flags = I2C_M_RD; msgs[1].len = sizeof(data); @@ -61,18 +68,23 @@ static int i2c_mii_write(struct mii_bus *bus, int phy_id, int reg, u16 val) struct i2c_adapter *i2c = bus->priv; struct i2c_msg msg; int ret; - u8 data[3]; + u8 data[5], *p; if (!i2c_mii_valid_phy_id(phy_id)) return 0; - data[0] = reg; - data[1] = val >> 8; - data[2] = val; + p = data; + if (reg & MII_ADDR_C45) { + *p++ = (reg >> 16) & 31; + *p++ = reg >> 8; + } + *p++ = reg; + *p++ = val >> 8; + *p++ = val; msg.addr = i2c_mii_phy_addr(phy_id); msg.flags = 0; - msg.len = 3; + msg.len = p - data; msg.buf = data; ret = i2c_transfer(i2c, &msg, 1); diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index d5f8f351d9ef..50214c081164 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -2404,7 +2404,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2429,7 +2428,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2454,7 +2452,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2479,7 +2476,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc85xx_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2504,7 +2500,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc8584_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, @@ -2530,7 +2525,6 @@ static struct phy_driver vsc85xx_driver[] = { .soft_reset = &genphy_soft_reset, .config_init = &vsc8584_config_init, .config_aneg = &vsc85xx_config_aneg, - .aneg_done = &genphy_aneg_done, .read_status = &vsc85xx_read_status, .ack_interrupt = &vsc85xx_ack_interrupt, .config_intr = &vsc85xx_config_intr, diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0887ed2bb050..ec19efe7ea99 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1771,6 +1771,36 @@ int genphy_restart_aneg(struct phy_device *phydev) EXPORT_SYMBOL(genphy_restart_aneg); /** + * genphy_check_and_restart_aneg - Enable and restart auto-negotiation + * @phydev: target phy_device struct + * @restart: whether aneg restart is requested + * + * Check, and restart auto-negotiation if needed. + */ +int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart) +{ + int ret = 0; + + if (!restart) { + /* Advertisement hasn't changed, but maybe aneg was never on to + * begin with? Or maybe phy was isolated? + */ + ret = phy_read(phydev, MII_BMCR); + if (ret < 0) + return ret; + + if (!(ret & BMCR_ANENABLE) || (ret & BMCR_ISOLATE)) + restart = true; + } + + if (restart) + ret = genphy_restart_aneg(phydev); + + return ret; +} +EXPORT_SYMBOL(genphy_check_and_restart_aneg); + +/** * __genphy_config_aneg - restart auto-negotiation or write BMCR * @phydev: target phy_device struct * @changed: whether autoneg is requested @@ -1795,23 +1825,7 @@ int __genphy_config_aneg(struct phy_device *phydev, bool changed) else if (err) changed = true; - if (!changed) { - /* Advertisement hasn't changed, but maybe aneg was never on to - * begin with? Or maybe phy was isolated? - */ - int ctl = phy_read(phydev, MII_BMCR); - - if (ctl < 0) - return ctl; - - if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) - changed = true; /* do restart aneg */ - } - - /* Only restart aneg if we are advertising something different - * than we were before. - */ - return changed ? genphy_restart_aneg(phydev) : 0; + return genphy_check_and_restart_aneg(phydev, changed); } EXPORT_SYMBOL(__genphy_config_aneg); @@ -1979,6 +1993,36 @@ int genphy_read_lpa(struct phy_device *phydev) EXPORT_SYMBOL(genphy_read_lpa); /** + * genphy_read_status_fixed - read the link parameters for !aneg mode + * @phydev: target phy_device struct + * + * Read the current duplex and speed state for a PHY operating with + * autonegotiation disabled. + */ +int genphy_read_status_fixed(struct phy_device *phydev) +{ + int bmcr = phy_read(phydev, MII_BMCR); + + if (bmcr < 0) + return bmcr; + + if (bmcr & BMCR_FULLDPLX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + + if (bmcr & BMCR_SPEED1000) + phydev->speed = SPEED_1000; + else if (bmcr & BMCR_SPEED100) + phydev->speed = SPEED_100; + else + phydev->speed = SPEED_10; + + return 0; +} +EXPORT_SYMBOL(genphy_read_status_fixed); + +/** * genphy_read_status - check the link status and update current link state * @phydev: target phy_device struct * @@ -2012,22 +2056,9 @@ int genphy_read_status(struct phy_device *phydev) if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { phy_resolve_aneg_linkmode(phydev); } else if (phydev->autoneg == AUTONEG_DISABLE) { - int bmcr = phy_read(phydev, MII_BMCR); - - if (bmcr < 0) - return bmcr; - - if (bmcr & BMCR_FULLDPLX) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - - if (bmcr & BMCR_SPEED1000) - phydev->speed = SPEED_1000; - else if (bmcr & BMCR_SPEED100) - phydev->speed = SPEED_100; - else - phydev->speed = SPEED_10; + err = genphy_read_status_fixed(phydev); + if (err < 0) + return err; } return 0; @@ -2575,7 +2606,6 @@ static struct phy_driver genphy_driver = { .name = "Generic PHY", .soft_reset = genphy_no_soft_reset, .get_features = genphy_read_abilities, - .aneg_done = genphy_aneg_done, .suspend = genphy_suspend, .resume = genphy_resume, .set_loopback = genphy_loopback, diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 9a616d6bc4eb..a917f95372cd 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -48,7 +48,8 @@ struct phylink { unsigned long phylink_disable_state; /* bitmask of disables */ struct phy_device *phydev; phy_interface_t link_interface; /* PHY_INTERFACE_xxx */ - u8 link_an_mode; /* MLO_AN_xxx */ + u8 cfg_link_an_mode; /* MLO_AN_xxx */ + u8 cur_link_an_mode; u8 link_port; /* The current non-phy ethtool port */ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); @@ -71,6 +72,9 @@ struct phylink { bool mac_link_dropped; struct sfp_bus *sfp_bus; + bool sfp_may_have_phy; + __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support); + u8 sfp_port; }; #define phylink_printk(level, pl, fmt, ...) \ @@ -256,12 +260,12 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode) dn = fwnode_get_named_child_node(fwnode, "fixed-link"); if (dn || fwnode_property_present(fwnode, "fixed-link")) - pl->link_an_mode = MLO_AN_FIXED; + pl->cfg_link_an_mode = MLO_AN_FIXED; fwnode_handle_put(dn); if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 && strcmp(managed, "in-band-status") == 0) { - if (pl->link_an_mode == MLO_AN_FIXED) { + if (pl->cfg_link_an_mode == MLO_AN_FIXED) { phylink_err(pl, "can't use both fixed-link and in-band-status\n"); return -EINVAL; @@ -273,7 +277,7 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode) phylink_set(pl->supported, Asym_Pause); phylink_set(pl->supported, Pause); pl->link_config.an_enabled = true; - pl->link_an_mode = MLO_AN_INBAND; + pl->cfg_link_an_mode = MLO_AN_INBAND; switch (pl->link_config.interface) { case PHY_INTERFACE_MODE_SGMII: @@ -333,14 +337,14 @@ static void phylink_mac_config(struct phylink *pl, { phylink_dbg(pl, "%s: mode=%s/%s/%s/%s adv=%*pb pause=%02x link=%u an=%u\n", - __func__, phylink_an_mode_str(pl->link_an_mode), + __func__, phylink_an_mode_str(pl->cur_link_an_mode), phy_modes(state->interface), phy_speed_to_str(state->speed), phy_duplex_to_str(state->duplex), __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising, state->pause, state->link, state->an_enabled); - pl->ops->mac_config(pl->config, pl->link_an_mode, state); + pl->ops->mac_config(pl->config, pl->cur_link_an_mode, state); } static void phylink_mac_config_up(struct phylink *pl, @@ -441,7 +445,7 @@ static void phylink_mac_link_up(struct phylink *pl, struct net_device *ndev = pl->netdev; pl->cur_interface = link_state.interface; - pl->ops->mac_link_up(pl->config, pl->link_an_mode, + pl->ops->mac_link_up(pl->config, pl->cur_link_an_mode, pl->phy_state.interface, pl->phydev); @@ -461,7 +465,7 @@ static void phylink_mac_link_down(struct phylink *pl) if (ndev) netif_carrier_off(ndev); - pl->ops->mac_link_down(pl->config, pl->link_an_mode, + pl->ops->mac_link_down(pl->config, pl->cur_link_an_mode, pl->cur_interface); phylink_info(pl, "Link is Down\n"); } @@ -480,7 +484,7 @@ static void phylink_resolve(struct work_struct *w) } else if (pl->mac_link_dropped) { link_state.link = false; } else { - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_PHY: link_state = pl->phy_state; phylink_resolve_flow(pl, &link_state); @@ -648,7 +652,7 @@ struct phylink *phylink_create(struct phylink_config *config, return ERR_PTR(ret); } - if (pl->link_an_mode == MLO_AN_FIXED) { + if (pl->cfg_link_an_mode == MLO_AN_FIXED) { ret = phylink_parse_fixedlink(pl, fwnode); if (ret < 0) { kfree(pl); @@ -656,6 +660,8 @@ struct phylink *phylink_create(struct phylink_config *config, } } + pl->cur_link_an_mode = pl->cfg_link_an_mode; + ret = phylink_register_sfp(pl, fwnode); if (ret < 0) { kfree(pl); @@ -711,7 +717,8 @@ static void phylink_phy_change(struct phy_device *phydev, bool up, phy_duplex_to_str(phydev->duplex)); } -static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) +static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, + phy_interface_t interface) { struct phylink_link_state config; __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); @@ -729,7 +736,19 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) memset(&config, 0, sizeof(config)); linkmode_copy(supported, phy->supported); linkmode_copy(config.advertising, phy->advertising); - config.interface = pl->link_config.interface; + + /* Clause 45 PHYs switch their Serdes lane between several different + * modes, normally 10GBASE-R, SGMII. Some use 2500BASE-X for 2.5G + * speeds. We really need to know which interface modes the PHY and + * MAC supports to properly work out which linkmodes can be supported. + */ + if (phy->is_c45 && + interface != PHY_INTERFACE_MODE_RXAUI && + interface != PHY_INTERFACE_MODE_XAUI && + interface != PHY_INTERFACE_MODE_USXGMII) + config.interface = PHY_INTERFACE_MODE_NA; + else + config.interface = interface; ret = phylink_validate(pl, supported, &config); if (ret) @@ -745,6 +764,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) mutex_lock(&phy->lock); mutex_lock(&pl->state_mutex); pl->phydev = phy; + pl->phy_state.interface = interface; linkmode_copy(pl->supported, supported); linkmode_copy(pl->link_config.advertising, config.advertising); @@ -764,28 +784,18 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) return 0; } -static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy, - phy_interface_t interface) +static int phylink_attach_phy(struct phylink *pl, struct phy_device *phy, + phy_interface_t interface) { - int ret; - - if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || - (pl->link_an_mode == MLO_AN_INBAND && + if (WARN_ON(pl->cfg_link_an_mode == MLO_AN_FIXED || + (pl->cfg_link_an_mode == MLO_AN_INBAND && phy_interface_mode_is_8023z(interface)))) return -EINVAL; if (pl->phydev) return -EBUSY; - ret = phy_attach_direct(pl->netdev, phy, 0, interface); - if (ret) - return ret; - - ret = phylink_bringup_phy(pl, phy); - if (ret) - phy_detach(phy); - - return ret; + return phy_attach_direct(pl->netdev, phy, 0, interface); } /** @@ -805,13 +815,23 @@ static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy, */ int phylink_connect_phy(struct phylink *pl, struct phy_device *phy) { + int ret; + /* Use PHY device/driver interface */ if (pl->link_interface == PHY_INTERFACE_MODE_NA) { pl->link_interface = phy->interface; pl->link_config.interface = pl->link_interface; } - return __phylink_connect_phy(pl, phy, pl->link_interface); + ret = phylink_attach_phy(pl, phy, pl->link_interface); + if (ret < 0) + return ret; + + ret = phylink_bringup_phy(pl, phy, pl->link_config.interface); + if (ret) + phy_detach(phy); + + return ret; } EXPORT_SYMBOL_GPL(phylink_connect_phy); @@ -835,8 +855,8 @@ int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn, int ret; /* Fixed links and 802.3z are handled without needing a PHY */ - if (pl->link_an_mode == MLO_AN_FIXED || - (pl->link_an_mode == MLO_AN_INBAND && + if (pl->cfg_link_an_mode == MLO_AN_FIXED || + (pl->cfg_link_an_mode == MLO_AN_INBAND && phy_interface_mode_is_8023z(pl->link_interface))) return 0; @@ -847,20 +867,23 @@ int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn, phy_node = of_parse_phandle(dn, "phy-device", 0); if (!phy_node) { - if (pl->link_an_mode == MLO_AN_PHY) + if (pl->cfg_link_an_mode == MLO_AN_PHY) return -ENODEV; return 0; } - phy_dev = of_phy_attach(pl->netdev, phy_node, flags, - pl->link_interface); + phy_dev = of_phy_find_device(phy_node); /* We're done with the phy_node handle */ of_node_put(phy_node); - if (!phy_dev) return -ENODEV; - ret = phylink_bringup_phy(pl, phy_dev); + ret = phy_attach_direct(pl->netdev, phy_dev, flags, + pl->link_interface); + if (ret) + return ret; + + ret = phylink_bringup_phy(pl, phy_dev, pl->link_config.interface); if (ret) phy_detach(phy_dev); @@ -910,7 +933,7 @@ int phylink_fixed_state_cb(struct phylink *pl, /* It does not make sense to let the link be overriden unless we use * MLO_AN_FIXED */ - if (pl->link_an_mode != MLO_AN_FIXED) + if (pl->cfg_link_an_mode != MLO_AN_FIXED) return -EINVAL; mutex_lock(&pl->state_mutex); @@ -960,7 +983,7 @@ void phylink_start(struct phylink *pl) ASSERT_RTNL(); phylink_info(pl, "configuring for %s/%s link mode\n", - phylink_an_mode_str(pl->link_an_mode), + phylink_an_mode_str(pl->cur_link_an_mode), phy_modes(pl->link_config.interface)); /* Always set the carrier off */ @@ -983,7 +1006,7 @@ void phylink_start(struct phylink *pl) clear_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); phylink_run_resolve(pl); - if (pl->link_an_mode == MLO_AN_FIXED && pl->link_gpio) { + if (pl->cfg_link_an_mode == MLO_AN_FIXED && pl->link_gpio) { int irq = gpiod_to_irq(pl->link_gpio); if (irq > 0) { @@ -998,7 +1021,7 @@ void phylink_start(struct phylink *pl) if (irq <= 0) mod_timer(&pl->link_poll, jiffies + HZ); } - if (pl->link_an_mode == MLO_AN_FIXED && pl->get_fixed_state) + if (pl->cfg_link_an_mode == MLO_AN_FIXED && pl->get_fixed_state) mod_timer(&pl->link_poll, jiffies + HZ); if (pl->phydev) phy_start(pl->phydev); @@ -1125,7 +1148,7 @@ int phylink_ethtool_ksettings_get(struct phylink *pl, linkmode_copy(kset->link_modes.supported, pl->supported); - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_FIXED: /* We are using fixed settings. Report these as the * current link settings - and note that these also @@ -1197,7 +1220,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, /* If we have a fixed link (as specified by firmware), refuse * to change link parameters. */ - if (pl->link_an_mode == MLO_AN_FIXED && + if (pl->cur_link_an_mode == MLO_AN_FIXED && (s->speed != pl->link_config.speed || s->duplex != pl->link_config.duplex)) return -EINVAL; @@ -1209,7 +1232,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, __clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising); } else { /* If we have a fixed link, refuse to enable autonegotiation */ - if (pl->link_an_mode == MLO_AN_FIXED) + if (pl->cur_link_an_mode == MLO_AN_FIXED) return -EINVAL; config.speed = SPEED_UNKNOWN; @@ -1219,44 +1242,66 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising); } - if (phylink_validate(pl, support, &config)) - return -EINVAL; - - /* If autonegotiation is enabled, we must have an advertisement */ - if (config.an_enabled && phylink_is_empty_linkmode(config.advertising)) - return -EINVAL; - - our_kset = *kset; - linkmode_copy(our_kset.link_modes.advertising, config.advertising); - our_kset.base.speed = config.speed; - our_kset.base.duplex = config.duplex; - - /* If we have a PHY, configure the phy */ if (pl->phydev) { + /* If we have a PHY, we process the kset change via phylib. + * phylib will call our link state function if the PHY + * parameters have changed, which will trigger a resolve + * and update the MAC configuration. + */ + our_kset = *kset; + linkmode_copy(our_kset.link_modes.advertising, + config.advertising); + our_kset.base.speed = config.speed; + our_kset.base.duplex = config.duplex; + ret = phy_ethtool_ksettings_set(pl->phydev, &our_kset); if (ret) return ret; - } - mutex_lock(&pl->state_mutex); - /* Configure the MAC to match the new settings */ - linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising); - pl->link_config.interface = config.interface; - pl->link_config.speed = our_kset.base.speed; - pl->link_config.duplex = our_kset.base.duplex; - pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE; + mutex_lock(&pl->state_mutex); + /* Save the new configuration */ + linkmode_copy(pl->link_config.advertising, + our_kset.link_modes.advertising); + pl->link_config.interface = config.interface; + pl->link_config.speed = our_kset.base.speed; + pl->link_config.duplex = our_kset.base.duplex; + pl->link_config.an_enabled = our_kset.base.autoneg != + AUTONEG_DISABLE; + mutex_unlock(&pl->state_mutex); + } else { + /* For a fixed link, this isn't able to change any parameters, + * which just leaves inband mode. + */ + if (phylink_validate(pl, support, &config)) + return -EINVAL; - /* If we have a PHY, phylib will call our link state function if the - * mode has changed, which will trigger a resolve and update the MAC - * configuration. For a fixed link, this isn't able to change any - * parameters, which just leaves inband mode. - */ - if (pl->link_an_mode == MLO_AN_INBAND && - !test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) { - phylink_mac_config(pl, &pl->link_config); - phylink_mac_an_restart(pl); + /* If autonegotiation is enabled, we must have an advertisement */ + if (config.an_enabled && + phylink_is_empty_linkmode(config.advertising)) + return -EINVAL; + + mutex_lock(&pl->state_mutex); + linkmode_copy(pl->link_config.advertising, config.advertising); + pl->link_config.interface = config.interface; + pl->link_config.speed = config.speed; + pl->link_config.duplex = config.duplex; + pl->link_config.an_enabled = kset->base.autoneg != + AUTONEG_DISABLE; + + if (pl->cur_link_an_mode == MLO_AN_INBAND && + !test_bit(PHYLINK_DISABLE_STOPPED, + &pl->phylink_disable_state)) { + /* If in 802.3z mode, this updates the advertisement. + * + * If we are in SGMII mode without a PHY, there is no + * advertisement; the only thing we have is the pause + * modes which can only come from a PHY. + */ + phylink_mac_config(pl, &pl->link_config); + phylink_mac_an_restart(pl); + } + mutex_unlock(&pl->state_mutex); } - mutex_unlock(&pl->state_mutex); return 0; } @@ -1341,7 +1386,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, pause->tx_pause); } else if (!test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) { - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_FIXED: /* Should we allow fixed links to change against the config? */ phylink_resolve_flow(pl, config); @@ -1548,7 +1593,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id, struct phylink_link_state state; int val = 0xffff; - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_FIXED: if (phy_id == 0) { phylink_get_fixed_state(pl, &state); @@ -1573,7 +1618,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id, static int phylink_mii_write(struct phylink *pl, unsigned int phy_id, unsigned int reg, unsigned int val) { - switch (pl->link_an_mode) { + switch (pl->cur_link_an_mode) { case MLO_AN_FIXED: break; @@ -1679,25 +1724,21 @@ static void phylink_sfp_detach(void *upstream, struct sfp_bus *bus) pl->netdev->sfp_bus = NULL; } -static int phylink_sfp_module_insert(void *upstream, - const struct sfp_eeprom_id *id) +static int phylink_sfp_config(struct phylink *pl, u8 mode, + const unsigned long *supported, + const unsigned long *advertising) { - struct phylink *pl = upstream; - __ETHTOOL_DECLARE_LINK_MODE_MASK(support) = { 0, }; __ETHTOOL_DECLARE_LINK_MODE_MASK(support1); + __ETHTOOL_DECLARE_LINK_MODE_MASK(support); struct phylink_link_state config; phy_interface_t iface; - int ret = 0; bool changed; - u8 port; - - ASSERT_RTNL(); + int ret; - sfp_parse_support(pl->sfp_bus, id, support); - port = sfp_parse_port(pl->sfp_bus, id, support); + linkmode_copy(support, supported); memset(&config, 0, sizeof(config)); - linkmode_copy(config.advertising, support); + linkmode_copy(config.advertising, advertising); config.interface = PHY_INTERFACE_MODE_NA; config.speed = SPEED_UNKNOWN; config.duplex = DUPLEX_UNKNOWN; @@ -1712,9 +1753,7 @@ static int phylink_sfp_module_insert(void *upstream, return ret; } - linkmode_copy(support1, support); - - iface = sfp_select_interface(pl->sfp_bus, id, config.advertising); + iface = sfp_select_interface(pl->sfp_bus, config.advertising); if (iface == PHY_INTERFACE_MODE_NA) { phylink_err(pl, "selection of interface failed, advertisement %*pb\n", @@ -1723,18 +1762,18 @@ static int phylink_sfp_module_insert(void *upstream, } config.interface = iface; + linkmode_copy(support1, support); ret = phylink_validate(pl, support1, &config); if (ret) { phylink_err(pl, "validation of %s/%s with support %*pb failed: %d\n", - phylink_an_mode_str(MLO_AN_INBAND), + phylink_an_mode_str(mode), phy_modes(config.interface), __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret); return ret; } phylink_dbg(pl, "requesting link mode %s/%s with support %*pb\n", - phylink_an_mode_str(MLO_AN_INBAND), - phy_modes(config.interface), + phylink_an_mode_str(mode), phy_modes(config.interface), __ETHTOOL_LINK_MODE_MASK_NBITS, support); if (phy_interface_mode_is_8023z(iface) && pl->phydev) @@ -1746,19 +1785,19 @@ static int phylink_sfp_module_insert(void *upstream, linkmode_copy(pl->link_config.advertising, config.advertising); } - if (pl->link_an_mode != MLO_AN_INBAND || + if (pl->cur_link_an_mode != mode || pl->link_config.interface != config.interface) { pl->link_config.interface = config.interface; - pl->link_an_mode = MLO_AN_INBAND; + pl->cur_link_an_mode = mode; changed = true; phylink_info(pl, "switched to %s/%s link mode\n", - phylink_an_mode_str(MLO_AN_INBAND), + phylink_an_mode_str(mode), phy_modes(config.interface)); } - pl->link_port = port; + pl->link_port = pl->sfp_port; if (changed && !test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) @@ -1767,6 +1806,55 @@ static int phylink_sfp_module_insert(void *upstream, return ret; } +static int phylink_sfp_module_insert(void *upstream, + const struct sfp_eeprom_id *id) +{ + struct phylink *pl = upstream; + unsigned long *support = pl->sfp_support; + + ASSERT_RTNL(); + + linkmode_zero(support); + sfp_parse_support(pl->sfp_bus, id, support); + pl->sfp_port = sfp_parse_port(pl->sfp_bus, id, support); + + /* If this module may have a PHY connecting later, defer until later */ + pl->sfp_may_have_phy = sfp_may_have_phy(pl->sfp_bus, id); + if (pl->sfp_may_have_phy) + return 0; + + return phylink_sfp_config(pl, MLO_AN_INBAND, support, support); +} + +static int phylink_sfp_module_start(void *upstream) +{ + struct phylink *pl = upstream; + + /* If this SFP module has a PHY, start the PHY now. */ + if (pl->phydev) { + phy_start(pl->phydev); + return 0; + } + + /* If the module may have a PHY but we didn't detect one we + * need to configure the MAC here. + */ + if (!pl->sfp_may_have_phy) + return 0; + + return phylink_sfp_config(pl, MLO_AN_INBAND, + pl->sfp_support, pl->sfp_support); +} + +static void phylink_sfp_module_stop(void *upstream) +{ + struct phylink *pl = upstream; + + /* If this SFP module has a PHY, stop it. */ + if (pl->phydev) + phy_stop(pl->phydev); +} + static void phylink_sfp_link_down(void *upstream) { struct phylink *pl = upstream; @@ -1786,11 +1874,51 @@ static void phylink_sfp_link_up(void *upstream) phylink_run_resolve(pl); } +/* The Broadcom BCM84881 in the Methode DM7052 is unable to provide a SGMII + * or 802.3z control word, so inband will not work. + */ +static bool phylink_phy_no_inband(struct phy_device *phy) +{ + return phy->is_c45 && + (phy->c45_ids.device_ids[1] & 0xfffffff0) == 0xae025150; +} + static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) { struct phylink *pl = upstream; + phy_interface_t interface; + u8 mode; + int ret; - return __phylink_connect_phy(upstream, phy, pl->link_config.interface); + /* + * This is the new way of dealing with flow control for PHYs, + * as described by Timur Tabi in commit 529ed1275263 ("net: phy: + * phy drivers should not set SUPPORTED_[Asym_]Pause") except + * using our validate call to the MAC, we rely upon the MAC + * clearing the bits from both supported and advertising fields. + */ + phy_support_asym_pause(phy); + + if (phylink_phy_no_inband(phy)) + mode = MLO_AN_PHY; + else + mode = MLO_AN_INBAND; + + /* Do the initial configuration */ + ret = phylink_sfp_config(pl, mode, phy->supported, phy->advertising); + if (ret < 0) + return ret; + + interface = pl->link_config.interface; + ret = phylink_attach_phy(pl, phy, interface); + if (ret < 0) + return ret; + + ret = phylink_bringup_phy(pl, phy, interface); + if (ret) + phy_detach(phy); + + return ret; } static void phylink_sfp_disconnect_phy(void *upstream) @@ -1802,6 +1930,8 @@ static const struct sfp_upstream_ops sfp_phylink_ops = { .attach = phylink_sfp_attach, .detach = phylink_sfp_detach, .module_insert = phylink_sfp_module_insert, + .module_start = phylink_sfp_module_start, + .module_stop = phylink_sfp_module_stop, .link_up = phylink_sfp_link_up, .link_down = phylink_sfp_link_down, .connect_phy = phylink_sfp_connect_phy, diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 5a72093ab6e7..06e6429b8b71 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -103,6 +103,7 @@ static const struct sfp_quirk *sfp_lookup_quirk(const struct sfp_eeprom_id *id) return NULL; } + /** * sfp_parse_port() - Parse the EEPROM base ID, setting the port type * @bus: a pointer to the &struct sfp_bus structure for the sfp module @@ -124,35 +125,35 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, /* port is the physical connector, set this from the connector field. */ switch (id->base.connector) { - case SFP_CONNECTOR_SC: - case SFP_CONNECTOR_FIBERJACK: - case SFP_CONNECTOR_LC: - case SFP_CONNECTOR_MT_RJ: - case SFP_CONNECTOR_MU: - case SFP_CONNECTOR_OPTICAL_PIGTAIL: + case SFF8024_CONNECTOR_SC: + case SFF8024_CONNECTOR_FIBERJACK: + case SFF8024_CONNECTOR_LC: + case SFF8024_CONNECTOR_MT_RJ: + case SFF8024_CONNECTOR_MU: + case SFF8024_CONNECTOR_OPTICAL_PIGTAIL: + case SFF8024_CONNECTOR_MPO_1X12: + case SFF8024_CONNECTOR_MPO_2X16: port = PORT_FIBRE; break; - case SFP_CONNECTOR_RJ45: + case SFF8024_CONNECTOR_RJ45: port = PORT_TP; break; - case SFP_CONNECTOR_COPPER_PIGTAIL: + case SFF8024_CONNECTOR_COPPER_PIGTAIL: port = PORT_DA; break; - case SFP_CONNECTOR_UNSPEC: + case SFF8024_CONNECTOR_UNSPEC: if (id->base.e1000_base_t) { port = PORT_TP; break; } /* fallthrough */ - case SFP_CONNECTOR_SG: /* guess */ - case SFP_CONNECTOR_MPO_1X12: - case SFP_CONNECTOR_MPO_2X16: - case SFP_CONNECTOR_HSSDC_II: - case SFP_CONNECTOR_NOSEPARATE: - case SFP_CONNECTOR_MXC_2X16: + case SFF8024_CONNECTOR_SG: /* guess */ + case SFF8024_CONNECTOR_HSSDC_II: + case SFF8024_CONNECTOR_NOSEPARATE: + case SFF8024_CONNECTOR_MXC_2X16: port = PORT_OTHER; break; default: @@ -179,6 +180,33 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, EXPORT_SYMBOL_GPL(sfp_parse_port); /** + * sfp_may_have_phy() - indicate whether the module may have a PHY + * @bus: a pointer to the &struct sfp_bus structure for the sfp module + * @id: a pointer to the module's &struct sfp_eeprom_id + * + * Parse the EEPROM identification given in @id, and return whether + * this module may have a PHY. + */ +bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id) +{ + if (id->base.e1000_base_t) + return true; + + if (id->base.phys_id != SFF8024_ID_DWDM_SFP) { + switch (id->base.extended_cc) { + case SFF8024_ECC_10GBASE_T_SFI: + case SFF8024_ECC_10GBASE_T_SR: + case SFF8024_ECC_5GBASE_T: + case SFF8024_ECC_2_5GBASE_T: + return true; + } + } + + return false; +} +EXPORT_SYMBOL_GPL(sfp_may_have_phy); + +/** * sfp_parse_support() - Parse the eeprom id for supported link modes * @bus: a pointer to the &struct sfp_bus structure for the sfp module * @id: a pointer to the module's &struct sfp_eeprom_id @@ -261,22 +289,33 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, } switch (id->base.extended_cc) { - case 0x00: /* Unspecified */ + case SFF8024_ECC_UNSPEC: break; - case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */ + case SFF8024_ECC_100GBASE_SR4_25GBASE_SR: phylink_set(modes, 100000baseSR4_Full); phylink_set(modes, 25000baseSR_Full); break; - case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */ - case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */ + case SFF8024_ECC_100GBASE_LR4_25GBASE_LR: + case SFF8024_ECC_100GBASE_ER4_25GBASE_ER: phylink_set(modes, 100000baseLR4_ER4_Full); break; - case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */ - case 0x0c: /* 25Gbase-CR CA-S */ - case 0x0d: /* 25Gbase-CR CA-N */ + case SFF8024_ECC_100GBASE_CR4: phylink_set(modes, 100000baseCR4_Full); + /* fallthrough */ + case SFF8024_ECC_25GBASE_CR_S: + case SFF8024_ECC_25GBASE_CR_N: phylink_set(modes, 25000baseCR_Full); break; + case SFF8024_ECC_10GBASE_T_SFI: + case SFF8024_ECC_10GBASE_T_SR: + phylink_set(modes, 10000baseT_Full); + break; + case SFF8024_ECC_5GBASE_T: + phylink_set(modes, 5000baseT_Full); + break; + case SFF8024_ECC_2_5GBASE_T: + phylink_set(modes, 2500baseT_Full); + break; default: dev_warn(bus->sfp_dev, "Unknown/unsupported extended compliance code: 0x%02x\n", @@ -301,7 +340,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, */ if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) { /* If the encoding and bit rate allows 1000baseX */ - if (id->base.encoding == SFP_ENCODING_8B10B && br_nom && + if (id->base.encoding == SFF8024_ENCODING_8B10B && br_nom && br_min <= 1300 && br_max >= 1200) phylink_set(modes, 1000baseX_Full); } @@ -320,31 +359,27 @@ EXPORT_SYMBOL_GPL(sfp_parse_support); /** * sfp_select_interface() - Select appropriate phy_interface_t mode * @bus: a pointer to the &struct sfp_bus structure for the sfp module - * @id: a pointer to the module's &struct sfp_eeprom_id * @link_modes: ethtool link modes mask * - * Derive the phy_interface_t mode for the information found in the - * module's identifying EEPROM and the link modes mask. There is no - * standard or defined way to derive this information, so we decide - * based upon the link mode mask. + * Derive the phy_interface_t mode for the SFP module from the link + * modes mask. */ phy_interface_t sfp_select_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, unsigned long *link_modes) { if (phylink_test(link_modes, 10000baseCR_Full) || phylink_test(link_modes, 10000baseSR_Full) || phylink_test(link_modes, 10000baseLR_Full) || phylink_test(link_modes, 10000baseLRM_Full) || - phylink_test(link_modes, 10000baseER_Full)) + phylink_test(link_modes, 10000baseER_Full) || + phylink_test(link_modes, 10000baseT_Full)) return PHY_INTERFACE_MODE_10GKR; if (phylink_test(link_modes, 2500baseX_Full)) return PHY_INTERFACE_MODE_2500BASEX; - if (id->base.e1000_base_t || - id->base.e100_base_lx || - id->base.e100_base_fx) + if (phylink_test(link_modes, 1000baseT_Half) || + phylink_test(link_modes, 1000baseT_Full)) return PHY_INTERFACE_MODE_SGMII; if (phylink_test(link_modes, 1000baseX_Full)) @@ -705,6 +740,27 @@ void sfp_module_remove(struct sfp_bus *bus) } EXPORT_SYMBOL_GPL(sfp_module_remove); +int sfp_module_start(struct sfp_bus *bus) +{ + const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus); + int ret = 0; + + if (ops && ops->module_start) + ret = ops->module_start(bus->upstream); + + return ret; +} +EXPORT_SYMBOL_GPL(sfp_module_start); + +void sfp_module_stop(struct sfp_bus *bus) +{ + const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus); + + if (ops && ops->module_stop) + ops->module_stop(bus->upstream); +} +EXPORT_SYMBOL_GPL(sfp_module_stop); + static void sfp_socket_clear(struct sfp_bus *bus) { bus->sfp_dev = NULL; diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index c0b9a8e4e65a..73c2969f11a4 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -59,8 +59,10 @@ enum { SFP_DEV_UP, SFP_S_DOWN = 0, + SFP_S_FAIL, SFP_S_WAIT, SFP_S_INIT, + SFP_S_INIT_PHY, SFP_S_INIT_TX_FAULT, SFP_S_WAIT_LOS, SFP_S_LINK_UP, @@ -122,8 +124,10 @@ static const char *event_to_str(unsigned short event) static const char * const sm_state_strings[] = { [SFP_S_DOWN] = "down", + [SFP_S_FAIL] = "fail", [SFP_S_WAIT] = "wait", [SFP_S_INIT] = "init", + [SFP_S_INIT_PHY] = "init_phy", [SFP_S_INIT_TX_FAULT] = "init_tx_fault", [SFP_S_WAIT_LOS] = "wait_los", [SFP_S_LINK_UP] = "link_up", @@ -155,10 +159,34 @@ static const enum gpiod_flags gpio_flags[] = { GPIOD_ASIS, }; -#define T_WAIT msecs_to_jiffies(50) -#define T_INIT_JIFFIES msecs_to_jiffies(300) -#define T_RESET_US 10 -#define T_FAULT_RECOVER msecs_to_jiffies(1000) +/* t_start_up (SFF-8431) or t_init (SFF-8472) is the time required for a + * non-cooled module to initialise its laser safety circuitry. We wait + * an initial T_WAIT period before we check the tx fault to give any PHY + * on board (for a copper SFP) time to initialise. + */ +#define T_WAIT msecs_to_jiffies(50) +#define T_START_UP msecs_to_jiffies(300) +#define T_START_UP_BAD_GPON msecs_to_jiffies(60000) + +/* t_reset is the time required to assert the TX_DISABLE signal to reset + * an indicated TX_FAULT. + */ +#define T_RESET_US 10 +#define T_FAULT_RECOVER msecs_to_jiffies(1000) + +/* N_FAULT_INIT is the number of recovery attempts at module initialisation + * time. If the TX_FAULT signal is not deasserted after this number of + * attempts at clearing it, we decide that the module is faulty. + * N_FAULT is the same but after the module has initialised. + */ +#define N_FAULT_INIT 5 +#define N_FAULT 5 + +/* T_PHY_RETRY is the time interval between attempts to probe the PHY. + * R_PHY_RETRY is the number of attempts. + */ +#define T_PHY_RETRY msecs_to_jiffies(50) +#define R_PHY_RETRY 12 /* SFP module presence detection is poor: the three MOD DEF signals are * the same length on the PCB, which means it's possible for MOD DEF 0 to @@ -214,10 +242,12 @@ struct sfp { unsigned char sm_mod_tries; unsigned char sm_dev_state; unsigned short sm_state; - unsigned int sm_retries; + unsigned char sm_fault_retries; + unsigned char sm_phy_retries; struct sfp_eeprom_id id; unsigned int module_power_mW; + unsigned int module_t_start_up; #if IS_ENABLED(CONFIG_HWMON) struct sfp_diag diag; @@ -231,7 +261,7 @@ struct sfp { static bool sff_module_supported(const struct sfp_eeprom_id *id) { - return id->base.phys_id == SFP_PHYS_ID_SFF && + return id->base.phys_id == SFF8024_ID_SFF_8472 && id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP; } @@ -242,7 +272,7 @@ static const struct sff_data sff_data = { static bool sfp_module_supported(const struct sfp_eeprom_id *id) { - return id->base.phys_id == SFP_PHYS_ID_SFP && + return id->base.phys_id == SFF8024_ID_SFP && id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP; } @@ -412,13 +442,20 @@ static unsigned int sfp_soft_get_state(struct sfp *sfp) { unsigned int state = 0; u8 status; + int ret; - if (sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status)) == - sizeof(status)) { + ret = sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status)); + if (ret == sizeof(status)) { if (status & SFP_STATUS_RX_LOS) state |= SFP_F_LOS; if (status & SFP_STATUS_TX_FAULT) state |= SFP_F_TX_FAULT; + } else { + dev_err_ratelimited(sfp->dev, + "failed to read SFP soft status: %d\n", + ret); + /* Preserve the current state */ + state = sfp->state; } return state & sfp->state_soft_mask; @@ -1383,26 +1420,30 @@ static void sfp_sm_mod_next(struct sfp *sfp, unsigned int state, static void sfp_sm_phy_detach(struct sfp *sfp) { - phy_stop(sfp->mod_phy); sfp_remove_phy(sfp->sfp_bus); phy_device_remove(sfp->mod_phy); phy_device_free(sfp->mod_phy); sfp->mod_phy = NULL; } -static void sfp_sm_probe_phy(struct sfp *sfp) +static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45) { struct phy_device *phy; int err; - phy = mdiobus_scan(sfp->i2c_mii, SFP_PHY_ADDR); - if (phy == ERR_PTR(-ENODEV)) { - dev_info(sfp->dev, "no PHY detected\n"); - return; - } + phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45); + if (phy == ERR_PTR(-ENODEV)) + return PTR_ERR(phy); if (IS_ERR(phy)) { dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy)); - return; + return PTR_ERR(phy); + } + + err = phy_device_register(phy); + if (err) { + phy_device_free(phy); + dev_err(sfp->dev, "phy_device_register failed: %d\n", err); + return err; } err = sfp_add_phy(sfp->sfp_bus, phy); @@ -1410,11 +1451,12 @@ static void sfp_sm_probe_phy(struct sfp *sfp) phy_device_remove(phy); phy_device_free(phy); dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err); - return; + return err; } sfp->mod_phy = phy; - phy_start(phy); + + return 0; } static void sfp_sm_link_up(struct sfp *sfp) @@ -1464,7 +1506,7 @@ static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event) static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn) { - if (sfp->sm_retries && !--sfp->sm_retries) { + if (sfp->sm_fault_retries && !--sfp->sm_fault_retries) { dev_err(sfp->dev, "module persistently indicates fault, disabling\n"); sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0); @@ -1476,21 +1518,35 @@ static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn) } } -static void sfp_sm_probe_for_phy(struct sfp *sfp) +/* Probe a SFP for a PHY device if the module supports copper - the PHY + * normally sits at I2C bus address 0x56, and may either be a clause 22 + * or clause 45 PHY. + * + * Clause 22 copper SFP modules normally operate in Cisco SGMII mode with + * negotiation enabled, but some may be in 1000base-X - which is for the + * PHY driver to determine. + * + * Clause 45 copper SFP+ modules (10G) appear to switch their interface + * mode according to the negotiated line speed. + */ +static int sfp_sm_probe_for_phy(struct sfp *sfp) { - /* Setting the serdes link mode is guesswork: there's no - * field in the EEPROM which indicates what mode should - * be used. - * - * If it's a gigabit-only fiber module, it probably does - * not have a PHY, so switch to 802.3z negotiation mode. - * Otherwise, switch to SGMII mode (which is required to - * support non-gigabit speeds) and probe for a PHY. - */ - if (sfp->id.base.e1000_base_t || - sfp->id.base.e100_base_lx || - sfp->id.base.e100_base_fx) - sfp_sm_probe_phy(sfp); + int err = 0; + + switch (sfp->id.base.extended_cc) { + case SFF8024_ECC_10GBASE_T_SFI: + case SFF8024_ECC_10GBASE_T_SR: + case SFF8024_ECC_5GBASE_T: + case SFF8024_ECC_2_5GBASE_T: + err = sfp_sm_probe_phy(sfp, true); + break; + + default: + if (sfp->id.base.e1000_base_t) + err = sfp_sm_probe_phy(sfp, false); + break; + } + return err; } static int sfp_module_parse_power(struct sfp *sfp) @@ -1550,6 +1606,13 @@ static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable) return -EAGAIN; } + /* DM7052 reports as a high power module, responds to reads (with + * all bytes 0xff) at 0x51 but does not accept writes. In any case, + * if the bit is already set, we're already in high power mode. + */ + if (!!(val & BIT(0)) == enable) + return 0; + if (enable) val |= BIT(0); else @@ -1655,6 +1718,12 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) if (ret < 0) return ret; + if (!memcmp(id.base.vendor_name, "ALCATELLUCENT ", 16) && + !memcmp(id.base.vendor_pn, "3FE46541AA ", 16)) + sfp->module_t_start_up = T_START_UP_BAD_GPON; + else + sfp->module_t_start_up = T_START_UP; + return 0; } @@ -1812,6 +1881,7 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) static void sfp_sm_main(struct sfp *sfp, unsigned int event) { unsigned long timeout; + int ret; /* Some events are global */ if (sfp->sm_state != SFP_S_DOWN && @@ -1820,6 +1890,8 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) if (sfp->sm_state == SFP_S_LINK_UP && sfp->sm_dev_state == SFP_DEV_UP) sfp_sm_link_down(sfp); + if (sfp->sm_state > SFP_S_INIT) + sfp_module_stop(sfp->sfp_bus); if (sfp->mod_phy) sfp_sm_phy_detach(sfp); sfp_module_tx_disable(sfp); @@ -1841,7 +1913,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) sfp_module_tx_enable(sfp); /* Initialise the fault clearance retries */ - sfp->sm_retries = 5; + sfp->sm_fault_retries = N_FAULT_INIT; /* We need to check the TX_FAULT state, which is not defined * while TX_DISABLE is asserted. The earliest we want to do @@ -1855,11 +1927,12 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) break; if (sfp->state & SFP_F_TX_FAULT) { - /* Wait t_init before indicating that the link is up, - * provided the current state indicates no TX_FAULT. If - * TX_FAULT clears before this time, that's fine too. + /* Wait up to t_init (SFF-8472) or t_start_up (SFF-8431) + * from the TX_DISABLE deassertion for the module to + * initialise, which is indicated by TX_FAULT + * deasserting. */ - timeout = T_INIT_JIFFIES; + timeout = sfp->module_t_start_up; if (timeout > T_WAIT) timeout -= T_WAIT; else @@ -1876,27 +1949,51 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) case SFP_S_INIT: if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) { - /* TX_FAULT is still asserted after t_init, so assume - * there is a fault. + /* TX_FAULT is still asserted after t_init or + * or t_start_up, so assume there is a fault. */ sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT, - sfp->sm_retries == 5); + sfp->sm_fault_retries == N_FAULT_INIT); } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) { - init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT - * clear. Probe for the PHY and check the LOS state. - */ - sfp_sm_probe_for_phy(sfp); - sfp_sm_link_check_los(sfp); + init_done: + sfp->sm_phy_retries = R_PHY_RETRY; + goto phy_probe; + } + break; - /* Reset the fault retry count */ - sfp->sm_retries = 5; + case SFP_S_INIT_PHY: + if (event != SFP_E_TIMEOUT) + break; + phy_probe: + /* TX_FAULT deasserted or we timed out with TX_FAULT + * clear. Probe for the PHY and check the LOS state. + */ + ret = sfp_sm_probe_for_phy(sfp); + if (ret == -ENODEV) { + if (--sfp->sm_phy_retries) { + sfp_sm_next(sfp, SFP_S_INIT_PHY, T_PHY_RETRY); + break; + } else { + dev_info(sfp->dev, "no PHY detected\n"); + } + } else if (ret) { + sfp_sm_next(sfp, SFP_S_FAIL, 0); + break; } + if (sfp_module_start(sfp->sfp_bus)) { + sfp_sm_next(sfp, SFP_S_FAIL, 0); + break; + } + sfp_sm_link_check_los(sfp); + + /* Reset the fault retry count */ + sfp->sm_fault_retries = N_FAULT; break; case SFP_S_INIT_TX_FAULT: if (event == SFP_E_TIMEOUT) { sfp_module_tx_fault_reset(sfp); - sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES); + sfp_sm_next(sfp, SFP_S_INIT, sfp->module_t_start_up); } break; @@ -1920,7 +2017,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) case SFP_S_TX_FAULT: if (event == SFP_E_TIMEOUT) { sfp_module_tx_fault_reset(sfp); - sfp_sm_next(sfp, SFP_S_REINIT, T_INIT_JIFFIES); + sfp_sm_next(sfp, SFP_S_REINIT, sfp->module_t_start_up); } break; diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h index 64f54b0bbd8c..b83f70526270 100644 --- a/drivers/net/phy/sfp.h +++ b/drivers/net/phy/sfp.h @@ -22,6 +22,8 @@ void sfp_link_up(struct sfp_bus *bus); void sfp_link_down(struct sfp_bus *bus); int sfp_module_insert(struct sfp_bus *bus, const struct sfp_eeprom_id *id); void sfp_module_remove(struct sfp_bus *bus); +int sfp_module_start(struct sfp_bus *bus); +void sfp_module_stop(struct sfp_bus *bus); int sfp_link_configure(struct sfp_bus *bus, const struct sfp_eeprom_id *id); struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp, const struct sfp_socket_ops *ops); diff --git a/drivers/net/phy/uPD60620.c b/drivers/net/phy/uPD60620.c index a32b3fd8a370..38834347a427 100644 --- a/drivers/net/phy/uPD60620.c +++ b/drivers/net/phy/uPD60620.c @@ -68,12 +68,7 @@ static int upd60620_read_status(struct phy_device *phydev) mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, phy_state); - if (phydev->duplex == DUPLEX_FULL) { - if (phy_state & LPA_PAUSE_CAP) - phydev->pause = 1; - if (phy_state & LPA_PAUSE_ASYM) - phydev->asym_pause = 1; - } + phy_resolve_aneg_pause(phydev); } } return 0; diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index a7b9cf3269bf..29a0917a81e6 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -874,15 +874,15 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf, skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); if (!skb) goto nomem; - ap->rpkt = skb; - } - if (skb->len == 0) { - /* Try to get the payload 4-byte aligned. - * This should match the - * PPP_ALLSTATIONS/PPP_UI/compressed tests in - * process_input_packet, but we do not have - * enough chars here to test buf[1] and buf[2]. - */ + ap->rpkt = skb; + } + if (skb->len == 0) { + /* Try to get the payload 4-byte aligned. + * This should match the + * PPP_ALLSTATIONS/PPP_UI/compressed tests in + * process_input_packet, but we do not have + * enough chars here to test buf[1] and buf[2]. + */ if (buf[0] != PPP_ALLSTATIONS) skb_reserve(skb, 2 + (buf[0] & 1)); } diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 2a91c192659f..317d3a8df316 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -457,7 +457,7 @@ static void slip_write_wakeup(struct tty_struct *tty) schedule_work(&sl->tx_work); } -static void sl_tx_timeout(struct net_device *dev) +static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct slip *sl = netdev_priv(dev); diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 1e58702c737f..d387bc7ac1b6 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -447,7 +447,7 @@ static netdev_tx_t catc_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } -static void catc_tx_timeout(struct net_device *netdev) +static void catc_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct catc *catc = netdev_priv(netdev); diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index ca827802f291..417e42c9fd03 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -820,7 +820,7 @@ static const struct ethtool_ops ops = { }; /* called when a packet did not ack after watchdogtimeout */ -static void hso_net_tx_timeout(struct net_device *net) +static void hso_net_tx_timeout(struct net_device *net, unsigned int txqueue) { struct hso_net *odev = netdev_priv(net); diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 8c01fbf68a89..c792d65dd7b4 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -400,7 +400,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; } -static void ipheth_tx_timeout(struct net_device *net) +static void ipheth_tx_timeout(struct net_device *net, unsigned int txqueue) { struct ipheth_device *dev = netdev_priv(net); diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 8e210ba4a313..ed01dc964c99 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -894,7 +894,7 @@ static void kaweth_async_set_rx_mode(struct kaweth_device *kaweth) /**************************************************************** * kaweth_tx_timeout ****************************************************************/ -static void kaweth_tx_timeout(struct net_device *net) +static void kaweth_tx_timeout(struct net_device *net, unsigned int txqueue) { struct kaweth_device *kaweth = netdev_priv(net); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index cf1f3f0a4b9b..0c8b9363366b 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3662,7 +3662,7 @@ static void lan78xx_disconnect(struct usb_interface *intf) usb_put_dev(udev); } -static void lan78xx_tx_timeout(struct net_device *net) +static void lan78xx_tx_timeout(struct net_device *net, unsigned int txqueue) { struct lan78xx_net *dev = netdev_priv(net); diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index f7d117d80cfb..8783e2ab3ec0 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -693,7 +693,7 @@ static void intr_callback(struct urb *urb) "can't resubmit interrupt urb, %d\n", res); } -static void pegasus_tx_timeout(struct net_device *net) +static void pegasus_tx_timeout(struct net_device *net, unsigned int txqueue) { pegasus_t *pegasus = netdev_priv(net); netif_warn(pegasus, timer, net, "tx timeout\n"); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c5ebf35d2488..9ec1da429514 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2507,7 +2507,7 @@ static void rtl_drop_queued_tx(struct r8152 *tp) } } -static void rtl8152_tx_timeout(struct net_device *netdev) +static void rtl8152_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct r8152 *tp = netdev_priv(netdev); diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 13e51ccf0214..e7c630d37589 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -655,7 +655,7 @@ static void disable_net_traffic(rtl8150_t * dev) set_registers(dev, CR, 1, &cr); } -static void rtl8150_tx_timeout(struct net_device *netdev) +static void rtl8150_tx_timeout(struct net_device *netdev, unsigned int txqueue) { rtl8150_t *dev = netdev_priv(netdev); dev_warn(&netdev->dev, "Tx timeout.\n"); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 30e511c2c8d0..bc88923db369 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1293,7 +1293,7 @@ static void tx_complete (struct urb *urb) /*-------------------------------------------------------------------------*/ -void usbnet_tx_timeout (struct net_device *net) +void usbnet_tx_timeout (struct net_device *net, unsigned int txqueue) { struct usbnet *dev = netdev_priv(net); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 216acf37ca7c..18f152fa0068 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3198,7 +3198,7 @@ vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter) static void -vmxnet3_tx_timeout(struct net_device *netdev) +vmxnet3_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); adapter->tx_timeout_count++; diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index af539151d663..5d6532ad6b78 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -268,7 +268,7 @@ static int cosa_net_attach(struct net_device *dev, unsigned short encoding, unsigned short parity); static int cosa_net_open(struct net_device *d); static int cosa_net_close(struct net_device *d); -static void cosa_net_timeout(struct net_device *d); +static void cosa_net_timeout(struct net_device *d, unsigned int txqueue); static netdev_tx_t cosa_net_tx(struct sk_buff *skb, struct net_device *d); static char *cosa_net_setup_rx(struct channel_data *channel, int size); static int cosa_net_rx_done(struct channel_data *channel); @@ -670,7 +670,7 @@ static netdev_tx_t cosa_net_tx(struct sk_buff *skb, return NETDEV_TX_OK; } -static void cosa_net_timeout(struct net_device *dev) +static void cosa_net_timeout(struct net_device *dev, unsigned int txqueue) { struct channel_data *chan = dev_to_chan(dev); diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 1901ec7948d8..7916efce7188 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2239,7 +2239,7 @@ fst_attach(struct net_device *dev, unsigned short encoding, unsigned short parit } static void -fst_tx_timeout(struct net_device *dev) +fst_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct fst_port_info *port; struct fst_card_info *card; diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index ca0f3be2b6bf..308384756e6f 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -1039,7 +1039,7 @@ static const struct dev_pm_ops uhdlc_pm_ops = { #define HDLC_PM_OPS NULL #endif -static void uhdlc_tx_timeout(struct net_device *ndev) +static void uhdlc_tx_timeout(struct net_device *ndev, unsigned int txqueue) { netdev_err(ndev, "%s\n", __func__); } diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 0e6a51525d91..a20f467ca48a 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -99,7 +99,7 @@ static int lmc_ifdown(struct net_device * const); static void lmc_watchdog(struct timer_list *t); static void lmc_reset(lmc_softc_t * const sc); static void lmc_dec_reset(lmc_softc_t * const sc); -static void lmc_driver_timeout(struct net_device *dev); +static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue); /* * linux reserves 16 device specific IOCTLs. We call them @@ -2044,7 +2044,7 @@ static void lmc_initcsrs(lmc_softc_t * const sc, lmc_csrptr_t csr_base, /*fold00 lmc_trace(sc->lmc_device, "lmc_initcsrs out"); } -static void lmc_driver_timeout(struct net_device *dev) +static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue) { lmc_softc_t *sc = dev_to_sc(dev); u32 csr6; diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 914be5847386..69773d228ec1 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -276,7 +276,7 @@ static void x25_asy_write_wakeup(struct tty_struct *tty) sl->xhead += actual; } -static void x25_asy_timeout(struct net_device *dev) +static void x25_asy_timeout(struct net_device *dev, unsigned int txqueue) { struct x25_asy *sl = netdev_priv(dev); diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c index a5db3c06b646..a7fcbceb6e6b 100644 --- a/drivers/net/wimax/i2400m/netdev.c +++ b/drivers/net/wimax/i2400m/netdev.c @@ -380,7 +380,7 @@ drop: static -void i2400m_tx_timeout(struct net_device *net_dev) +void i2400m_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { /* * We might want to kick the device diff --git a/drivers/net/wireguard/Makefile b/drivers/net/wireguard/Makefile new file mode 100644 index 000000000000..fc52b2cb500b --- /dev/null +++ b/drivers/net/wireguard/Makefile @@ -0,0 +1,18 @@ +ccflags-y := -O3 +ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' +ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG +wireguard-y := main.o +wireguard-y += noise.o +wireguard-y += device.o +wireguard-y += peer.o +wireguard-y += timers.o +wireguard-y += queueing.o +wireguard-y += send.o +wireguard-y += receive.o +wireguard-y += socket.o +wireguard-y += peerlookup.o +wireguard-y += allowedips.o +wireguard-y += ratelimiter.o +wireguard-y += cookie.o +wireguard-y += netlink.o +obj-$(CONFIG_WIREGUARD) := wireguard.o diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c new file mode 100644 index 000000000000..121d9ea0f135 --- /dev/null +++ b/drivers/net/wireguard/allowedips.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "allowedips.h" +#include "peer.h" + +static void swap_endian(u8 *dst, const u8 *src, u8 bits) +{ + if (bits == 32) { + *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); + } else if (bits == 128) { + ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); + ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); + } +} + +static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, + u8 cidr, u8 bits) +{ + node->cidr = cidr; + node->bit_at_a = cidr / 8U; +#ifdef __LITTLE_ENDIAN + node->bit_at_a ^= (bits / 8U - 1U) % 8U; +#endif + node->bit_at_b = 7U - (cidr % 8U); + node->bitlen = bits; + memcpy(node->bits, src, bits / 8U); +} +#define CHOOSE_NODE(parent, key) \ + parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] + +static void push_rcu(struct allowedips_node **stack, + struct allowedips_node __rcu *p, unsigned int *len) +{ + if (rcu_access_pointer(p)) { + WARN_ON(IS_ENABLED(DEBUG) && *len >= 128); + stack[(*len)++] = rcu_dereference_raw(p); + } +} + +static void root_free_rcu(struct rcu_head *rcu) +{ + struct allowedips_node *node, *stack[128] = { + container_of(rcu, struct allowedips_node, rcu) }; + unsigned int len = 1; + + while (len > 0 && (node = stack[--len])) { + push_rcu(stack, node->bit[0], &len); + push_rcu(stack, node->bit[1], &len); + kfree(node); + } +} + +static void root_remove_peer_lists(struct allowedips_node *root) +{ + struct allowedips_node *node, *stack[128] = { root }; + unsigned int len = 1; + + while (len > 0 && (node = stack[--len])) { + push_rcu(stack, node->bit[0], &len); + push_rcu(stack, node->bit[1], &len); + if (rcu_access_pointer(node->peer)) + list_del(&node->peer_list); + } +} + +static void walk_remove_by_peer(struct allowedips_node __rcu **top, + struct wg_peer *peer, struct mutex *lock) +{ +#define REF(p) rcu_access_pointer(p) +#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock)) +#define PUSH(p) ({ \ + WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \ + stack[len++] = p; \ + }) + + struct allowedips_node __rcu **stack[128], **nptr; + struct allowedips_node *node, *prev; + unsigned int len; + + if (unlikely(!peer || !REF(*top))) + return; + + for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) { + nptr = stack[len - 1]; + node = DEREF(nptr); + if (!node) { + --len; + continue; + } + if (!prev || REF(prev->bit[0]) == node || + REF(prev->bit[1]) == node) { + if (REF(node->bit[0])) + PUSH(&node->bit[0]); + else if (REF(node->bit[1])) + PUSH(&node->bit[1]); + } else if (REF(node->bit[0]) == prev) { + if (REF(node->bit[1])) + PUSH(&node->bit[1]); + } else { + if (rcu_dereference_protected(node->peer, + lockdep_is_held(lock)) == peer) { + RCU_INIT_POINTER(node->peer, NULL); + list_del_init(&node->peer_list); + if (!node->bit[0] || !node->bit[1]) { + rcu_assign_pointer(*nptr, DEREF( + &node->bit[!REF(node->bit[0])])); + kfree_rcu(node, rcu); + node = DEREF(nptr); + } + } + --len; + } + } + +#undef REF +#undef DEREF +#undef PUSH +} + +static unsigned int fls128(u64 a, u64 b) +{ + return a ? fls64(a) + 64U : fls64(b); +} + +static u8 common_bits(const struct allowedips_node *node, const u8 *key, + u8 bits) +{ + if (bits == 32) + return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key); + else if (bits == 128) + return 128U - fls128( + *(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0], + *(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]); + return 0; +} + +static bool prefix_matches(const struct allowedips_node *node, const u8 *key, + u8 bits) +{ + /* This could be much faster if it actually just compared the common + * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and + * the rest, but it turns out that common_bits is already super fast on + * modern processors, even taking into account the unfortunate bswap. + * So, we just inline it like this instead. + */ + return common_bits(node, key, bits) >= node->cidr; +} + +static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits, + const u8 *key) +{ + struct allowedips_node *node = trie, *found = NULL; + + while (node && prefix_matches(node, key, bits)) { + if (rcu_access_pointer(node->peer)) + found = node; + if (node->cidr == bits) + break; + node = rcu_dereference_bh(CHOOSE_NODE(node, key)); + } + return found; +} + +/* Returns a strong reference to a peer */ +static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits, + const void *be_ip) +{ + /* Aligned so it can be passed to fls/fls64 */ + u8 ip[16] __aligned(__alignof(u64)); + struct allowedips_node *node; + struct wg_peer *peer = NULL; + + swap_endian(ip, be_ip, bits); + + rcu_read_lock_bh(); +retry: + node = find_node(rcu_dereference_bh(root), bits, ip); + if (node) { + peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer)); + if (!peer) + goto retry; + } + rcu_read_unlock_bh(); + return peer; +} + +static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, + u8 cidr, u8 bits, struct allowedips_node **rnode, + struct mutex *lock) +{ + struct allowedips_node *node = rcu_dereference_protected(trie, + lockdep_is_held(lock)); + struct allowedips_node *parent = NULL; + bool exact = false; + + while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) { + parent = node; + if (parent->cidr == cidr) { + exact = true; + break; + } + node = rcu_dereference_protected(CHOOSE_NODE(parent, key), + lockdep_is_held(lock)); + } + *rnode = parent; + return exact; +} + +static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + struct allowedips_node *node, *parent, *down, *newnode; + + if (unlikely(cidr > bits || !peer)) + return -EINVAL; + + if (!rcu_access_pointer(*trie)) { + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (unlikely(!node)) + return -ENOMEM; + RCU_INIT_POINTER(node->peer, peer); + list_add_tail(&node->peer_list, &peer->allowedips_list); + copy_and_assign_cidr(node, key, cidr, bits); + rcu_assign_pointer(*trie, node); + return 0; + } + if (node_placement(*trie, key, cidr, bits, &node, lock)) { + rcu_assign_pointer(node->peer, peer); + list_move_tail(&node->peer_list, &peer->allowedips_list); + return 0; + } + + newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); + if (unlikely(!newnode)) + return -ENOMEM; + RCU_INIT_POINTER(newnode->peer, peer); + list_add_tail(&newnode->peer_list, &peer->allowedips_list); + copy_and_assign_cidr(newnode, key, cidr, bits); + + if (!node) { + down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); + } else { + down = rcu_dereference_protected(CHOOSE_NODE(node, key), + lockdep_is_held(lock)); + if (!down) { + rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); + return 0; + } + } + cidr = min(cidr, common_bits(down, key, bits)); + parent = node; + + if (newnode->cidr == cidr) { + rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); + if (!parent) + rcu_assign_pointer(*trie, newnode); + else + rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), + newnode); + } else { + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (unlikely(!node)) { + kfree(newnode); + return -ENOMEM; + } + INIT_LIST_HEAD(&node->peer_list); + copy_and_assign_cidr(node, newnode->bits, cidr, bits); + + rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); + rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); + if (!parent) + rcu_assign_pointer(*trie, node); + else + rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), + node); + } + return 0; +} + +void wg_allowedips_init(struct allowedips *table) +{ + table->root4 = table->root6 = NULL; + table->seq = 1; +} + +void wg_allowedips_free(struct allowedips *table, struct mutex *lock) +{ + struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6; + + ++table->seq; + RCU_INIT_POINTER(table->root4, NULL); + RCU_INIT_POINTER(table->root6, NULL); + if (rcu_access_pointer(old4)) { + struct allowedips_node *node = rcu_dereference_protected(old4, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); + } + if (rcu_access_pointer(old6)) { + struct allowedips_node *node = rcu_dereference_protected(old6, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); + } +} + +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls */ + u8 key[4] __aligned(__alignof(u32)); + + ++table->seq; + swap_endian(key, (const u8 *)ip, 32); + return add(&table->root4, 32, key, cidr, peer, lock); +} + +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls64 */ + u8 key[16] __aligned(__alignof(u64)); + + ++table->seq; + swap_endian(key, (const u8 *)ip, 128); + return add(&table->root6, 128, key, cidr, peer, lock); +} + +void wg_allowedips_remove_by_peer(struct allowedips *table, + struct wg_peer *peer, struct mutex *lock) +{ + ++table->seq; + walk_remove_by_peer(&table->root4, peer, lock); + walk_remove_by_peer(&table->root6, peer, lock); +} + +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr) +{ + const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U); + swap_endian(ip, node->bits, node->bitlen); + memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes); + if (node->cidr) + ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U); + + *cidr = node->cidr; + return node->bitlen == 32 ? AF_INET : AF_INET6; +} + +/* Returns a strong reference to a peer */ +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return lookup(table->root4, 32, &ip_hdr(skb)->daddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr); + return NULL; +} + +/* Returns a strong reference to a peer */ +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return lookup(table->root4, 32, &ip_hdr(skb)->saddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr); + return NULL; +} + +#include "selftest/allowedips.c" diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h new file mode 100644 index 000000000000..e5c83cafcef4 --- /dev/null +++ b/drivers/net/wireguard/allowedips.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_ALLOWEDIPS_H +#define _WG_ALLOWEDIPS_H + +#include <linux/mutex.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +struct wg_peer; + +struct allowedips_node { + struct wg_peer __rcu *peer; + struct allowedips_node __rcu *bit[2]; + /* While it may seem scandalous that we waste space for v4, + * we're alloc'ing to the nearest power of 2 anyway, so this + * doesn't actually make a difference. + */ + u8 bits[16] __aligned(__alignof(u64)); + u8 cidr, bit_at_a, bit_at_b, bitlen; + + /* Keep rarely used list at bottom to be beyond cache line. */ + union { + struct list_head peer_list; + struct rcu_head rcu; + }; +}; + +struct allowedips { + struct allowedips_node __rcu *root4; + struct allowedips_node __rcu *root6; + u64 seq; +}; + +void wg_allowedips_init(struct allowedips *table); +void wg_allowedips_free(struct allowedips *table, struct mutex *mutex); +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); +void wg_allowedips_remove_by_peer(struct allowedips *table, + struct wg_peer *peer, struct mutex *lock); +/* The ip input pointer should be __aligned(__alignof(u64))) */ +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr); + +/* These return a strong reference to a peer: */ +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, + struct sk_buff *skb); +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, + struct sk_buff *skb); + +#ifdef DEBUG +bool wg_allowedips_selftest(void); +#endif + +#endif /* _WG_ALLOWEDIPS_H */ diff --git a/drivers/net/wireguard/cookie.c b/drivers/net/wireguard/cookie.c new file mode 100644 index 000000000000..4956f0499c19 --- /dev/null +++ b/drivers/net/wireguard/cookie.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "cookie.h" +#include "peer.h" +#include "device.h" +#include "messages.h" +#include "ratelimiter.h" +#include "timers.h" + +#include <crypto/blake2s.h> +#include <crypto/chacha20poly1305.h> + +#include <net/ipv6.h> +#include <crypto/algapi.h> + +void wg_cookie_checker_init(struct cookie_checker *checker, + struct wg_device *wg) +{ + init_rwsem(&checker->secret_lock); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + checker->device = wg; +} + +enum { COOKIE_KEY_LABEL_LEN = 8 }; +static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----"; +static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--"; + +static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 pubkey[NOISE_PUBLIC_KEY_LEN], + const u8 label[COOKIE_KEY_LABEL_LEN]) +{ + struct blake2s_state blake; + + blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN); + blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN); + blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN); + blake2s_final(&blake, key); +} + +/* Must hold peer->handshake.static_identity->lock */ +void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker) +{ + if (likely(checker->device->static_identity.has_identity)) { + precompute_key(checker->cookie_encryption_key, + checker->device->static_identity.static_public, + cookie_key_label); + precompute_key(checker->message_mac1_key, + checker->device->static_identity.static_public, + mac1_key_label); + } else { + memset(checker->cookie_encryption_key, 0, + NOISE_SYMMETRIC_KEY_LEN); + memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN); + } +} + +void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer) +{ + precompute_key(peer->latest_cookie.cookie_decryption_key, + peer->handshake.remote_static, cookie_key_label); + precompute_key(peer->latest_cookie.message_mac1_key, + peer->handshake.remote_static, mac1_key_label); +} + +void wg_cookie_init(struct cookie *cookie) +{ + memset(cookie, 0, sizeof(*cookie)); + init_rwsem(&cookie->lock); +} + +static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, + const u8 key[NOISE_SYMMETRIC_KEY_LEN]) +{ + len = len - sizeof(struct message_macs) + + offsetof(struct message_macs, mac1); + blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN); +} + +static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, + const u8 cookie[COOKIE_LEN]) +{ + len = len - sizeof(struct message_macs) + + offsetof(struct message_macs, mac2); + blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN); +} + +static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, + struct cookie_checker *checker) +{ + struct blake2s_state state; + + if (wg_birthdate_has_expired(checker->secret_birthdate, + COOKIE_SECRET_MAX_AGE)) { + down_write(&checker->secret_lock); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + up_write(&checker->secret_lock); + } + + down_read(&checker->secret_lock); + + blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN); + if (skb->protocol == htons(ETH_P_IP)) + blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, + sizeof(struct in_addr)); + else if (skb->protocol == htons(ETH_P_IPV6)) + blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); + blake2s_final(&state, cookie); + + up_read(&checker->secret_lock); +} + +enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, + struct sk_buff *skb, + bool check_cookie) +{ + struct message_macs *macs = (struct message_macs *) + (skb->data + skb->len - sizeof(*macs)); + enum cookie_mac_state ret; + u8 computed_mac[COOKIE_LEN]; + u8 cookie[COOKIE_LEN]; + + ret = INVALID_MAC; + compute_mac1(computed_mac, skb->data, skb->len, + checker->message_mac1_key); + if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_BUT_NO_COOKIE; + + if (!check_cookie) + goto out; + + make_cookie(cookie, skb, checker); + + compute_mac2(computed_mac, skb->data, skb->len, cookie); + if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED; + if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev))) + goto out; + + ret = VALID_MAC_WITH_COOKIE; + +out: + return ret; +} + +void wg_cookie_add_mac_to_packet(void *message, size_t len, + struct wg_peer *peer) +{ + struct message_macs *macs = (struct message_macs *) + ((u8 *)message + len - sizeof(*macs)); + + down_write(&peer->latest_cookie.lock); + compute_mac1(macs->mac1, message, len, + peer->latest_cookie.message_mac1_key); + memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN); + peer->latest_cookie.have_sent_mac1 = true; + up_write(&peer->latest_cookie.lock); + + down_read(&peer->latest_cookie.lock); + if (peer->latest_cookie.is_valid && + !wg_birthdate_has_expired(peer->latest_cookie.birthdate, + COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY)) + compute_mac2(macs->mac2, message, len, + peer->latest_cookie.cookie); + else + memset(macs->mac2, 0, COOKIE_LEN); + up_read(&peer->latest_cookie.lock); +} + +void wg_cookie_message_create(struct message_handshake_cookie *dst, + struct sk_buff *skb, __le32 index, + struct cookie_checker *checker) +{ + struct message_macs *macs = (struct message_macs *) + ((u8 *)skb->data + skb->len - sizeof(*macs)); + u8 cookie[COOKIE_LEN]; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE); + dst->receiver_index = index; + get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN); + + make_cookie(cookie, skb, checker); + xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN, + macs->mac1, COOKIE_LEN, dst->nonce, + checker->cookie_encryption_key); +} + +void wg_cookie_message_consume(struct message_handshake_cookie *src, + struct wg_device *wg) +{ + struct wg_peer *peer = NULL; + u8 cookie[COOKIE_LEN]; + bool ret; + + if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable, + INDEX_HASHTABLE_HANDSHAKE | + INDEX_HASHTABLE_KEYPAIR, + src->receiver_index, &peer))) + return; + + down_read(&peer->latest_cookie.lock); + if (unlikely(!peer->latest_cookie.have_sent_mac1)) { + up_read(&peer->latest_cookie.lock); + goto out; + } + ret = xchacha20poly1305_decrypt( + cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), + peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce, + peer->latest_cookie.cookie_decryption_key); + up_read(&peer->latest_cookie.lock); + + if (ret) { + down_write(&peer->latest_cookie.lock); + memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN); + peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns(); + peer->latest_cookie.is_valid = true; + peer->latest_cookie.have_sent_mac1 = false; + up_write(&peer->latest_cookie.lock); + } else { + net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n", + wg->dev->name); + } + +out: + wg_peer_put(peer); +} diff --git a/drivers/net/wireguard/cookie.h b/drivers/net/wireguard/cookie.h new file mode 100644 index 000000000000..c4bd61ca03f2 --- /dev/null +++ b/drivers/net/wireguard/cookie.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_COOKIE_H +#define _WG_COOKIE_H + +#include "messages.h" +#include <linux/rwsem.h> + +struct wg_peer; + +struct cookie_checker { + u8 secret[NOISE_HASH_LEN]; + u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN]; + u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; + u64 secret_birthdate; + struct rw_semaphore secret_lock; + struct wg_device *device; +}; + +struct cookie { + u64 birthdate; + bool is_valid; + u8 cookie[COOKIE_LEN]; + bool have_sent_mac1; + u8 last_mac1_sent[COOKIE_LEN]; + u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN]; + u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; + struct rw_semaphore lock; +}; + +enum cookie_mac_state { + INVALID_MAC, + VALID_MAC_BUT_NO_COOKIE, + VALID_MAC_WITH_COOKIE_BUT_RATELIMITED, + VALID_MAC_WITH_COOKIE +}; + +void wg_cookie_checker_init(struct cookie_checker *checker, + struct wg_device *wg); +void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker); +void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer); +void wg_cookie_init(struct cookie *cookie); + +enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, + struct sk_buff *skb, + bool check_cookie); +void wg_cookie_add_mac_to_packet(void *message, size_t len, + struct wg_peer *peer); + +void wg_cookie_message_create(struct message_handshake_cookie *src, + struct sk_buff *skb, __le32 index, + struct cookie_checker *checker); +void wg_cookie_message_consume(struct message_handshake_cookie *src, + struct wg_device *wg); + +#endif /* _WG_COOKIE_H */ diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c new file mode 100644 index 000000000000..16b19824b9ad --- /dev/null +++ b/drivers/net/wireguard/device.c @@ -0,0 +1,458 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "queueing.h" +#include "socket.h" +#include "timers.h" +#include "device.h" +#include "ratelimiter.h" +#include "peer.h" +#include "messages.h" + +#include <linux/module.h> +#include <linux/rtnetlink.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/if_arp.h> +#include <linux/icmp.h> +#include <linux/suspend.h> +#include <net/icmp.h> +#include <net/rtnetlink.h> +#include <net/ip_tunnels.h> +#include <net/addrconf.h> + +static LIST_HEAD(device_list); + +static int wg_open(struct net_device *dev) +{ + struct in_device *dev_v4 = __in_dev_get_rtnl(dev); + struct inet6_dev *dev_v6 = __in6_dev_get(dev); + struct wg_device *wg = netdev_priv(dev); + struct wg_peer *peer; + int ret; + + if (dev_v4) { + /* At some point we might put this check near the ip_rt_send_ + * redirect call of ip_forward in net/ipv4/ip_forward.c, similar + * to the current secpath check. + */ + IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false); + IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false; + } + if (dev_v6) + dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; + + ret = wg_socket_init(wg, wg->incoming_port); + if (ret < 0) + return ret; + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + wg_packet_send_staged_packets(peer); + if (peer->persistent_keepalive_interval) + wg_packet_send_keepalive(peer); + } + mutex_unlock(&wg->device_update_lock); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int wg_pm_notification(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct wg_device *wg; + struct wg_peer *peer; + + /* If the machine is constantly suspending and resuming, as part of + * its normal operation rather than as a somewhat rare event, then we + * don't actually want to clear keys. + */ + if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID)) + return 0; + + if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE) + return 0; + + rtnl_lock(); + list_for_each_entry(wg, &device_list, device_list) { + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + del_timer(&peer->timer_zero_key_material); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + } + mutex_unlock(&wg->device_update_lock); + } + rtnl_unlock(); + rcu_barrier(); + return 0; +} + +static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification }; +#endif + +static int wg_stop(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + struct wg_peer *peer; + + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + wg_packet_purge_staged_packets(peer); + wg_timers_stop(peer); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + } + mutex_unlock(&wg->device_update_lock); + skb_queue_purge(&wg->incoming_handshakes); + wg_socket_reinit(wg, NULL, NULL); + return 0; +} + +static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + struct sk_buff_head packets; + struct wg_peer *peer; + struct sk_buff *next; + sa_family_t family; + u32 mtu; + int ret; + + if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) { + ret = -EPROTONOSUPPORT; + net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name); + goto err; + } + + peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb); + if (unlikely(!peer)) { + ret = -ENOKEY; + if (skb->protocol == htons(ETH_P_IP)) + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n", + dev->name, &ip_hdr(skb)->daddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", + dev->name, &ipv6_hdr(skb)->daddr); + goto err; + } + + family = READ_ONCE(peer->endpoint.addr.sa_family); + if (unlikely(family != AF_INET && family != AF_INET6)) { + ret = -EDESTADDRREQ; + net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n", + dev->name, peer->internal_id); + goto err_peer; + } + + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + __skb_queue_head_init(&packets); + if (!skb_is_gso(skb)) { + skb_mark_not_on_list(skb); + } else { + struct sk_buff *segs = skb_gso_segment(skb, 0); + + if (unlikely(IS_ERR(segs))) { + ret = PTR_ERR(segs); + goto err_peer; + } + dev_kfree_skb(skb); + skb = segs; + } + + skb_list_walk_safe(skb, skb, next) { + skb_mark_not_on_list(skb); + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + continue; + + /* We only need to keep the original dst around for icmp, + * so at this point we're in a position to drop it. + */ + skb_dst_drop(skb); + + PACKET_CB(skb)->mtu = mtu; + + __skb_queue_tail(&packets, skb); + } + + spin_lock_bh(&peer->staged_packet_queue.lock); + /* If the queue is getting too big, we start removing the oldest packets + * until it's small again. We do this before adding the new packet, so + * we don't remove GSO segments that are in excess. + */ + while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) { + dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); + ++dev->stats.tx_dropped; + } + skb_queue_splice_tail(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + wg_packet_send_staged_packets(peer); + + wg_peer_put(peer); + return NETDEV_TX_OK; + +err_peer: + wg_peer_put(peer); +err: + ++dev->stats.tx_errors; + if (skb->protocol == htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + else if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + kfree_skb(skb); + return ret; +} + +static const struct net_device_ops netdev_ops = { + .ndo_open = wg_open, + .ndo_stop = wg_stop, + .ndo_start_xmit = wg_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64 +}; + +static void wg_destruct(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + + rtnl_lock(); + list_del(&wg->device_list); + rtnl_unlock(); + mutex_lock(&wg->device_update_lock); + wg->incoming_port = 0; + wg_socket_reinit(wg, NULL, NULL); + /* The final references are cleared in the below calls to destroy_workqueue. */ + wg_peer_remove_all(wg); + destroy_workqueue(wg->handshake_receive_wq); + destroy_workqueue(wg->handshake_send_wq); + destroy_workqueue(wg->packet_crypt_wq); + wg_packet_queue_free(&wg->decrypt_queue, true); + wg_packet_queue_free(&wg->encrypt_queue, true); + rcu_barrier(); /* Wait for all the peers to be actually freed. */ + wg_ratelimiter_uninit(); + memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); + skb_queue_purge(&wg->incoming_handshakes); + free_percpu(dev->tstats); + free_percpu(wg->incoming_handshakes_worker); + if (wg->have_creating_net_ref) + put_net(wg->creating_net); + kvfree(wg->index_hashtable); + kvfree(wg->peer_hashtable); + mutex_unlock(&wg->device_update_lock); + + pr_debug("%s: Interface deleted\n", dev->name); + free_netdev(dev); +} + +static const struct device_type device_type = { .name = KBUILD_MODNAME }; + +static void wg_setup(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | + NETIF_F_SG | NETIF_F_GSO | + NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; + + dev->netdev_ops = &netdev_ops; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->needed_headroom = DATA_PACKET_HEAD_ROOM; + dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE); + dev->type = ARPHRD_NONE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP; + dev->priv_flags |= IFF_NO_QUEUE; + dev->features |= NETIF_F_LLTX; + dev->features |= WG_NETDEV_FEATURES; + dev->hw_features |= WG_NETDEV_FEATURES; + dev->hw_enc_features |= WG_NETDEV_FEATURES; + dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - + sizeof(struct udphdr) - + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); + + SET_NETDEV_DEVTYPE(dev, &device_type); + + /* We need to keep the dst around in case of icmp replies. */ + netif_keep_dst(dev); + + memset(wg, 0, sizeof(*wg)); + wg->dev = dev; +} + +static int wg_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct wg_device *wg = netdev_priv(dev); + int ret = -ENOMEM; + + wg->creating_net = src_net; + init_rwsem(&wg->static_identity.lock); + mutex_init(&wg->socket_update_lock); + mutex_init(&wg->device_update_lock); + skb_queue_head_init(&wg->incoming_handshakes); + wg_allowedips_init(&wg->peer_allowedips); + wg_cookie_checker_init(&wg->cookie_checker, wg); + INIT_LIST_HEAD(&wg->peer_list); + wg->device_update_gen = 1; + + wg->peer_hashtable = wg_pubkey_hashtable_alloc(); + if (!wg->peer_hashtable) + return ret; + + wg->index_hashtable = wg_index_hashtable_alloc(); + if (!wg->index_hashtable) + goto err_free_peer_hashtable; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + goto err_free_index_hashtable; + + wg->incoming_handshakes_worker = + wg_packet_percpu_multicore_worker_alloc( + wg_packet_handshake_receive_worker, wg); + if (!wg->incoming_handshakes_worker) + goto err_free_tstats; + + wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", + WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_receive_wq) + goto err_free_incoming_handshakes; + + wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", + WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_send_wq) + goto err_destroy_handshake_receive; + + wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s", + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name); + if (!wg->packet_crypt_wq) + goto err_destroy_handshake_send; + + ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, + true, MAX_QUEUED_PACKETS); + if (ret < 0) + goto err_destroy_packet_crypt; + + ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, + true, MAX_QUEUED_PACKETS); + if (ret < 0) + goto err_free_encrypt_queue; + + ret = wg_ratelimiter_init(); + if (ret < 0) + goto err_free_decrypt_queue; + + ret = register_netdevice(dev); + if (ret < 0) + goto err_uninit_ratelimiter; + + list_add(&wg->device_list, &device_list); + + /* We wait until the end to assign priv_destructor, so that + * register_netdevice doesn't call it for us if it fails. + */ + dev->priv_destructor = wg_destruct; + + pr_debug("%s: Interface created\n", dev->name); + return ret; + +err_uninit_ratelimiter: + wg_ratelimiter_uninit(); +err_free_decrypt_queue: + wg_packet_queue_free(&wg->decrypt_queue, true); +err_free_encrypt_queue: + wg_packet_queue_free(&wg->encrypt_queue, true); +err_destroy_packet_crypt: + destroy_workqueue(wg->packet_crypt_wq); +err_destroy_handshake_send: + destroy_workqueue(wg->handshake_send_wq); +err_destroy_handshake_receive: + destroy_workqueue(wg->handshake_receive_wq); +err_free_incoming_handshakes: + free_percpu(wg->incoming_handshakes_worker); +err_free_tstats: + free_percpu(dev->tstats); +err_free_index_hashtable: + kvfree(wg->index_hashtable); +err_free_peer_hashtable: + kvfree(wg->peer_hashtable); + return ret; +} + +static struct rtnl_link_ops link_ops __read_mostly = { + .kind = KBUILD_MODNAME, + .priv_size = sizeof(struct wg_device), + .setup = wg_setup, + .newlink = wg_newlink, +}; + +static int wg_netdevice_notification(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct net_device *dev = ((struct netdev_notifier_info *)data)->dev; + struct wg_device *wg = netdev_priv(dev); + + ASSERT_RTNL(); + + if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops) + return 0; + + if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) { + put_net(wg->creating_net); + wg->have_creating_net_ref = false; + } else if (dev_net(dev) != wg->creating_net && + !wg->have_creating_net_ref) { + wg->have_creating_net_ref = true; + get_net(wg->creating_net); + } + return 0; +} + +static struct notifier_block netdevice_notifier = { + .notifier_call = wg_netdevice_notification +}; + +int __init wg_device_init(void) +{ + int ret; + +#ifdef CONFIG_PM_SLEEP + ret = register_pm_notifier(&pm_notifier); + if (ret) + return ret; +#endif + + ret = register_netdevice_notifier(&netdevice_notifier); + if (ret) + goto error_pm; + + ret = rtnl_link_register(&link_ops); + if (ret) + goto error_netdevice; + + return 0; + +error_netdevice: + unregister_netdevice_notifier(&netdevice_notifier); +error_pm: +#ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&pm_notifier); +#endif + return ret; +} + +void wg_device_uninit(void) +{ + rtnl_link_unregister(&link_ops); + unregister_netdevice_notifier(&netdevice_notifier); +#ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&pm_notifier); +#endif + rcu_barrier(); +} diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h new file mode 100644 index 000000000000..c91f3051c5c7 --- /dev/null +++ b/drivers/net/wireguard/device.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_DEVICE_H +#define _WG_DEVICE_H + +#include "noise.h" +#include "allowedips.h" +#include "peerlookup.h" +#include "cookie.h" + +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <linux/net.h> +#include <linux/ptr_ring.h> + +struct wg_device; + +struct multicore_worker { + void *ptr; + struct work_struct work; +}; + +struct crypt_queue { + struct ptr_ring ring; + union { + struct { + struct multicore_worker __percpu *worker; + int last_cpu; + }; + struct work_struct work; + }; +}; + +struct wg_device { + struct net_device *dev; + struct crypt_queue encrypt_queue, decrypt_queue; + struct sock __rcu *sock4, *sock6; + struct net *creating_net; + struct noise_static_identity static_identity; + struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; + struct workqueue_struct *packet_crypt_wq; + struct sk_buff_head incoming_handshakes; + int incoming_handshake_cpu; + struct multicore_worker __percpu *incoming_handshakes_worker; + struct cookie_checker cookie_checker; + struct pubkey_hashtable *peer_hashtable; + struct index_hashtable *index_hashtable; + struct allowedips peer_allowedips; + struct mutex device_update_lock, socket_update_lock; + struct list_head device_list, peer_list; + unsigned int num_peers, device_update_gen; + u32 fwmark; + u16 incoming_port; + bool have_creating_net_ref; +}; + +int wg_device_init(void); +void wg_device_uninit(void); + +/* Later after the dust settles, this can be moved into include/linux/skbuff.h, + * where virtually all code that deals with GSO segs can benefit, around ~30 + * drivers as of writing. + */ +#define skb_list_walk_safe(first, skb, next) \ + for (skb = first, next = skb->next; skb; \ + skb = next, next = skb ? skb->next : NULL) + +#endif /* _WG_DEVICE_H */ diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c new file mode 100644 index 000000000000..7a7d5f1a80fc --- /dev/null +++ b/drivers/net/wireguard/main.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "version.h" +#include "device.h" +#include "noise.h" +#include "queueing.h" +#include "ratelimiter.h" +#include "netlink.h" + +#include <uapi/linux/wireguard.h> + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/genetlink.h> +#include <net/rtnetlink.h> + +static int __init mod_init(void) +{ + int ret; + +#ifdef DEBUG + if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() || + !wg_ratelimiter_selftest()) + return -ENOTRECOVERABLE; +#endif + wg_noise_init(); + + ret = wg_device_init(); + if (ret < 0) + goto err_device; + + ret = wg_genetlink_init(); + if (ret < 0) + goto err_netlink; + + pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n"); + pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n"); + + return 0; + +err_netlink: + wg_device_uninit(); +err_device: + return ret; +} + +static void __exit mod_exit(void) +{ + wg_genetlink_uninit(); + wg_device_uninit(); +} + +module_init(mod_init); +module_exit(mod_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("WireGuard secure network tunnel"); +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); +MODULE_VERSION(WIREGUARD_VERSION); +MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME); +MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME); diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h new file mode 100644 index 000000000000..b8a7b9ce32ba --- /dev/null +++ b/drivers/net/wireguard/messages.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_MESSAGES_H +#define _WG_MESSAGES_H + +#include <crypto/curve25519.h> +#include <crypto/chacha20poly1305.h> +#include <crypto/blake2s.h> + +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/skbuff.h> + +enum noise_lengths { + NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE, + NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE, + NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32), + NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE, + NOISE_HASH_LEN = BLAKE2S_HASH_SIZE +}; + +#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN) + +enum cookie_values { + COOKIE_SECRET_MAX_AGE = 2 * 60, + COOKIE_SECRET_LATENCY = 5, + COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE, + COOKIE_LEN = 16 +}; + +enum counter_values { + COUNTER_BITS_TOTAL = 2048, + COUNTER_REDUNDANT_BITS = BITS_PER_LONG, + COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS +}; + +enum limits { + REKEY_AFTER_MESSAGES = 1ULL << 60, + REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1, + REKEY_TIMEOUT = 5, + REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3, + REKEY_AFTER_TIME = 120, + REJECT_AFTER_TIME = 180, + INITIATIONS_PER_SECOND = 50, + MAX_PEERS_PER_DEVICE = 1U << 20, + KEEPALIVE_TIMEOUT = 10, + MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT, + MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */ + MAX_STAGED_PACKETS = 128, + MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */ +}; + +enum message_type { + MESSAGE_INVALID = 0, + MESSAGE_HANDSHAKE_INITIATION = 1, + MESSAGE_HANDSHAKE_RESPONSE = 2, + MESSAGE_HANDSHAKE_COOKIE = 3, + MESSAGE_DATA = 4 +}; + +struct message_header { + /* The actual layout of this that we want is: + * u8 type + * u8 reserved_zero[3] + * + * But it turns out that by encoding this as little endian, + * we achieve the same thing, and it makes checking faster. + */ + __le32 type; +}; + +struct message_macs { + u8 mac1[COOKIE_LEN]; + u8 mac2[COOKIE_LEN]; +}; + +struct message_handshake_initiation { + struct message_header header; + __le32 sender_index; + u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; + u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)]; + u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)]; + struct message_macs macs; +}; + +struct message_handshake_response { + struct message_header header; + __le32 sender_index; + __le32 receiver_index; + u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; + u8 encrypted_nothing[noise_encrypted_len(0)]; + struct message_macs macs; +}; + +struct message_handshake_cookie { + struct message_header header; + __le32 receiver_index; + u8 nonce[COOKIE_NONCE_LEN]; + u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)]; +}; + +struct message_data { + struct message_header header; + __le32 key_idx; + __le64 counter; + u8 encrypted_data[]; +}; + +#define message_data_len(plain_len) \ + (noise_encrypted_len(plain_len) + sizeof(struct message_data)) + +enum message_alignments { + MESSAGE_PADDING_MULTIPLE = 16, + MESSAGE_MINIMUM_LENGTH = message_data_len(0) +}; + +#define SKB_HEADER_LEN \ + (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \ + sizeof(struct udphdr) + NET_SKB_PAD) +#define DATA_PACKET_HEAD_ROOM \ + ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4) + +enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ }; + +#endif /* _WG_MESSAGES_H */ diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c new file mode 100644 index 000000000000..0fdbd1c45977 --- /dev/null +++ b/drivers/net/wireguard/netlink.c @@ -0,0 +1,642 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "netlink.h" +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "queueing.h" +#include "messages.h" + +#include <uapi/linux/wireguard.h> + +#include <linux/if.h> +#include <net/genetlink.h> +#include <net/sock.h> +#include <crypto/algapi.h> + +static struct genl_family genl_family; + +static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = { + [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 }, + [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, + [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, + [WGDEVICE_A_FLAGS] = { .type = NLA_U32 }, + [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 }, + [WGDEVICE_A_FWMARK] = { .type = NLA_U32 }, + [WGDEVICE_A_PEERS] = { .type = NLA_NESTED } +}; + +static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = { + [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, + [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN }, + [WGPEER_A_FLAGS] = { .type = NLA_U32 }, + [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) }, + [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 }, + [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) }, + [WGPEER_A_RX_BYTES] = { .type = NLA_U64 }, + [WGPEER_A_TX_BYTES] = { .type = NLA_U64 }, + [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED }, + [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 } +}; + +static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = { + [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 }, + [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) }, + [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 } +}; + +static struct wg_device *lookup_interface(struct nlattr **attrs, + struct sk_buff *skb) +{ + struct net_device *dev = NULL; + + if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME]) + return ERR_PTR(-EBADR); + if (attrs[WGDEVICE_A_IFINDEX]) + dev = dev_get_by_index(sock_net(skb->sk), + nla_get_u32(attrs[WGDEVICE_A_IFINDEX])); + else if (attrs[WGDEVICE_A_IFNAME]) + dev = dev_get_by_name(sock_net(skb->sk), + nla_data(attrs[WGDEVICE_A_IFNAME])); + if (!dev) + return ERR_PTR(-ENODEV); + if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind || + strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) { + dev_put(dev); + return ERR_PTR(-EOPNOTSUPP); + } + return netdev_priv(dev); +} + +static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr, + int family) +{ + struct nlattr *allowedip_nest; + + allowedip_nest = nla_nest_start(skb, 0); + if (!allowedip_nest) + return -EMSGSIZE; + + if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) || + nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) || + nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ? + sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) { + nla_nest_cancel(skb, allowedip_nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, allowedip_nest); + return 0; +} + +struct dump_ctx { + struct wg_device *wg; + struct wg_peer *next_peer; + u64 allowedips_seq; + struct allowedips_node *next_allowedip; +}; + +#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args) + +static int +get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) +{ + + struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0); + struct allowedips_node *allowedips_node = ctx->next_allowedip; + bool fail; + + if (!peer_nest) + return -EMSGSIZE; + + down_read(&peer->handshake.lock); + fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, + peer->handshake.remote_static); + up_read(&peer->handshake.lock); + if (fail) + goto err; + + if (!allowedips_node) { + const struct __kernel_timespec last_handshake = { + .tv_sec = peer->walltime_last_handshake.tv_sec, + .tv_nsec = peer->walltime_last_handshake.tv_nsec + }; + + down_read(&peer->handshake.lock); + fail = nla_put(skb, WGPEER_A_PRESHARED_KEY, + NOISE_SYMMETRIC_KEY_LEN, + peer->handshake.preshared_key); + up_read(&peer->handshake.lock); + if (fail) + goto err; + + if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME, + sizeof(last_handshake), &last_handshake) || + nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + peer->persistent_keepalive_interval) || + nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes, + WGPEER_A_UNSPEC) || + nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes, + WGPEER_A_UNSPEC) || + nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1)) + goto err; + + read_lock_bh(&peer->endpoint_lock); + if (peer->endpoint.addr.sa_family == AF_INET) + fail = nla_put(skb, WGPEER_A_ENDPOINT, + sizeof(peer->endpoint.addr4), + &peer->endpoint.addr4); + else if (peer->endpoint.addr.sa_family == AF_INET6) + fail = nla_put(skb, WGPEER_A_ENDPOINT, + sizeof(peer->endpoint.addr6), + &peer->endpoint.addr6); + read_unlock_bh(&peer->endpoint_lock); + if (fail) + goto err; + allowedips_node = + list_first_entry_or_null(&peer->allowedips_list, + struct allowedips_node, peer_list); + } + if (!allowedips_node) + goto no_allowedips; + if (!ctx->allowedips_seq) + ctx->allowedips_seq = peer->device->peer_allowedips.seq; + else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) + goto no_allowedips; + + allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); + if (!allowedips_nest) + goto err; + + list_for_each_entry_from(allowedips_node, &peer->allowedips_list, + peer_list) { + u8 cidr, ip[16] __aligned(__alignof(u64)); + int family; + + family = wg_allowedips_read_node(allowedips_node, ip, &cidr); + if (get_allowedips(skb, ip, cidr, family)) { + nla_nest_end(skb, allowedips_nest); + nla_nest_end(skb, peer_nest); + ctx->next_allowedip = allowedips_node; + return -EMSGSIZE; + } + } + nla_nest_end(skb, allowedips_nest); +no_allowedips: + nla_nest_end(skb, peer_nest); + ctx->next_allowedip = NULL; + ctx->allowedips_seq = 0; + return 0; +err: + nla_nest_cancel(skb, peer_nest); + return -EMSGSIZE; +} + +static int wg_get_device_start(struct netlink_callback *cb) +{ + struct wg_device *wg; + + wg = lookup_interface(genl_dumpit_info(cb)->attrs, cb->skb); + if (IS_ERR(wg)) + return PTR_ERR(wg); + DUMP_CTX(cb)->wg = wg; + return 0; +} + +static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct wg_peer *peer, *next_peer_cursor; + struct dump_ctx *ctx = DUMP_CTX(cb); + struct wg_device *wg = ctx->wg; + struct nlattr *peers_nest; + int ret = -EMSGSIZE; + bool done = true; + void *hdr; + + rtnl_lock(); + mutex_lock(&wg->device_update_lock); + cb->seq = wg->device_update_gen; + next_peer_cursor = ctx->next_peer; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE); + if (!hdr) + goto out; + genl_dump_check_consistent(cb, hdr); + + if (!ctx->next_peer) { + if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT, + wg->incoming_port) || + nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) || + nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) || + nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name)) + goto out; + + down_read(&wg->static_identity.lock); + if (wg->static_identity.has_identity) { + if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY, + NOISE_PUBLIC_KEY_LEN, + wg->static_identity.static_private) || + nla_put(skb, WGDEVICE_A_PUBLIC_KEY, + NOISE_PUBLIC_KEY_LEN, + wg->static_identity.static_public)) { + up_read(&wg->static_identity.lock); + goto out; + } + } + up_read(&wg->static_identity.lock); + } + + peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS); + if (!peers_nest) + goto out; + ret = 0; + /* If the last cursor was removed via list_del_init in peer_remove, then + * we just treat this the same as there being no more peers left. The + * reason is that seq_nr should indicate to userspace that this isn't a + * coherent dump anyway, so they'll try again. + */ + if (list_empty(&wg->peer_list) || + (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { + nla_nest_cancel(skb, peers_nest); + goto out; + } + lockdep_assert_held(&wg->device_update_lock); + peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); + list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { + if (get_peer(peer, skb, ctx)) { + done = false; + break; + } + next_peer_cursor = peer; + } + nla_nest_end(skb, peers_nest); + +out: + if (!ret && !done && next_peer_cursor) + wg_peer_get(next_peer_cursor); + wg_peer_put(ctx->next_peer); + mutex_unlock(&wg->device_update_lock); + rtnl_unlock(); + + if (ret) { + genlmsg_cancel(skb, hdr); + return ret; + } + genlmsg_end(skb, hdr); + if (done) { + ctx->next_peer = NULL; + return 0; + } + ctx->next_peer = next_peer_cursor; + return skb->len; + + /* At this point, we can't really deal ourselves with safely zeroing out + * the private key material after usage. This will need an additional API + * in the kernel for marking skbs as zero_on_free. + */ +} + +static int wg_get_device_done(struct netlink_callback *cb) +{ + struct dump_ctx *ctx = DUMP_CTX(cb); + + if (ctx->wg) + dev_put(ctx->wg->dev); + wg_peer_put(ctx->next_peer); + return 0; +} + +static int set_port(struct wg_device *wg, u16 port) +{ + struct wg_peer *peer; + + if (wg->incoming_port == port) + return 0; + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); + if (!netif_running(wg->dev)) { + wg->incoming_port = port; + return 0; + } + return wg_socket_init(wg, port); +} + +static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs) +{ + int ret = -EINVAL; + u16 family; + u8 cidr; + + if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] || + !attrs[WGALLOWEDIP_A_CIDR_MASK]) + return ret; + family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]); + cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]); + + if (family == AF_INET && cidr <= 32 && + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr)) + ret = wg_allowedips_insert_v4( + &peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, + &peer->device->device_update_lock); + else if (family == AF_INET6 && cidr <= 128 && + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr)) + ret = wg_allowedips_insert_v6( + &peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, + &peer->device->device_update_lock); + + return ret; +} + +static int set_peer(struct wg_device *wg, struct nlattr **attrs) +{ + u8 *public_key = NULL, *preshared_key = NULL; + struct wg_peer *peer = NULL; + u32 flags = 0; + int ret; + + ret = -EINVAL; + if (attrs[WGPEER_A_PUBLIC_KEY] && + nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN) + public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]); + else + goto out; + if (attrs[WGPEER_A_PRESHARED_KEY] && + nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN) + preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]); + + if (attrs[WGPEER_A_FLAGS]) + flags = nla_get_u32(attrs[WGPEER_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGPEER_F_ALL) + goto out; + + ret = -EPFNOSUPPORT; + if (attrs[WGPEER_A_PROTOCOL_VERSION]) { + if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1) + goto out; + } + + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, + nla_data(attrs[WGPEER_A_PUBLIC_KEY])); + ret = 0; + if (!peer) { /* Peer doesn't exist yet. Add a new one. */ + if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY)) + goto out; + + /* The peer is new, so there aren't allowed IPs to remove. */ + flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS; + + down_read(&wg->static_identity.lock); + if (wg->static_identity.has_identity && + !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]), + wg->static_identity.static_public, + NOISE_PUBLIC_KEY_LEN)) { + /* We silently ignore peers that have the same public + * key as the device. The reason we do it silently is + * that we'd like for people to be able to reuse the + * same set of API calls across peers. + */ + up_read(&wg->static_identity.lock); + ret = 0; + goto out; + } + up_read(&wg->static_identity.lock); + + peer = wg_peer_create(wg, public_key, preshared_key); + if (IS_ERR(peer)) { + /* Similar to the above, if the key is invalid, we skip + * it without fanfare, so that services don't need to + * worry about doing key validation themselves. + */ + ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer); + peer = NULL; + goto out; + } + /* Take additional reference, as though we've just been + * looked up. + */ + wg_peer_get(peer); + } + + if (flags & WGPEER_F_REMOVE_ME) { + wg_peer_remove(peer); + goto out; + } + + if (preshared_key) { + down_write(&peer->handshake.lock); + memcpy(&peer->handshake.preshared_key, preshared_key, + NOISE_SYMMETRIC_KEY_LEN); + up_write(&peer->handshake.lock); + } + + if (attrs[WGPEER_A_ENDPOINT]) { + struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]); + size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]); + + if ((len == sizeof(struct sockaddr_in) && + addr->sa_family == AF_INET) || + (len == sizeof(struct sockaddr_in6) && + addr->sa_family == AF_INET6)) { + struct endpoint endpoint = { { { 0 } } }; + + memcpy(&endpoint.addr, addr, len); + wg_socket_set_peer_endpoint(peer, &endpoint); + } + } + + if (flags & WGPEER_F_REPLACE_ALLOWEDIPS) + wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer, + &wg->device_update_lock); + + if (attrs[WGPEER_A_ALLOWEDIPS]) { + struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1]; + int rem; + + nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) { + ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX, + attr, allowedip_policy, NULL); + if (ret < 0) + goto out; + ret = set_allowedip(peer, allowedip); + if (ret < 0) + goto out; + } + } + + if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) { + const u16 persistent_keepalive_interval = nla_get_u16( + attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]); + const bool send_keepalive = + !peer->persistent_keepalive_interval && + persistent_keepalive_interval && + netif_running(wg->dev); + + peer->persistent_keepalive_interval = persistent_keepalive_interval; + if (send_keepalive) + wg_packet_send_keepalive(peer); + } + + if (netif_running(wg->dev)) + wg_packet_send_staged_packets(peer); + +out: + wg_peer_put(peer); + if (attrs[WGPEER_A_PRESHARED_KEY]) + memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]), + nla_len(attrs[WGPEER_A_PRESHARED_KEY])); + return ret; +} + +static int wg_set_device(struct sk_buff *skb, struct genl_info *info) +{ + struct wg_device *wg = lookup_interface(info->attrs, skb); + u32 flags = 0; + int ret; + + if (IS_ERR(wg)) { + ret = PTR_ERR(wg); + goto out_nodev; + } + + rtnl_lock(); + mutex_lock(&wg->device_update_lock); + + if (info->attrs[WGDEVICE_A_FLAGS]) + flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGDEVICE_F_ALL) + goto out; + + ret = -EPERM; + if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || + info->attrs[WGDEVICE_A_FWMARK]) && + !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN)) + goto out; + + ++wg->device_update_gen; + + if (info->attrs[WGDEVICE_A_FWMARK]) { + struct wg_peer *peer; + + wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); + } + + if (info->attrs[WGDEVICE_A_LISTEN_PORT]) { + ret = set_port(wg, + nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT])); + if (ret) + goto out; + } + + if (flags & WGDEVICE_F_REPLACE_PEERS) + wg_peer_remove_all(wg); + + if (info->attrs[WGDEVICE_A_PRIVATE_KEY] && + nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) == + NOISE_PUBLIC_KEY_LEN) { + u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]); + u8 public_key[NOISE_PUBLIC_KEY_LEN]; + struct wg_peer *peer, *temp; + + if (!crypto_memneq(wg->static_identity.static_private, + private_key, NOISE_PUBLIC_KEY_LEN)) + goto skip_set_private_key; + + /* We remove before setting, to prevent race, which means doing + * two 25519-genpub ops. + */ + if (curve25519_generate_public(public_key, private_key)) { + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, + public_key); + if (peer) { + wg_peer_put(peer); + wg_peer_remove(peer); + } + } + + down_write(&wg->static_identity.lock); + wg_noise_set_static_identity_private_key(&wg->static_identity, + private_key); + list_for_each_entry_safe(peer, temp, &wg->peer_list, + peer_list) { + if (wg_noise_precompute_static_static(peer)) + wg_noise_expire_current_peer_keypairs(peer); + else + wg_peer_remove(peer); + } + wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); + up_write(&wg->static_identity.lock); + } +skip_set_private_key: + + if (info->attrs[WGDEVICE_A_PEERS]) { + struct nlattr *attr, *peer[WGPEER_A_MAX + 1]; + int rem; + + nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) { + ret = nla_parse_nested(peer, WGPEER_A_MAX, attr, + peer_policy, NULL); + if (ret < 0) + goto out; + ret = set_peer(wg, peer); + if (ret < 0) + goto out; + } + } + ret = 0; + +out: + mutex_unlock(&wg->device_update_lock); + rtnl_unlock(); + dev_put(wg->dev); +out_nodev: + if (info->attrs[WGDEVICE_A_PRIVATE_KEY]) + memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]), + nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY])); + return ret; +} + +static const struct genl_ops genl_ops[] = { + { + .cmd = WG_CMD_GET_DEVICE, + .start = wg_get_device_start, + .dumpit = wg_get_device_dump, + .done = wg_get_device_done, + .flags = GENL_UNS_ADMIN_PERM + }, { + .cmd = WG_CMD_SET_DEVICE, + .doit = wg_set_device, + .flags = GENL_UNS_ADMIN_PERM + } +}; + +static struct genl_family genl_family __ro_after_init = { + .ops = genl_ops, + .n_ops = ARRAY_SIZE(genl_ops), + .name = WG_GENL_NAME, + .version = WG_GENL_VERSION, + .maxattr = WGDEVICE_A_MAX, + .module = THIS_MODULE, + .policy = device_policy, + .netnsok = true +}; + +int __init wg_genetlink_init(void) +{ + return genl_register_family(&genl_family); +} + +void __exit wg_genetlink_uninit(void) +{ + genl_unregister_family(&genl_family); +} diff --git a/drivers/net/wireguard/netlink.h b/drivers/net/wireguard/netlink.h new file mode 100644 index 000000000000..15100d92e2e3 --- /dev/null +++ b/drivers/net/wireguard/netlink.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_NETLINK_H +#define _WG_NETLINK_H + +int wg_genetlink_init(void); +void wg_genetlink_uninit(void); + +#endif /* _WG_NETLINK_H */ diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c new file mode 100644 index 000000000000..d71c8db68a8c --- /dev/null +++ b/drivers/net/wireguard/noise.c @@ -0,0 +1,828 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "noise.h" +#include "device.h" +#include "peer.h" +#include "messages.h" +#include "queueing.h" +#include "peerlookup.h" + +#include <linux/rcupdate.h> +#include <linux/slab.h> +#include <linux/bitmap.h> +#include <linux/scatterlist.h> +#include <linux/highmem.h> +#include <crypto/algapi.h> + +/* This implements Noise_IKpsk2: + * + * <- s + * ****** + * -> e, es, s, ss, {t} + * <- e, ee, se, psk, {} + */ + +static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s"; +static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com"; +static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init; +static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init; +static atomic64_t keypair_counter = ATOMIC64_INIT(0); + +void __init wg_noise_init(void) +{ + struct blake2s_state blake; + + blake2s(handshake_init_chaining_key, handshake_name, NULL, + NOISE_HASH_LEN, sizeof(handshake_name), 0); + blake2s_init(&blake, NOISE_HASH_LEN); + blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN); + blake2s_update(&blake, identifier_name, sizeof(identifier_name)); + blake2s_final(&blake, handshake_init_hash); +} + +/* Must hold peer->handshake.static_identity->lock */ +bool wg_noise_precompute_static_static(struct wg_peer *peer) +{ + bool ret = true; + + down_write(&peer->handshake.lock); + if (peer->handshake.static_identity->has_identity) + ret = curve25519( + peer->handshake.precomputed_static_static, + peer->handshake.static_identity->static_private, + peer->handshake.remote_static); + else + memset(peer->handshake.precomputed_static_static, 0, + NOISE_PUBLIC_KEY_LEN); + up_write(&peer->handshake.lock); + return ret; +} + +bool wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer) +{ + memset(handshake, 0, sizeof(*handshake)); + init_rwsem(&handshake->lock); + handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE; + handshake->entry.peer = peer; + memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN); + if (peer_preshared_key) + memcpy(handshake->preshared_key, peer_preshared_key, + NOISE_SYMMETRIC_KEY_LEN); + handshake->static_identity = static_identity; + handshake->state = HANDSHAKE_ZEROED; + return wg_noise_precompute_static_static(peer); +} + +static void handshake_zero(struct noise_handshake *handshake) +{ + memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN); + memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN); + memset(&handshake->hash, 0, NOISE_HASH_LEN); + memset(&handshake->chaining_key, 0, NOISE_HASH_LEN); + handshake->remote_index = 0; + handshake->state = HANDSHAKE_ZEROED; +} + +void wg_noise_handshake_clear(struct noise_handshake *handshake) +{ + wg_index_hashtable_remove( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + down_write(&handshake->lock); + handshake_zero(handshake); + up_write(&handshake->lock); + wg_index_hashtable_remove( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); +} + +static struct noise_keypair *keypair_create(struct wg_peer *peer) +{ + struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL); + + if (unlikely(!keypair)) + return NULL; + keypair->internal_id = atomic64_inc_return(&keypair_counter); + keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; + keypair->entry.peer = peer; + kref_init(&keypair->refcount); + return keypair; +} + +static void keypair_free_rcu(struct rcu_head *rcu) +{ + kzfree(container_of(rcu, struct noise_keypair, rcu)); +} + +static void keypair_free_kref(struct kref *kref) +{ + struct noise_keypair *keypair = + container_of(kref, struct noise_keypair, refcount); + + net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n", + keypair->entry.peer->device->dev->name, + keypair->internal_id, + keypair->entry.peer->internal_id); + wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable, + &keypair->entry); + call_rcu(&keypair->rcu, keypair_free_rcu); +} + +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now) +{ + if (unlikely(!keypair)) + return; + if (unlikely(unreference_now)) + wg_index_hashtable_remove( + keypair->entry.peer->device->index_hashtable, + &keypair->entry); + kref_put(&keypair->refcount, keypair_free_kref); +} + +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), + "Taking noise keypair reference without holding the RCU BH read lock"); + if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount))) + return NULL; + return keypair; +} + +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs) +{ + struct noise_keypair *old; + + spin_lock_bh(&keypairs->keypair_update_lock); + + /* We zero the next_keypair before zeroing the others, so that + * wg_noise_received_with_keypair returns early before subsequent ones + * are zeroed. + */ + old = rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + wg_noise_keypair_put(old, true); + + old = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); + wg_noise_keypair_put(old, true); + + old = rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->current_keypair, NULL); + wg_noise_keypair_put(old, true); + + spin_unlock_bh(&keypairs->keypair_update_lock); +} + +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + + wg_noise_handshake_clear(&peer->handshake); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + + spin_lock_bh(&peer->keypairs.keypair_update_lock); + keypair = rcu_dereference_protected(peer->keypairs.next_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + if (keypair) + keypair->sending.is_valid = false; + keypair = rcu_dereference_protected(peer->keypairs.current_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + if (keypair) + keypair->sending.is_valid = false; + spin_unlock_bh(&peer->keypairs.keypair_update_lock); +} + +static void add_new_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *new_keypair) +{ + struct noise_keypair *previous_keypair, *next_keypair, *current_keypair; + + spin_lock_bh(&keypairs->keypair_update_lock); + previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + next_keypair = rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + current_keypair = rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + if (new_keypair->i_am_the_initiator) { + /* If we're the initiator, it means we've sent a handshake, and + * received a confirmation response, which means this new + * keypair can now be used. + */ + if (next_keypair) { + /* If there already was a next keypair pending, we + * demote it to be the previous keypair, and free the + * existing current. Note that this means KCI can result + * in this transition. It would perhaps be more sound to + * always just get rid of the unused next keypair + * instead of putting it in the previous slot, but this + * might be a bit less robust. Something to think about + * for the future. + */ + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + rcu_assign_pointer(keypairs->previous_keypair, + next_keypair); + wg_noise_keypair_put(current_keypair, true); + } else /* If there wasn't an existing next keypair, we replace + * the previous with the current one. + */ + rcu_assign_pointer(keypairs->previous_keypair, + current_keypair); + /* At this point we can get rid of the old previous keypair, and + * set up the new keypair. + */ + wg_noise_keypair_put(previous_keypair, true); + rcu_assign_pointer(keypairs->current_keypair, new_keypair); + } else { + /* If we're the responder, it means we can't use the new keypair + * until we receive confirmation via the first data packet, so + * we get rid of the existing previous one, the possibly + * existing next one, and slide in the new next one. + */ + rcu_assign_pointer(keypairs->next_keypair, new_keypair); + wg_noise_keypair_put(next_keypair, true); + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); + wg_noise_keypair_put(previous_keypair, true); + } + spin_unlock_bh(&keypairs->keypair_update_lock); +} + +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *received_keypair) +{ + struct noise_keypair *old_keypair; + bool key_is_new; + + /* We first check without taking the spinlock. */ + key_is_new = received_keypair == + rcu_access_pointer(keypairs->next_keypair); + if (likely(!key_is_new)) + return false; + + spin_lock_bh(&keypairs->keypair_update_lock); + /* After locking, we double check that things didn't change from + * beneath us. + */ + if (unlikely(received_keypair != + rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)))) { + spin_unlock_bh(&keypairs->keypair_update_lock); + return false; + } + + /* When we've finally received the confirmation, we slide the next + * into the current, the current into the previous, and get rid of + * the old previous. + */ + old_keypair = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + rcu_assign_pointer(keypairs->previous_keypair, + rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock))); + wg_noise_keypair_put(old_keypair, true); + rcu_assign_pointer(keypairs->current_keypair, received_keypair); + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + + spin_unlock_bh(&keypairs->keypair_update_lock); + return true; +} + +/* Must hold static_identity->lock */ +void wg_noise_set_static_identity_private_key( + struct noise_static_identity *static_identity, + const u8 private_key[NOISE_PUBLIC_KEY_LEN]) +{ + memcpy(static_identity->static_private, private_key, + NOISE_PUBLIC_KEY_LEN); + curve25519_clamp_secret(static_identity->static_private); + static_identity->has_identity = curve25519_generate_public( + static_identity->static_public, private_key); +} + +/* This is Hugo Krawczyk's HKDF: + * - https://eprint.iacr.org/2010/264.pdf + * - https://tools.ietf.org/html/rfc5869 + */ +static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, + size_t first_len, size_t second_len, size_t third_len, + size_t data_len, const u8 chaining_key[NOISE_HASH_LEN]) +{ + u8 output[BLAKE2S_HASH_SIZE + 1]; + u8 secret[BLAKE2S_HASH_SIZE]; + + WARN_ON(IS_ENABLED(DEBUG) && + (first_len > BLAKE2S_HASH_SIZE || + second_len > BLAKE2S_HASH_SIZE || + third_len > BLAKE2S_HASH_SIZE || + ((second_len || second_dst || third_len || third_dst) && + (!first_len || !first_dst)) || + ((third_len || third_dst) && (!second_len || !second_dst)))); + + /* Extract entropy from data into secret */ + blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); + + if (!first_dst || !first_len) + goto out; + + /* Expand first key: key = secret, data = 0x1 */ + output[0] = 1; + blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); + memcpy(first_dst, output, first_len); + + if (!second_dst || !second_len) + goto out; + + /* Expand second key: key = secret, data = first-key || 0x2 */ + output[BLAKE2S_HASH_SIZE] = 2; + blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, + BLAKE2S_HASH_SIZE); + memcpy(second_dst, output, second_len); + + if (!third_dst || !third_len) + goto out; + + /* Expand third key: key = secret, data = second-key || 0x3 */ + output[BLAKE2S_HASH_SIZE] = 3; + blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, + BLAKE2S_HASH_SIZE); + memcpy(third_dst, output, third_len); + +out: + /* Clear sensitive data from stack */ + memzero_explicit(secret, BLAKE2S_HASH_SIZE); + memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); +} + +static void symmetric_key_init(struct noise_symmetric_key *key) +{ + spin_lock_init(&key->counter.receive.lock); + atomic64_set(&key->counter.counter, 0); + memset(key->counter.receive.backtrack, 0, + sizeof(key->counter.receive.backtrack)); + key->birthdate = ktime_get_coarse_boottime_ns(); + key->is_valid = true; +} + +static void derive_keys(struct noise_symmetric_key *first_dst, + struct noise_symmetric_key *second_dst, + const u8 chaining_key[NOISE_HASH_LEN]) +{ + kdf(first_dst->key, second_dst->key, NULL, NULL, + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, + chaining_key); + symmetric_key_init(first_dst); + symmetric_key_init(second_dst); +} + +static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 private[NOISE_PUBLIC_KEY_LEN], + const u8 public[NOISE_PUBLIC_KEY_LEN]) +{ + u8 dh_calculation[NOISE_PUBLIC_KEY_LEN]; + + if (unlikely(!curve25519(dh_calculation, private, public))) + return false; + kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key); + memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN); + return true; +} + +static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) +{ + struct blake2s_state blake; + + blake2s_init(&blake, NOISE_HASH_LEN); + blake2s_update(&blake, hash, NOISE_HASH_LEN); + blake2s_update(&blake, src, src_len); + blake2s_final(&blake, hash); +} + +static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 psk[NOISE_SYMMETRIC_KEY_LEN]) +{ + u8 temp_hash[NOISE_HASH_LEN]; + + kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key); + mix_hash(hash, temp_hash, NOISE_HASH_LEN); + memzero_explicit(temp_hash, NOISE_HASH_LEN); +} + +static void handshake_init(u8 chaining_key[NOISE_HASH_LEN], + u8 hash[NOISE_HASH_LEN], + const u8 remote_static[NOISE_PUBLIC_KEY_LEN]) +{ + memcpy(hash, handshake_init_hash, NOISE_HASH_LEN); + memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN); + mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN); +} + +static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, + NOISE_HASH_LEN, + 0 /* Always zero for Noise_IK */, key); + mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len)); +} + +static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, + hash, NOISE_HASH_LEN, + 0 /* Always zero for Noise_IK */, key)) + return false; + mix_hash(hash, src_ciphertext, src_len); + return true; +} + +static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN], + const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN], + u8 chaining_key[NOISE_HASH_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + if (ephemeral_dst != ephemeral_src) + memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN); + mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN); + kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, + NOISE_PUBLIC_KEY_LEN, chaining_key); +} + +static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN]) +{ + struct timespec64 now; + + ktime_get_real_ts64(&now); + + /* In order to prevent some sort of infoleak from precise timers, we + * round down the nanoseconds part to the closest rounded-down power of + * two to the maximum initiations per second allowed anyway by the + * implementation. + */ + now.tv_nsec = ALIGN_DOWN(now.tv_nsec, + rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND)); + + /* https://cr.yp.to/libtai/tai64.html */ + *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec); + *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec); +} + +bool +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, + struct noise_handshake *handshake) +{ + u8 timestamp[NOISE_TIMESTAMP_LEN]; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + bool ret = false; + + /* We need to wait for crng _before_ taking any locks, since + * curve25519_generate_secret uses get_random_bytes_wait. + */ + wait_for_random_bytes(); + + down_read(&handshake->static_identity->lock); + down_write(&handshake->lock); + + if (unlikely(!handshake->static_identity->has_identity)) + goto out; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION); + + handshake_init(handshake->chaining_key, handshake->hash, + handshake->remote_static); + + /* e */ + curve25519_generate_secret(handshake->ephemeral_private); + if (!curve25519_generate_public(dst->unencrypted_ephemeral, + handshake->ephemeral_private)) + goto out; + message_ephemeral(dst->unencrypted_ephemeral, + dst->unencrypted_ephemeral, handshake->chaining_key, + handshake->hash); + + /* es */ + if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private, + handshake->remote_static)) + goto out; + + /* s */ + message_encrypt(dst->encrypted_static, + handshake->static_identity->static_public, + NOISE_PUBLIC_KEY_LEN, key, handshake->hash); + + /* ss */ + kdf(handshake->chaining_key, key, NULL, + handshake->precomputed_static_static, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, + handshake->chaining_key); + + /* {t} */ + tai64n_now(timestamp); + message_encrypt(dst->encrypted_timestamp, timestamp, + NOISE_TIMESTAMP_LEN, key, handshake->hash); + + dst->sender_index = wg_index_hashtable_insert( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + + handshake->state = HANDSHAKE_CREATED_INITIATION; + ret = true; + +out: + up_write(&handshake->lock); + up_read(&handshake->static_identity->lock); + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + return ret; +} + +struct wg_peer * +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, + struct wg_device *wg) +{ + struct wg_peer *peer = NULL, *ret_peer = NULL; + struct noise_handshake *handshake; + bool replay_attack, flood_attack; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + u8 hash[NOISE_HASH_LEN]; + u8 s[NOISE_PUBLIC_KEY_LEN]; + u8 e[NOISE_PUBLIC_KEY_LEN]; + u8 t[NOISE_TIMESTAMP_LEN]; + u64 initiation_consumption; + + down_read(&wg->static_identity.lock); + if (unlikely(!wg->static_identity.has_identity)) + goto out; + + handshake_init(chaining_key, hash, wg->static_identity.static_public); + + /* e */ + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); + + /* es */ + if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e)) + goto out; + + /* s */ + if (!message_decrypt(s, src->encrypted_static, + sizeof(src->encrypted_static), key, hash)) + goto out; + + /* Lookup which peer we're actually talking to */ + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s); + if (!peer) + goto out; + handshake = &peer->handshake; + + /* ss */ + kdf(chaining_key, key, NULL, handshake->precomputed_static_static, + NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, + chaining_key); + + /* {t} */ + if (!message_decrypt(t, src->encrypted_timestamp, + sizeof(src->encrypted_timestamp), key, hash)) + goto out; + + down_read(&handshake->lock); + replay_attack = memcmp(t, handshake->latest_timestamp, + NOISE_TIMESTAMP_LEN) <= 0; + flood_attack = (s64)handshake->last_initiation_consumption + + NSEC_PER_SEC / INITIATIONS_PER_SECOND > + (s64)ktime_get_coarse_boottime_ns(); + up_read(&handshake->lock); + if (replay_attack || flood_attack) + goto out; + + /* Success! Copy everything to peer */ + down_write(&handshake->lock); + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); + if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0) + memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN); + memcpy(handshake->hash, hash, NOISE_HASH_LEN); + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); + handshake->remote_index = src->sender_index; + if ((s64)(handshake->last_initiation_consumption - + (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0) + handshake->last_initiation_consumption = initiation_consumption; + handshake->state = HANDSHAKE_CONSUMED_INITIATION; + up_write(&handshake->lock); + ret_peer = peer; + +out: + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + memzero_explicit(hash, NOISE_HASH_LEN); + memzero_explicit(chaining_key, NOISE_HASH_LEN); + up_read(&wg->static_identity.lock); + if (!ret_peer) + wg_peer_put(peer); + return ret_peer; +} + +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, + struct noise_handshake *handshake) +{ + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + bool ret = false; + + /* We need to wait for crng _before_ taking any locks, since + * curve25519_generate_secret uses get_random_bytes_wait. + */ + wait_for_random_bytes(); + + down_read(&handshake->static_identity->lock); + down_write(&handshake->lock); + + if (handshake->state != HANDSHAKE_CONSUMED_INITIATION) + goto out; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE); + dst->receiver_index = handshake->remote_index; + + /* e */ + curve25519_generate_secret(handshake->ephemeral_private); + if (!curve25519_generate_public(dst->unencrypted_ephemeral, + handshake->ephemeral_private)) + goto out; + message_ephemeral(dst->unencrypted_ephemeral, + dst->unencrypted_ephemeral, handshake->chaining_key, + handshake->hash); + + /* ee */ + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, + handshake->remote_ephemeral)) + goto out; + + /* se */ + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, + handshake->remote_static)) + goto out; + + /* psk */ + mix_psk(handshake->chaining_key, handshake->hash, key, + handshake->preshared_key); + + /* {} */ + message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash); + + dst->sender_index = wg_index_hashtable_insert( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + + handshake->state = HANDSHAKE_CREATED_RESPONSE; + ret = true; + +out: + up_write(&handshake->lock); + up_read(&handshake->static_identity->lock); + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + return ret; +} + +struct wg_peer * +wg_noise_handshake_consume_response(struct message_handshake_response *src, + struct wg_device *wg) +{ + enum noise_handshake_state state = HANDSHAKE_ZEROED; + struct wg_peer *peer = NULL, *ret_peer = NULL; + struct noise_handshake *handshake; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 hash[NOISE_HASH_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + u8 e[NOISE_PUBLIC_KEY_LEN]; + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; + u8 static_private[NOISE_PUBLIC_KEY_LEN]; + + down_read(&wg->static_identity.lock); + + if (unlikely(!wg->static_identity.has_identity)) + goto out; + + handshake = (struct noise_handshake *)wg_index_hashtable_lookup( + wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, + src->receiver_index, &peer); + if (unlikely(!handshake)) + goto out; + + down_read(&handshake->lock); + state = handshake->state; + memcpy(hash, handshake->hash, NOISE_HASH_LEN); + memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); + memcpy(ephemeral_private, handshake->ephemeral_private, + NOISE_PUBLIC_KEY_LEN); + up_read(&handshake->lock); + + if (state != HANDSHAKE_CREATED_INITIATION) + goto fail; + + /* e */ + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); + + /* ee */ + if (!mix_dh(chaining_key, NULL, ephemeral_private, e)) + goto fail; + + /* se */ + if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e)) + goto fail; + + /* psk */ + mix_psk(chaining_key, hash, key, handshake->preshared_key); + + /* {} */ + if (!message_decrypt(NULL, src->encrypted_nothing, + sizeof(src->encrypted_nothing), key, hash)) + goto fail; + + /* Success! Copy everything to peer */ + down_write(&handshake->lock); + /* It's important to check that the state is still the same, while we + * have an exclusive lock. + */ + if (handshake->state != state) { + up_write(&handshake->lock); + goto fail; + } + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); + memcpy(handshake->hash, hash, NOISE_HASH_LEN); + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); + handshake->remote_index = src->sender_index; + handshake->state = HANDSHAKE_CONSUMED_RESPONSE; + up_write(&handshake->lock); + ret_peer = peer; + goto out; + +fail: + wg_peer_put(peer); +out: + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + memzero_explicit(hash, NOISE_HASH_LEN); + memzero_explicit(chaining_key, NOISE_HASH_LEN); + memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); + memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); + up_read(&wg->static_identity.lock); + return ret_peer; +} + +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, + struct noise_keypairs *keypairs) +{ + struct noise_keypair *new_keypair; + bool ret = false; + + down_write(&handshake->lock); + if (handshake->state != HANDSHAKE_CREATED_RESPONSE && + handshake->state != HANDSHAKE_CONSUMED_RESPONSE) + goto out; + + new_keypair = keypair_create(handshake->entry.peer); + if (!new_keypair) + goto out; + new_keypair->i_am_the_initiator = handshake->state == + HANDSHAKE_CONSUMED_RESPONSE; + new_keypair->remote_index = handshake->remote_index; + + if (new_keypair->i_am_the_initiator) + derive_keys(&new_keypair->sending, &new_keypair->receiving, + handshake->chaining_key); + else + derive_keys(&new_keypair->receiving, &new_keypair->sending, + handshake->chaining_key); + + handshake_zero(handshake); + rcu_read_lock_bh(); + if (likely(!READ_ONCE(container_of(handshake, struct wg_peer, + handshake)->is_dead))) { + add_new_keypair(keypairs, new_keypair); + net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n", + handshake->entry.peer->device->dev->name, + new_keypair->internal_id, + handshake->entry.peer->internal_id); + ret = wg_index_hashtable_replace( + handshake->entry.peer->device->index_hashtable, + &handshake->entry, &new_keypair->entry); + } else { + kzfree(new_keypair); + } + rcu_read_unlock_bh(); + +out: + up_write(&handshake->lock); + return ret; +} diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h new file mode 100644 index 000000000000..138a07bb817c --- /dev/null +++ b/drivers/net/wireguard/noise.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ +#ifndef _WG_NOISE_H +#define _WG_NOISE_H + +#include "messages.h" +#include "peerlookup.h" + +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/rwsem.h> +#include <linux/mutex.h> +#include <linux/kref.h> + +union noise_counter { + struct { + u64 counter; + unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; + spinlock_t lock; + } receive; + atomic64_t counter; +}; + +struct noise_symmetric_key { + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + union noise_counter counter; + u64 birthdate; + bool is_valid; +}; + +struct noise_keypair { + struct index_hashtable_entry entry; + struct noise_symmetric_key sending; + struct noise_symmetric_key receiving; + __le32 remote_index; + bool i_am_the_initiator; + struct kref refcount; + struct rcu_head rcu; + u64 internal_id; +}; + +struct noise_keypairs { + struct noise_keypair __rcu *current_keypair; + struct noise_keypair __rcu *previous_keypair; + struct noise_keypair __rcu *next_keypair; + spinlock_t keypair_update_lock; +}; + +struct noise_static_identity { + u8 static_public[NOISE_PUBLIC_KEY_LEN]; + u8 static_private[NOISE_PUBLIC_KEY_LEN]; + struct rw_semaphore lock; + bool has_identity; +}; + +enum noise_handshake_state { + HANDSHAKE_ZEROED, + HANDSHAKE_CREATED_INITIATION, + HANDSHAKE_CONSUMED_INITIATION, + HANDSHAKE_CREATED_RESPONSE, + HANDSHAKE_CONSUMED_RESPONSE +}; + +struct noise_handshake { + struct index_hashtable_entry entry; + + enum noise_handshake_state state; + u64 last_initiation_consumption; + + struct noise_static_identity *static_identity; + + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; + u8 remote_static[NOISE_PUBLIC_KEY_LEN]; + u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN]; + u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN]; + + u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; + + u8 hash[NOISE_HASH_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + + u8 latest_timestamp[NOISE_TIMESTAMP_LEN]; + __le32 remote_index; + + /* Protects all members except the immutable (after noise_handshake_ + * init): remote_static, precomputed_static_static, static_identity. + */ + struct rw_semaphore lock; +}; + +struct wg_device; + +void wg_noise_init(void); +bool wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer); +void wg_noise_handshake_clear(struct noise_handshake *handshake); +static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) +{ + atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() - + (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC); +} + +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now); +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair); +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs); +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *received_keypair); +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); + +void wg_noise_set_static_identity_private_key( + struct noise_static_identity *static_identity, + const u8 private_key[NOISE_PUBLIC_KEY_LEN]); +bool wg_noise_precompute_static_static(struct wg_peer *peer); + +bool +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, + struct noise_handshake *handshake); +struct wg_peer * +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, + struct wg_device *wg); + +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, + struct noise_handshake *handshake); +struct wg_peer * +wg_noise_handshake_consume_response(struct message_handshake_response *src, + struct wg_device *wg); + +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, + struct noise_keypairs *keypairs); + +#endif /* _WG_NOISE_H */ diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c new file mode 100644 index 000000000000..071eedf33f5a --- /dev/null +++ b/drivers/net/wireguard/peer.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "peer.h" +#include "device.h" +#include "queueing.h" +#include "timers.h" +#include "peerlookup.h" +#include "noise.h" + +#include <linux/kref.h> +#include <linux/lockdep.h> +#include <linux/rcupdate.h> +#include <linux/list.h> + +static atomic64_t peer_counter = ATOMIC64_INIT(0); + +struct wg_peer *wg_peer_create(struct wg_device *wg, + const u8 public_key[NOISE_PUBLIC_KEY_LEN], + const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]) +{ + struct wg_peer *peer; + int ret = -ENOMEM; + + lockdep_assert_held(&wg->device_update_lock); + + if (wg->num_peers >= MAX_PEERS_PER_DEVICE) + return ERR_PTR(ret); + + peer = kzalloc(sizeof(*peer), GFP_KERNEL); + if (unlikely(!peer)) + return ERR_PTR(ret); + peer->device = wg; + + if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity, + public_key, preshared_key, peer)) { + ret = -EKEYREJECTED; + goto err_1; + } + if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) + goto err_1; + if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, + MAX_QUEUED_PACKETS)) + goto err_2; + if (wg_packet_queue_init(&peer->rx_queue, NULL, false, + MAX_QUEUED_PACKETS)) + goto err_3; + + peer->internal_id = atomic64_inc_return(&peer_counter); + peer->serial_work_cpu = nr_cpumask_bits; + wg_cookie_init(&peer->latest_cookie); + wg_timers_init(peer); + wg_cookie_checker_precompute_peer_keys(peer); + spin_lock_init(&peer->keypairs.keypair_update_lock); + INIT_WORK(&peer->transmit_handshake_work, + wg_packet_handshake_send_worker); + rwlock_init(&peer->endpoint_lock); + kref_init(&peer->refcount); + skb_queue_head_init(&peer->staged_packet_queue); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state); + netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll, + NAPI_POLL_WEIGHT); + napi_enable(&peer->napi); + list_add_tail(&peer->peer_list, &wg->peer_list); + INIT_LIST_HEAD(&peer->allowedips_list); + wg_pubkey_hashtable_add(wg->peer_hashtable, peer); + ++wg->num_peers; + pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); + return peer; + +err_3: + wg_packet_queue_free(&peer->tx_queue, false); +err_2: + dst_cache_destroy(&peer->endpoint_cache); +err_1: + kfree(peer); + return ERR_PTR(ret); +} + +struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), + "Taking peer reference without holding the RCU read lock"); + if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount))) + return NULL; + return peer; +} + +static void peer_make_dead(struct wg_peer *peer) +{ + /* Remove from configuration-time lookup structures. */ + list_del_init(&peer->peer_list); + wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer, + &peer->device->device_update_lock); + wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer); + + /* Mark as dead, so that we don't allow jumping contexts after. */ + WRITE_ONCE(peer->is_dead, true); + + /* The caller must now synchronize_rcu() for this to take effect. */ +} + +static void peer_remove_after_dead(struct wg_peer *peer) +{ + WARN_ON(!peer->is_dead); + + /* No more keypairs can be created for this peer, since is_dead protects + * add_new_keypair, so we can now destroy existing ones. + */ + wg_noise_keypairs_clear(&peer->keypairs); + + /* Destroy all ongoing timers that were in-flight at the beginning of + * this function. + */ + wg_timers_stop(peer); + + /* The transition between packet encryption/decryption queues isn't + * guarded by is_dead, but each reference's life is strictly bounded by + * two generations: once for parallel crypto and once for serial + * ingestion, so we can simply flush twice, and be sure that we no + * longer have references inside these queues. + */ + + /* a) For encrypt/decrypt. */ + flush_workqueue(peer->device->packet_crypt_wq); + /* b.1) For send (but not receive, since that's napi). */ + flush_workqueue(peer->device->packet_crypt_wq); + /* b.2.1) For receive (but not send, since that's wq). */ + napi_disable(&peer->napi); + /* b.2.1) It's now safe to remove the napi struct, which must be done + * here from process context. + */ + netif_napi_del(&peer->napi); + + /* Ensure any workstructs we own (like transmit_handshake_work or + * clear_peer_work) no longer are in use. + */ + flush_workqueue(peer->device->handshake_send_wq); + + /* After the above flushes, a peer might still be active in a few + * different contexts: 1) from xmit(), before hitting is_dead and + * returning, 2) from wg_packet_consume_data(), before hitting is_dead + * and returning, 3) from wg_receive_handshake_packet() after a point + * where it has processed an incoming handshake packet, but where + * all calls to pass it off to timers fails because of is_dead. We won't + * have new references in (1) eventually, because we're removed from + * allowedips; we won't have new references in (2) eventually, because + * wg_index_hashtable_lookup will always return NULL, since we removed + * all existing keypairs and no more can be created; we won't have new + * references in (3) eventually, because we're removed from the pubkey + * hash table, which allows for a maximum of one handshake response, + * via the still-uncleared index hashtable entry, but not more than one, + * and in wg_cookie_message_consume, the lookup eventually gets a peer + * with a refcount of zero, so no new reference is taken. + */ + + --peer->device->num_peers; + wg_peer_put(peer); +} + +/* We have a separate "remove" function make sure that all active places where + * a peer is currently operating will eventually come to an end and not pass + * their reference onto another context. + */ +void wg_peer_remove(struct wg_peer *peer) +{ + if (unlikely(!peer)) + return; + lockdep_assert_held(&peer->device->device_update_lock); + + peer_make_dead(peer); + synchronize_rcu(); + peer_remove_after_dead(peer); +} + +void wg_peer_remove_all(struct wg_device *wg) +{ + struct wg_peer *peer, *temp; + LIST_HEAD(dead_peers); + + lockdep_assert_held(&wg->device_update_lock); + + /* Avoid having to traverse individually for each one. */ + wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock); + + list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { + peer_make_dead(peer); + list_add_tail(&peer->peer_list, &dead_peers); + } + synchronize_rcu(); + list_for_each_entry_safe(peer, temp, &dead_peers, peer_list) + peer_remove_after_dead(peer); +} + +static void rcu_release(struct rcu_head *rcu) +{ + struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu); + + dst_cache_destroy(&peer->endpoint_cache); + wg_packet_queue_free(&peer->rx_queue, false); + wg_packet_queue_free(&peer->tx_queue, false); + + /* The final zeroing takes care of clearing any remaining handshake key + * material and other potentially sensitive information. + */ + kzfree(peer); +} + +static void kref_release(struct kref *refcount) +{ + struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount); + + pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + /* Remove ourself from dynamic runtime lookup structures, now that the + * last reference is gone. + */ + wg_index_hashtable_remove(peer->device->index_hashtable, + &peer->handshake.entry); + + /* Remove any lingering packets that didn't have a chance to be + * transmitted. + */ + wg_packet_purge_staged_packets(peer); + + /* Free the memory used. */ + call_rcu(&peer->rcu, rcu_release); +} + +void wg_peer_put(struct wg_peer *peer) +{ + if (unlikely(!peer)) + return; + kref_put(&peer->refcount, kref_release); +} diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h new file mode 100644 index 000000000000..23af40922997 --- /dev/null +++ b/drivers/net/wireguard/peer.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_PEER_H +#define _WG_PEER_H + +#include "device.h" +#include "noise.h" +#include "cookie.h" + +#include <linux/types.h> +#include <linux/netfilter.h> +#include <linux/spinlock.h> +#include <linux/kref.h> +#include <net/dst_cache.h> + +struct wg_device; + +struct endpoint { + union { + struct sockaddr addr; + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + }; + union { + struct { + struct in_addr src4; + /* Essentially the same as addr6->scope_id */ + int src_if4; + }; + struct in6_addr src6; + }; +}; + +struct wg_peer { + struct wg_device *device; + struct crypt_queue tx_queue, rx_queue; + struct sk_buff_head staged_packet_queue; + int serial_work_cpu; + struct noise_keypairs keypairs; + struct endpoint endpoint; + struct dst_cache endpoint_cache; + rwlock_t endpoint_lock; + struct noise_handshake handshake; + atomic64_t last_sent_handshake; + struct work_struct transmit_handshake_work, clear_peer_work; + struct cookie latest_cookie; + struct hlist_node pubkey_hash; + u64 rx_bytes, tx_bytes; + struct timer_list timer_retransmit_handshake, timer_send_keepalive; + struct timer_list timer_new_handshake, timer_zero_key_material; + struct timer_list timer_persistent_keepalive; + unsigned int timer_handshake_attempts; + u16 persistent_keepalive_interval; + bool timer_need_another_keepalive; + bool sent_lastminute_handshake; + struct timespec64 walltime_last_handshake; + struct kref refcount; + struct rcu_head rcu; + struct list_head peer_list; + struct list_head allowedips_list; + u64 internal_id; + struct napi_struct napi; + bool is_dead; +}; + +struct wg_peer *wg_peer_create(struct wg_device *wg, + const u8 public_key[NOISE_PUBLIC_KEY_LEN], + const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]); + +struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer); +static inline struct wg_peer *wg_peer_get(struct wg_peer *peer) +{ + kref_get(&peer->refcount); + return peer; +} +void wg_peer_put(struct wg_peer *peer); +void wg_peer_remove(struct wg_peer *peer); +void wg_peer_remove_all(struct wg_device *wg); + +#endif /* _WG_PEER_H */ diff --git a/drivers/net/wireguard/peerlookup.c b/drivers/net/wireguard/peerlookup.c new file mode 100644 index 000000000000..e4deb331476b --- /dev/null +++ b/drivers/net/wireguard/peerlookup.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "peerlookup.h" +#include "peer.h" +#include "noise.h" + +static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) +{ + /* siphash gives us a secure 64bit number based on a random key. Since + * the bits are uniformly distributed, we can then mask off to get the + * bits we need. + */ + const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key); + + return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)]; +} + +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void) +{ + struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + get_random_bytes(&table->key, sizeof(table->key)); + hash_init(table->hashtable); + mutex_init(&table->lock); + return table; +} + +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, + struct wg_peer *peer) +{ + mutex_lock(&table->lock); + hlist_add_head_rcu(&peer->pubkey_hash, + pubkey_bucket(table, peer->handshake.remote_static)); + mutex_unlock(&table->lock); +} + +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, + struct wg_peer *peer) +{ + mutex_lock(&table->lock); + hlist_del_init_rcu(&peer->pubkey_hash); + mutex_unlock(&table->lock); +} + +/* Returns a strong reference to a peer */ +struct wg_peer * +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) +{ + struct wg_peer *iter_peer, *peer = NULL; + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey), + pubkey_hash) { + if (!memcmp(pubkey, iter_peer->handshake.remote_static, + NOISE_PUBLIC_KEY_LEN)) { + peer = iter_peer; + break; + } + } + peer = wg_peer_get_maybe_zero(peer); + rcu_read_unlock_bh(); + return peer; +} + +static struct hlist_head *index_bucket(struct index_hashtable *table, + const __le32 index) +{ + /* Since the indices are random and thus all bits are uniformly + * distributed, we can find its bucket simply by masking. + */ + return &table->hashtable[(__force u32)index & + (HASH_SIZE(table->hashtable) - 1)]; +} + +struct index_hashtable *wg_index_hashtable_alloc(void) +{ + struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + hash_init(table->hashtable); + spin_lock_init(&table->lock); + return table; +} + +/* At the moment, we limit ourselves to 2^20 total peers, which generally might + * amount to 2^20*3 items in this hashtable. The algorithm below works by + * picking a random number and testing it. We can see that these limits mean we + * usually succeed pretty quickly: + * + * >>> def calculation(tries, size): + * ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32)) + * ... + * >>> calculation(1, 2**20 * 3) + * 0.999267578125 + * >>> calculation(2, 2**20 * 3) + * 0.0007318854331970215 + * >>> calculation(3, 2**20 * 3) + * 5.360489012673497e-07 + * >>> calculation(4, 2**20 * 3) + * 3.9261394135792216e-10 + * + * At the moment, we don't do any masking, so this algorithm isn't exactly + * constant time in either the random guessing or in the hash list lookup. We + * could require a minimum of 3 tries, which would successfully mask the + * guessing. this would not, however, help with the growing hash lengths, which + * is another thing to consider moving forward. + */ + +__le32 wg_index_hashtable_insert(struct index_hashtable *table, + struct index_hashtable_entry *entry) +{ + struct index_hashtable_entry *existing_entry; + + spin_lock_bh(&table->lock); + hlist_del_init_rcu(&entry->index_hash); + spin_unlock_bh(&table->lock); + + rcu_read_lock_bh(); + +search_unused_slot: + /* First we try to find an unused slot, randomly, while unlocked. */ + entry->index = (__force __le32)get_random_u32(); + hlist_for_each_entry_rcu_bh(existing_entry, + index_bucket(table, entry->index), + index_hash) { + if (existing_entry->index == entry->index) + /* If it's already in use, we continue searching. */ + goto search_unused_slot; + } + + /* Once we've found an unused slot, we lock it, and then double-check + * that nobody else stole it from us. + */ + spin_lock_bh(&table->lock); + hlist_for_each_entry_rcu_bh(existing_entry, + index_bucket(table, entry->index), + index_hash) { + if (existing_entry->index == entry->index) { + spin_unlock_bh(&table->lock); + /* If it was stolen, we start over. */ + goto search_unused_slot; + } + } + /* Otherwise, we know we have it exclusively (since we're locked), + * so we insert. + */ + hlist_add_head_rcu(&entry->index_hash, + index_bucket(table, entry->index)); + spin_unlock_bh(&table->lock); + + rcu_read_unlock_bh(); + + return entry->index; +} + +bool wg_index_hashtable_replace(struct index_hashtable *table, + struct index_hashtable_entry *old, + struct index_hashtable_entry *new) +{ + if (unlikely(hlist_unhashed(&old->index_hash))) + return false; + spin_lock_bh(&table->lock); + new->index = old->index; + hlist_replace_rcu(&old->index_hash, &new->index_hash); + + /* Calling init here NULLs out index_hash, and in fact after this + * function returns, it's theoretically possible for this to get + * reinserted elsewhere. That means the RCU lookup below might either + * terminate early or jump between buckets, in which case the packet + * simply gets dropped, which isn't terrible. + */ + INIT_HLIST_NODE(&old->index_hash); + spin_unlock_bh(&table->lock); + return true; +} + +void wg_index_hashtable_remove(struct index_hashtable *table, + struct index_hashtable_entry *entry) +{ + spin_lock_bh(&table->lock); + hlist_del_init_rcu(&entry->index_hash); + spin_unlock_bh(&table->lock); +} + +/* Returns a strong reference to a entry->peer */ +struct index_hashtable_entry * +wg_index_hashtable_lookup(struct index_hashtable *table, + const enum index_hashtable_type type_mask, + const __le32 index, struct wg_peer **peer) +{ + struct index_hashtable_entry *iter_entry, *entry = NULL; + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index), + index_hash) { + if (iter_entry->index == index) { + if (likely(iter_entry->type & type_mask)) + entry = iter_entry; + break; + } + } + if (likely(entry)) { + entry->peer = wg_peer_get_maybe_zero(entry->peer); + if (likely(entry->peer)) + *peer = entry->peer; + else + entry = NULL; + } + rcu_read_unlock_bh(); + return entry; +} diff --git a/drivers/net/wireguard/peerlookup.h b/drivers/net/wireguard/peerlookup.h new file mode 100644 index 000000000000..ced811797680 --- /dev/null +++ b/drivers/net/wireguard/peerlookup.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_PEERLOOKUP_H +#define _WG_PEERLOOKUP_H + +#include "messages.h" + +#include <linux/hashtable.h> +#include <linux/mutex.h> +#include <linux/siphash.h> + +struct wg_peer; + +struct pubkey_hashtable { + /* TODO: move to rhashtable */ + DECLARE_HASHTABLE(hashtable, 11); + siphash_key_t key; + struct mutex lock; +}; + +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void); +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, + struct wg_peer *peer); +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, + struct wg_peer *peer); +struct wg_peer * +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]); + +struct index_hashtable { + /* TODO: move to rhashtable */ + DECLARE_HASHTABLE(hashtable, 13); + spinlock_t lock; +}; + +enum index_hashtable_type { + INDEX_HASHTABLE_HANDSHAKE = 1U << 0, + INDEX_HASHTABLE_KEYPAIR = 1U << 1 +}; + +struct index_hashtable_entry { + struct wg_peer *peer; + struct hlist_node index_hash; + enum index_hashtable_type type; + __le32 index; +}; + +struct index_hashtable *wg_index_hashtable_alloc(void); +__le32 wg_index_hashtable_insert(struct index_hashtable *table, + struct index_hashtable_entry *entry); +bool wg_index_hashtable_replace(struct index_hashtable *table, + struct index_hashtable_entry *old, + struct index_hashtable_entry *new); +void wg_index_hashtable_remove(struct index_hashtable *table, + struct index_hashtable_entry *entry); +struct index_hashtable_entry * +wg_index_hashtable_lookup(struct index_hashtable *table, + const enum index_hashtable_type type_mask, + const __le32 index, struct wg_peer **peer); + +#endif /* _WG_PEERLOOKUP_H */ diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c new file mode 100644 index 000000000000..5c964fcb994e --- /dev/null +++ b/drivers/net/wireguard/queueing.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "queueing.h" + +struct multicore_worker __percpu * +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) +{ + int cpu; + struct multicore_worker __percpu *worker = + alloc_percpu(struct multicore_worker); + + if (!worker) + return NULL; + + for_each_possible_cpu(cpu) { + per_cpu_ptr(worker, cpu)->ptr = ptr; + INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function); + } + return worker; +} + +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, + bool multicore, unsigned int len) +{ + int ret; + + memset(queue, 0, sizeof(*queue)); + ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); + if (ret) + return ret; + if (function) { + if (multicore) { + queue->worker = wg_packet_percpu_multicore_worker_alloc( + function, queue); + if (!queue->worker) + return -ENOMEM; + } else { + INIT_WORK(&queue->work, function); + } + } + return 0; +} + +void wg_packet_queue_free(struct crypt_queue *queue, bool multicore) +{ + if (multicore) + free_percpu(queue->worker); + WARN_ON(!__ptr_ring_empty(&queue->ring)); + ptr_ring_cleanup(&queue->ring, NULL); +} diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h new file mode 100644 index 000000000000..e49a464238fd --- /dev/null +++ b/drivers/net/wireguard/queueing.h @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_QUEUEING_H +#define _WG_QUEUEING_H + +#include "peer.h" +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +struct wg_device; +struct wg_peer; +struct multicore_worker; +struct crypt_queue; +struct sk_buff; + +/* queueing.c APIs: */ +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, + bool multicore, unsigned int len); +void wg_packet_queue_free(struct crypt_queue *queue, bool multicore); +struct multicore_worker __percpu * +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); + +/* receive.c APIs: */ +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb); +void wg_packet_handshake_receive_worker(struct work_struct *work); +/* NAPI poll function: */ +int wg_packet_rx_poll(struct napi_struct *napi, int budget); +/* Workqueue worker: */ +void wg_packet_decrypt_worker(struct work_struct *work); + +/* send.c APIs: */ +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, + bool is_retry); +void wg_packet_send_handshake_response(struct wg_peer *peer); +void wg_packet_send_handshake_cookie(struct wg_device *wg, + struct sk_buff *initiating_skb, + __le32 sender_index); +void wg_packet_send_keepalive(struct wg_peer *peer); +void wg_packet_purge_staged_packets(struct wg_peer *peer); +void wg_packet_send_staged_packets(struct wg_peer *peer); +/* Workqueue workers: */ +void wg_packet_handshake_send_worker(struct work_struct *work); +void wg_packet_tx_worker(struct work_struct *work); +void wg_packet_encrypt_worker(struct work_struct *work); + +enum packet_state { + PACKET_STATE_UNCRYPTED, + PACKET_STATE_CRYPTED, + PACKET_STATE_DEAD +}; + +struct packet_cb { + u64 nonce; + struct noise_keypair *keypair; + atomic_t state; + u32 mtu; + u8 ds; +}; + +#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) +#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) + +/* Returns either the correct skb->protocol value, or 0 if invalid. */ +static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) +{ + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct iphdr)) <= + skb_tail_pointer(skb) && + ip_hdr(skb)->version == 4) + return htons(ETH_P_IP); + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= + skb_tail_pointer(skb) && + ipv6_hdr(skb)->version == 6) + return htons(ETH_P_IPV6); + return 0; +} + +static inline void wg_reset_packet(struct sk_buff *skb) +{ + const int pfmemalloc = skb->pfmemalloc; + + skb_scrub_packet(skb, true); + memset(&skb->headers_start, 0, + offsetof(struct sk_buff, headers_end) - + offsetof(struct sk_buff, headers_start)); + skb->pfmemalloc = pfmemalloc; + skb->queue_mapping = 0; + skb->nohdr = 0; + skb->peeked = 0; + skb->mac_len = 0; + skb->dev = NULL; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; + skb_reset_tc(skb); +#endif + skb->hdr_len = skb_headroom(skb); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_probe_transport_header(skb); + skb_reset_inner_headers(skb); +} + +static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) +{ + unsigned int cpu = *stored_cpu, cpu_index, i; + + if (unlikely(cpu == nr_cpumask_bits || + !cpumask_test_cpu(cpu, cpu_online_mask))) { + cpu_index = id % cpumask_weight(cpu_online_mask); + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < cpu_index; ++i) + cpu = cpumask_next(cpu, cpu_online_mask); + *stored_cpu = cpu; + } + return cpu; +} + +/* This function is racy, in the sense that next is unlocked, so it could return + * the same CPU twice. A race-free version of this would be to instead store an + * atomic sequence number, do an increment-and-return, and then iterate through + * every possible CPU until we get to that index -- choose_cpu. However that's + * a bit slower, and it doesn't seem like this potential race actually + * introduces any performance loss, so we live with it. + */ +static inline int wg_cpumask_next_online(int *next) +{ + int cpu = *next; + + while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask))) + cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + return cpu; +} + +static inline int wg_queue_enqueue_per_device_and_peer( + struct crypt_queue *device_queue, struct crypt_queue *peer_queue, + struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) +{ + int cpu; + + atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED); + /* We first queue this up for the peer ingestion, but the consumer + * will wait for the state to change to CRYPTED or DEAD before. + */ + if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) + return -ENOSPC; + /* Then we queue it up in the device queue, which consumes the + * packet as soon as it can. + */ + cpu = wg_cpumask_next_online(next_cpu); + if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) + return -EPIPE; + queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); + return 0; +} + +static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, + struct sk_buff *skb, + enum packet_state state) +{ + /* We take a reference, because as soon as we call atomic_set, the + * peer can be freed from below us. + */ + struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); + + atomic_set_release(&PACKET_CB(skb)->state, state); + queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, + peer->internal_id), + peer->device->packet_crypt_wq, &queue->work); + wg_peer_put(peer); +} + +static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, + enum packet_state state) +{ + /* We take a reference, because as soon as we call atomic_set, the + * peer can be freed from below us. + */ + struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); + + atomic_set_release(&PACKET_CB(skb)->state, state); + napi_schedule(&peer->napi); + wg_peer_put(peer); +} + +#ifdef DEBUG +bool wg_packet_counter_selftest(void); +#endif + +#endif /* _WG_QUEUEING_H */ diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c new file mode 100644 index 000000000000..3fedd1d21f5e --- /dev/null +++ b/drivers/net/wireguard/ratelimiter.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "ratelimiter.h" +#include <linux/siphash.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <net/ip.h> + +static struct kmem_cache *entry_cache; +static hsiphash_key_t key; +static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock"); +static DEFINE_MUTEX(init_lock); +static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */ +static atomic_t total_entries = ATOMIC_INIT(0); +static unsigned int max_entries, table_size; +static void wg_ratelimiter_gc_entries(struct work_struct *); +static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries); +static struct hlist_head *table_v4; +#if IS_ENABLED(CONFIG_IPV6) +static struct hlist_head *table_v6; +#endif + +struct ratelimiter_entry { + u64 last_time_ns, tokens, ip; + void *net; + spinlock_t lock; + struct hlist_node hash; + struct rcu_head rcu; +}; + +enum { + PACKETS_PER_SECOND = 20, + PACKETS_BURSTABLE = 5, + PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND, + TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE +}; + +static void entry_free(struct rcu_head *rcu) +{ + kmem_cache_free(entry_cache, + container_of(rcu, struct ratelimiter_entry, rcu)); + atomic_dec(&total_entries); +} + +static void entry_uninit(struct ratelimiter_entry *entry) +{ + hlist_del_rcu(&entry->hash); + call_rcu(&entry->rcu, entry_free); +} + +/* Calling this function with a NULL work uninits all entries. */ +static void wg_ratelimiter_gc_entries(struct work_struct *work) +{ + const u64 now = ktime_get_coarse_boottime_ns(); + struct ratelimiter_entry *entry; + struct hlist_node *temp; + unsigned int i; + + for (i = 0; i < table_size; ++i) { + spin_lock(&table_lock); + hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) { + if (unlikely(!work) || + now - entry->last_time_ns > NSEC_PER_SEC) + entry_uninit(entry); + } +#if IS_ENABLED(CONFIG_IPV6) + hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) { + if (unlikely(!work) || + now - entry->last_time_ns > NSEC_PER_SEC) + entry_uninit(entry); + } +#endif + spin_unlock(&table_lock); + if (likely(work)) + cond_resched(); + } + if (likely(work)) + queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); +} + +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net) +{ + /* We only take the bottom half of the net pointer, so that we can hash + * 3 words in the end. This way, siphash's len param fits into the final + * u32, and we don't incur an extra round. + */ + const u32 net_word = (unsigned long)net; + struct ratelimiter_entry *entry; + struct hlist_head *bucket; + u64 ip; + + if (skb->protocol == htons(ETH_P_IP)) { + ip = (u64 __force)ip_hdr(skb)->saddr; + bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) & + (table_size - 1)]; + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + /* Only use 64 bits, so as to ratelimit the whole /64. */ + memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip)); + bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) & + (table_size - 1)]; + } +#endif + else + return false; + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, bucket, hash) { + if (entry->net == net && entry->ip == ip) { + u64 now, tokens; + bool ret; + /* Quasi-inspired by nft_limit.c, but this is actually a + * slightly different algorithm. Namely, we incorporate + * the burst as part of the maximum tokens, rather than + * as part of the rate. + */ + spin_lock(&entry->lock); + now = ktime_get_coarse_boottime_ns(); + tokens = min_t(u64, TOKEN_MAX, + entry->tokens + now - + entry->last_time_ns); + entry->last_time_ns = now; + ret = tokens >= PACKET_COST; + entry->tokens = ret ? tokens - PACKET_COST : tokens; + spin_unlock(&entry->lock); + rcu_read_unlock(); + return ret; + } + } + rcu_read_unlock(); + + if (atomic_inc_return(&total_entries) > max_entries) + goto err_oom; + + entry = kmem_cache_alloc(entry_cache, GFP_KERNEL); + if (unlikely(!entry)) + goto err_oom; + + entry->net = net; + entry->ip = ip; + INIT_HLIST_NODE(&entry->hash); + spin_lock_init(&entry->lock); + entry->last_time_ns = ktime_get_coarse_boottime_ns(); + entry->tokens = TOKEN_MAX - PACKET_COST; + spin_lock(&table_lock); + hlist_add_head_rcu(&entry->hash, bucket); + spin_unlock(&table_lock); + return true; + +err_oom: + atomic_dec(&total_entries); + return false; +} + +int wg_ratelimiter_init(void) +{ + mutex_lock(&init_lock); + if (++init_refcnt != 1) + goto out; + + entry_cache = KMEM_CACHE(ratelimiter_entry, 0); + if (!entry_cache) + goto err; + + /* xt_hashlimit.c uses a slightly different algorithm for ratelimiting, + * but what it shares in common is that it uses a massive hashtable. So, + * we borrow their wisdom about good table sizes on different systems + * dependent on RAM. This calculation here comes from there. + */ + table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 : + max_t(unsigned long, 16, roundup_pow_of_two( + (totalram_pages() << PAGE_SHIFT) / + (1U << 14) / sizeof(struct hlist_head))); + max_entries = table_size * 8; + + table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL); + if (unlikely(!table_v4)) + goto err_kmemcache; + +#if IS_ENABLED(CONFIG_IPV6) + table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL); + if (unlikely(!table_v6)) { + kvfree(table_v4); + goto err_kmemcache; + } +#endif + + queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); + get_random_bytes(&key, sizeof(key)); +out: + mutex_unlock(&init_lock); + return 0; + +err_kmemcache: + kmem_cache_destroy(entry_cache); +err: + --init_refcnt; + mutex_unlock(&init_lock); + return -ENOMEM; +} + +void wg_ratelimiter_uninit(void) +{ + mutex_lock(&init_lock); + if (!init_refcnt || --init_refcnt) + goto out; + + cancel_delayed_work_sync(&gc_work); + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + kvfree(table_v4); +#if IS_ENABLED(CONFIG_IPV6) + kvfree(table_v6); +#endif + kmem_cache_destroy(entry_cache); +out: + mutex_unlock(&init_lock); +} + +#include "selftest/ratelimiter.c" diff --git a/drivers/net/wireguard/ratelimiter.h b/drivers/net/wireguard/ratelimiter.h new file mode 100644 index 000000000000..83067f71ea99 --- /dev/null +++ b/drivers/net/wireguard/ratelimiter.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_RATELIMITER_H +#define _WG_RATELIMITER_H + +#include <linux/skbuff.h> + +int wg_ratelimiter_init(void); +void wg_ratelimiter_uninit(void); +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net); + +#ifdef DEBUG +bool wg_ratelimiter_selftest(void); +#endif + +#endif /* _WG_RATELIMITER_H */ diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c new file mode 100644 index 000000000000..9c6bab9c981f --- /dev/null +++ b/drivers/net/wireguard/receive.c @@ -0,0 +1,595 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "queueing.h" +#include "device.h" +#include "peer.h" +#include "timers.h" +#include "messages.h" +#include "cookie.h" +#include "socket.h" + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <net/ip_tunnels.h> + +/* Must be called with bh disabled. */ +static void update_rx_stats(struct wg_peer *peer, size_t len) +{ + struct pcpu_sw_netstats *tstats = + get_cpu_ptr(peer->device->dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + ++tstats->rx_packets; + tstats->rx_bytes += len; + peer->rx_bytes += len; + u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); +} + +#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type) + +static size_t validate_header_len(struct sk_buff *skb) +{ + if (unlikely(skb->len < sizeof(struct message_header))) + return 0; + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) && + skb->len >= MESSAGE_MINIMUM_LENGTH) + return sizeof(struct message_data); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) && + skb->len == sizeof(struct message_handshake_initiation)) + return sizeof(struct message_handshake_initiation); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) && + skb->len == sizeof(struct message_handshake_response)) + return sizeof(struct message_handshake_response); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) && + skb->len == sizeof(struct message_handshake_cookie)) + return sizeof(struct message_handshake_cookie); + return 0; +} + +static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg) +{ + size_t data_offset, data_len, header_len; + struct udphdr *udp; + + if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol || + skb_transport_header(skb) < skb->head || + (skb_transport_header(skb) + sizeof(struct udphdr)) > + skb_tail_pointer(skb))) + return -EINVAL; /* Bogus IP header */ + udp = udp_hdr(skb); + data_offset = (u8 *)udp - skb->data; + if (unlikely(data_offset > U16_MAX || + data_offset + sizeof(struct udphdr) > skb->len)) + /* Packet has offset at impossible location or isn't big enough + * to have UDP fields. + */ + return -EINVAL; + data_len = ntohs(udp->len); + if (unlikely(data_len < sizeof(struct udphdr) || + data_len > skb->len - data_offset)) + /* UDP packet is reporting too small of a size or lying about + * its size. + */ + return -EINVAL; + data_len -= sizeof(struct udphdr); + data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data; + if (unlikely(!pskb_may_pull(skb, + data_offset + sizeof(struct message_header)) || + pskb_trim(skb, data_len + data_offset) < 0)) + return -EINVAL; + skb_pull(skb, data_offset); + if (unlikely(skb->len != data_len)) + /* Final len does not agree with calculated len */ + return -EINVAL; + header_len = validate_header_len(skb); + if (unlikely(!header_len)) + return -EINVAL; + __skb_push(skb, data_offset); + if (unlikely(!pskb_may_pull(skb, data_offset + header_len))) + return -EINVAL; + __skb_pull(skb, data_offset); + return 0; +} + +static void wg_receive_handshake_packet(struct wg_device *wg, + struct sk_buff *skb) +{ + enum cookie_mac_state mac_state; + struct wg_peer *peer = NULL; + /* This is global, so that our load calculation applies to the whole + * system. We don't care about races with it at all. + */ + static u64 last_under_load; + bool packet_needs_cookie; + bool under_load; + + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) { + net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n", + wg->dev->name, skb); + wg_cookie_message_consume( + (struct message_handshake_cookie *)skb->data, wg); + return; + } + + under_load = skb_queue_len(&wg->incoming_handshakes) >= + MAX_QUEUED_INCOMING_HANDSHAKES / 8; + if (under_load) + last_under_load = ktime_get_coarse_boottime_ns(); + else if (last_under_load) + under_load = !wg_birthdate_has_expired(last_under_load, 1); + mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, + under_load); + if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || + (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) { + packet_needs_cookie = false; + } else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) { + packet_needs_cookie = true; + } else { + net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n", + wg->dev->name, skb); + return; + } + + switch (SKB_TYPE_LE32(skb)) { + case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): { + struct message_handshake_initiation *message = + (struct message_handshake_initiation *)skb->data; + + if (packet_needs_cookie) { + wg_packet_send_handshake_cookie(wg, skb, + message->sender_index); + return; + } + peer = wg_noise_handshake_consume_initiation(message, wg); + if (unlikely(!peer)) { + net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n", + wg->dev->name, skb); + return; + } + wg_socket_set_peer_endpoint_from_skb(peer, skb); + net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n", + wg->dev->name, peer->internal_id, + &peer->endpoint.addr); + wg_packet_send_handshake_response(peer); + break; + } + case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): { + struct message_handshake_response *message = + (struct message_handshake_response *)skb->data; + + if (packet_needs_cookie) { + wg_packet_send_handshake_cookie(wg, skb, + message->sender_index); + return; + } + peer = wg_noise_handshake_consume_response(message, wg); + if (unlikely(!peer)) { + net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n", + wg->dev->name, skb); + return; + } + wg_socket_set_peer_endpoint_from_skb(peer, skb); + net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n", + wg->dev->name, peer->internal_id, + &peer->endpoint.addr); + if (wg_noise_handshake_begin_session(&peer->handshake, + &peer->keypairs)) { + wg_timers_session_derived(peer); + wg_timers_handshake_complete(peer); + /* Calling this function will either send any existing + * packets in the queue and not send a keepalive, which + * is the best case, Or, if there's nothing in the + * queue, it will send a keepalive, in order to give + * immediate confirmation of the session. + */ + wg_packet_send_keepalive(peer); + } + break; + } + } + + if (unlikely(!peer)) { + WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n"); + return; + } + + local_bh_disable(); + update_rx_stats(peer, skb->len); + local_bh_enable(); + + wg_timers_any_authenticated_packet_received(peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_peer_put(peer); +} + +void wg_packet_handshake_receive_worker(struct work_struct *work) +{ + struct wg_device *wg = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { + wg_receive_handshake_packet(wg, skb); + dev_kfree_skb(skb); + cond_resched(); + } +} + +static void keep_key_fresh(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + bool send = false; + + if (peer->sent_lastminute_handshake) + return; + + rcu_read_lock_bh(); + keypair = rcu_dereference_bh(peer->keypairs.current_keypair); + if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && + keypair->i_am_the_initiator && + unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, + REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT))) + send = true; + rcu_read_unlock_bh(); + + if (send) { + peer->sent_lastminute_handshake = true; + wg_packet_send_queued_handshake_initiation(peer, false); + } +} + +static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) +{ + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + struct sk_buff *trailer; + unsigned int offset; + int num_frags; + + if (unlikely(!key)) + return false; + + if (unlikely(!READ_ONCE(key->is_valid) || + wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) || + key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { + WRITE_ONCE(key->is_valid, false); + return false; + } + + PACKET_CB(skb)->nonce = + le64_to_cpu(((struct message_data *)skb->data)->counter); + + /* We ensure that the network header is part of the packet before we + * call skb_cow_data, so that there's no chance that data is removed + * from the skb, so that later we can extract the original endpoint. + */ + offset = skb->data - skb_network_header(skb); + skb_push(skb, offset); + num_frags = skb_cow_data(skb, 0, &trailer); + offset += sizeof(struct message_data); + skb_pull(skb, offset); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) + return false; + + if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, + PACKET_CB(skb)->nonce, + key->key)) + return false; + + /* Another ugly situation of pushing and pulling the header so as to + * keep endpoint information intact. + */ + skb_push(skb, offset); + if (pskb_trim(skb, skb->len - noise_encrypted_len(0))) + return false; + skb_pull(skb, offset); + + return true; +} + +/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ +static bool counter_validate(union noise_counter *counter, u64 their_counter) +{ + unsigned long index, index_current, top, i; + bool ret = false; + + spin_lock_bh(&counter->receive.lock); + + if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || + their_counter >= REJECT_AFTER_MESSAGES)) + goto out; + + ++their_counter; + + if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < + counter->receive.counter)) + goto out; + + index = their_counter >> ilog2(BITS_PER_LONG); + + if (likely(their_counter > counter->receive.counter)) { + index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); + top = min_t(unsigned long, index - index_current, + COUNTER_BITS_TOTAL / BITS_PER_LONG); + for (i = 1; i <= top; ++i) + counter->receive.backtrack[(i + index_current) & + ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; + counter->receive.counter = their_counter; + } + + index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; + ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), + &counter->receive.backtrack[index]); + +out: + spin_unlock_bh(&counter->receive.lock); + return ret; +} + +#include "selftest/counter.c" + +static void wg_packet_consume_data_done(struct wg_peer *peer, + struct sk_buff *skb, + struct endpoint *endpoint) +{ + struct net_device *dev = peer->device->dev; + unsigned int len, len_before_trim; + struct wg_peer *routed_peer; + + wg_socket_set_peer_endpoint(peer, endpoint); + + if (unlikely(wg_noise_received_with_keypair(&peer->keypairs, + PACKET_CB(skb)->keypair))) { + wg_timers_handshake_complete(peer); + wg_packet_send_staged_packets(peer); + } + + keep_key_fresh(peer); + + wg_timers_any_authenticated_packet_received(peer); + wg_timers_any_authenticated_packet_traversal(peer); + + /* A packet with length 0 is a keepalive packet */ + if (unlikely(!skb->len)) { + update_rx_stats(peer, message_data_len(0)); + net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, + &peer->endpoint.addr); + goto packet_processed; + } + + wg_timers_data_received(peer); + + if (unlikely(skb_network_header(skb) < skb->head)) + goto dishonest_packet_size; + if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) && + (ip_hdr(skb)->version == 4 || + (ip_hdr(skb)->version == 6 && + pskb_network_may_pull(skb, sizeof(struct ipv6hdr))))))) + goto dishonest_packet_type; + + skb->dev = dev; + /* We've already verified the Poly1305 auth tag, which means this packet + * was not modified in transit. We can therefore tell the networking + * stack that all checksums of every layer of encapsulation have already + * been checked "by the hardware" and therefore is unnecessary to check + * again in software. + */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = ~0; /* All levels */ + skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb); + if (skb->protocol == htons(ETH_P_IP)) { + len = ntohs(ip_hdr(skb)->tot_len); + if (unlikely(len < sizeof(struct iphdr))) + goto dishonest_packet_size; + if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) + IP_ECN_set_ce(ip_hdr(skb)); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + len = ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr); + if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) + IP6_ECN_set_ce(skb, ipv6_hdr(skb)); + } else { + goto dishonest_packet_type; + } + + if (unlikely(len > skb->len)) + goto dishonest_packet_size; + len_before_trim = skb->len; + if (unlikely(pskb_trim(skb, len))) + goto packet_processed; + + routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips, + skb); + wg_peer_put(routed_peer); /* We don't need the extra reference. */ + + if (unlikely(routed_peer != peer)) + goto dishonest_packet_peer; + + if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) { + ++dev->stats.rx_dropped; + net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, + &peer->endpoint.addr); + } else { + update_rx_stats(peer, message_data_len(len_before_trim)); + } + return; + +dishonest_packet_peer: + net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", + dev->name, skb, peer->internal_id, + &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_frame_errors; + goto packet_processed; +dishonest_packet_type: + net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_frame_errors; + goto packet_processed; +dishonest_packet_size: + net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_length_errors; + goto packet_processed; +packet_processed: + dev_kfree_skb(skb); +} + +int wg_packet_rx_poll(struct napi_struct *napi, int budget) +{ + struct wg_peer *peer = container_of(napi, struct wg_peer, napi); + struct crypt_queue *queue = &peer->rx_queue; + struct noise_keypair *keypair; + struct endpoint endpoint; + enum packet_state state; + struct sk_buff *skb; + int work_done = 0; + bool free; + + if (unlikely(budget <= 0)) + return 0; + + while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && + (state = atomic_read_acquire(&PACKET_CB(skb)->state)) != + PACKET_STATE_UNCRYPTED) { + __ptr_ring_discard_one(&queue->ring); + peer = PACKET_PEER(skb); + keypair = PACKET_CB(skb)->keypair; + free = true; + + if (unlikely(state != PACKET_STATE_CRYPTED)) + goto next; + + if (unlikely(!counter_validate(&keypair->receiving.counter, + PACKET_CB(skb)->nonce))) { + net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", + peer->device->dev->name, + PACKET_CB(skb)->nonce, + keypair->receiving.counter.receive.counter); + goto next; + } + + if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) + goto next; + + wg_reset_packet(skb); + wg_packet_consume_data_done(peer, skb, &endpoint); + free = false; + +next: + wg_noise_keypair_put(keypair, false); + wg_peer_put(peer); + if (unlikely(free)) + dev_kfree_skb(skb); + + if (++work_done >= budget) + break; + } + + if (work_done < budget) + napi_complete_done(napi, work_done); + + return work_done; +} + +void wg_packet_decrypt_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *skb; + + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { + enum packet_state state = likely(decrypt_packet(skb, + &PACKET_CB(skb)->keypair->receiving)) ? + PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; + wg_queue_enqueue_per_peer_napi(skb, state); + } +} + +static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) +{ + __le32 idx = ((struct message_data *)skb->data)->key_idx; + struct wg_peer *peer = NULL; + int ret; + + rcu_read_lock_bh(); + PACKET_CB(skb)->keypair = + (struct noise_keypair *)wg_index_hashtable_lookup( + wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx, + &peer); + if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair))) + goto err_keypair; + + if (unlikely(READ_ONCE(peer->is_dead))) + goto err; + + ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, + &peer->rx_queue, skb, + wg->packet_crypt_wq, + &wg->decrypt_queue.last_cpu); + if (unlikely(ret == -EPIPE)) + wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); + if (likely(!ret || ret == -EPIPE)) { + rcu_read_unlock_bh(); + return; + } +err: + wg_noise_keypair_put(PACKET_CB(skb)->keypair, false); +err_keypair: + rcu_read_unlock_bh(); + wg_peer_put(peer); + dev_kfree_skb(skb); +} + +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) +{ + if (unlikely(prepare_skb_header(skb, wg) < 0)) + goto err; + switch (SKB_TYPE_LE32(skb)) { + case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): + case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): + case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { + int cpu; + + if (skb_queue_len(&wg->incoming_handshakes) > + MAX_QUEUED_INCOMING_HANDSHAKES || + unlikely(!rng_is_initialized())) { + net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", + wg->dev->name, skb); + goto err; + } + skb_queue_tail(&wg->incoming_handshakes, skb); + /* Queues up a call to packet_process_queued_handshake_ + * packets(skb): + */ + cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu); + queue_work_on(cpu, wg->handshake_receive_wq, + &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work); + break; + } + case cpu_to_le32(MESSAGE_DATA): + PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb); + wg_packet_consume_data(wg, skb); + break; + default: + net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", + wg->dev->name, skb); + goto err; + } + return; + +err: + dev_kfree_skb(skb); +} diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c new file mode 100644 index 000000000000..846db14cb046 --- /dev/null +++ b/drivers/net/wireguard/selftest/allowedips.c @@ -0,0 +1,683 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * This contains some basic static unit tests for the allowedips data structure. + * It also has two additional modes that are disabled and meant to be used by + * folks directly playing with this file. If you define the macro + * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in + * memory, it will be printed out as KERN_DEBUG in a format that can be passed + * to graphviz (the dot command) to visualize it. If you define the macro + * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of + * randomized tests done against a trivial implementation, which may take + * upwards of a half-hour to complete. There's no set of users who should be + * enabling these, and the only developers that should go anywhere near these + * nobs are the ones who are reading this comment. + */ + +#ifdef DEBUG + +#include <linux/siphash.h> + +static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits, + u8 cidr) +{ + swap_endian(dst, src, bits); + memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8); + if (cidr) + dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8); +} + +static __init void print_node(struct allowedips_node *node, u8 bits) +{ + char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n"; + char *fmt_declaration = KERN_DEBUG + "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; + char *style = "dotted"; + u8 ip1[16], ip2[16]; + u32 color = 0; + + if (bits == 32) { + fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n"; + fmt_declaration = KERN_DEBUG + "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; + } else if (bits == 128) { + fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n"; + fmt_declaration = KERN_DEBUG + "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; + } + if (node->peer) { + hsiphash_key_t key = { { 0 } }; + + memcpy(&key, &node->peer, sizeof(node->peer)); + color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 | + hsiphash_1u32(0xbabecafe, &key) % 200 << 8 | + hsiphash_1u32(0xabad1dea, &key) % 200; + style = "bold"; + } + swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr); + printk(fmt_declaration, ip1, node->cidr, style, color); + if (node->bit[0]) { + swap_endian_and_apply_cidr(ip2, + rcu_dereference_raw(node->bit[0])->bits, bits, + node->cidr); + printk(fmt_connection, ip1, node->cidr, ip2, + rcu_dereference_raw(node->bit[0])->cidr); + print_node(rcu_dereference_raw(node->bit[0]), bits); + } + if (node->bit[1]) { + swap_endian_and_apply_cidr(ip2, + rcu_dereference_raw(node->bit[1])->bits, + bits, node->cidr); + printk(fmt_connection, ip1, node->cidr, ip2, + rcu_dereference_raw(node->bit[1])->cidr); + print_node(rcu_dereference_raw(node->bit[1]), bits); + } +} + +static __init void print_tree(struct allowedips_node __rcu *top, u8 bits) +{ + printk(KERN_DEBUG "digraph trie {\n"); + print_node(rcu_dereference_raw(top), bits); + printk(KERN_DEBUG "}\n"); +} + +enum { + NUM_PEERS = 2000, + NUM_RAND_ROUTES = 400, + NUM_MUTATED_ROUTES = 100, + NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30 +}; + +struct horrible_allowedips { + struct hlist_head head; +}; + +struct horrible_allowedips_node { + struct hlist_node table; + union nf_inet_addr ip; + union nf_inet_addr mask; + u8 ip_version; + void *value; +}; + +static __init void horrible_allowedips_init(struct horrible_allowedips *table) +{ + INIT_HLIST_HEAD(&table->head); +} + +static __init void horrible_allowedips_free(struct horrible_allowedips *table) +{ + struct horrible_allowedips_node *node; + struct hlist_node *h; + + hlist_for_each_entry_safe(node, h, &table->head, table) { + hlist_del(&node->table); + kfree(node); + } +} + +static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr) +{ + union nf_inet_addr mask; + + memset(&mask, 0x00, 128 / 8); + memset(&mask, 0xff, cidr / 8); + if (cidr % 32) + mask.all[cidr / 32] = (__force u32)htonl( + (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL); + return mask; +} + +static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet) +{ + return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) + + hweight32(subnet.all[2]) + hweight32(subnet.all[3]); +} + +static __init inline void +horrible_mask_self(struct horrible_allowedips_node *node) +{ + if (node->ip_version == 4) { + node->ip.ip &= node->mask.ip; + } else if (node->ip_version == 6) { + node->ip.ip6[0] &= node->mask.ip6[0]; + node->ip.ip6[1] &= node->mask.ip6[1]; + node->ip.ip6[2] &= node->mask.ip6[2]; + node->ip.ip6[3] &= node->mask.ip6[3]; + } +} + +static __init inline bool +horrible_match_v4(const struct horrible_allowedips_node *node, + struct in_addr *ip) +{ + return (ip->s_addr & node->mask.ip) == node->ip.ip; +} + +static __init inline bool +horrible_match_v6(const struct horrible_allowedips_node *node, + struct in6_addr *ip) +{ + return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == + node->ip.ip6[0] && + (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == + node->ip.ip6[1] && + (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == + node->ip.ip6[2] && + (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3]; +} + +static __init void +horrible_insert_ordered(struct horrible_allowedips *table, + struct horrible_allowedips_node *node) +{ + struct horrible_allowedips_node *other = NULL, *where = NULL; + u8 my_cidr = horrible_mask_to_cidr(node->mask); + + hlist_for_each_entry(other, &table->head, table) { + if (!memcmp(&other->mask, &node->mask, + sizeof(union nf_inet_addr)) && + !memcmp(&other->ip, &node->ip, + sizeof(union nf_inet_addr)) && + other->ip_version == node->ip_version) { + other->value = node->value; + kfree(node); + return; + } + where = other; + if (horrible_mask_to_cidr(other->mask) <= my_cidr) + break; + } + if (!other && !where) + hlist_add_head(&node->table, &table->head); + else if (!other) + hlist_add_behind(&node->table, &where->table); + else + hlist_add_before(&node->table, &where->table); +} + +static __init int +horrible_allowedips_insert_v4(struct horrible_allowedips *table, + struct in_addr *ip, u8 cidr, void *value) +{ + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), + GFP_KERNEL); + + if (unlikely(!node)) + return -ENOMEM; + node->ip.in = *ip; + node->mask = horrible_cidr_to_mask(cidr); + node->ip_version = 4; + node->value = value; + horrible_mask_self(node); + horrible_insert_ordered(table, node); + return 0; +} + +static __init int +horrible_allowedips_insert_v6(struct horrible_allowedips *table, + struct in6_addr *ip, u8 cidr, void *value) +{ + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), + GFP_KERNEL); + + if (unlikely(!node)) + return -ENOMEM; + node->ip.in6 = *ip; + node->mask = horrible_cidr_to_mask(cidr); + node->ip_version = 6; + node->value = value; + horrible_mask_self(node); + horrible_insert_ordered(table, node); + return 0; +} + +static __init void * +horrible_allowedips_lookup_v4(struct horrible_allowedips *table, + struct in_addr *ip) +{ + struct horrible_allowedips_node *node; + void *ret = NULL; + + hlist_for_each_entry(node, &table->head, table) { + if (node->ip_version != 4) + continue; + if (horrible_match_v4(node, ip)) { + ret = node->value; + break; + } + } + return ret; +} + +static __init void * +horrible_allowedips_lookup_v6(struct horrible_allowedips *table, + struct in6_addr *ip) +{ + struct horrible_allowedips_node *node; + void *ret = NULL; + + hlist_for_each_entry(node, &table->head, table) { + if (node->ip_version != 6) + continue; + if (horrible_match_v6(node, ip)) { + ret = node->value; + break; + } + } + return ret; +} + +static __init bool randomized_test(void) +{ + unsigned int i, j, k, mutate_amount, cidr; + u8 ip[16], mutate_mask[16], mutated[16]; + struct wg_peer **peers, *peer; + struct horrible_allowedips h; + DEFINE_MUTEX(mutex); + struct allowedips t; + bool ret = false; + + mutex_init(&mutex); + + wg_allowedips_init(&t); + horrible_allowedips_init(&h); + + peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL); + if (unlikely(!peers)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free; + } + for (i = 0; i < NUM_PEERS; ++i) { + peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL); + if (unlikely(!peers[i])) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free; + } + kref_init(&peers[i]->refcount); + } + + mutex_lock(&mutex); + + for (i = 0; i < NUM_RAND_ROUTES; ++i) { + prandom_bytes(ip, 4); + cidr = prandom_u32_max(32) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr, + peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip, + cidr, peer) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { + memcpy(mutated, ip, 4); + prandom_bytes(mutate_mask, 4); + mutate_amount = prandom_u32_max(32); + for (k = 0; k < mutate_amount / 8; ++k) + mutate_mask[k] = 0xff; + mutate_mask[k] = 0xff + << ((8 - (mutate_amount % 8)) % 8); + for (; k < 4; ++k) + mutate_mask[k] = 0; + for (k = 0; k < 4; ++k) + mutated[k] = (mutated[k] & mutate_mask[k]) | + (~mutate_mask[k] & + prandom_u32_max(256)); + cidr = prandom_u32_max(32) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v4(&t, + (struct in_addr *)mutated, + cidr, peer, &mutex) < 0) { + pr_err("allowedips random malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v4(&h, + (struct in_addr *)mutated, cidr, peer)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + } + } + + for (i = 0; i < NUM_RAND_ROUTES; ++i) { + prandom_bytes(ip, 16); + cidr = prandom_u32_max(128) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr, + peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip, + cidr, peer) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { + memcpy(mutated, ip, 16); + prandom_bytes(mutate_mask, 16); + mutate_amount = prandom_u32_max(128); + for (k = 0; k < mutate_amount / 8; ++k) + mutate_mask[k] = 0xff; + mutate_mask[k] = 0xff + << ((8 - (mutate_amount % 8)) % 8); + for (; k < 4; ++k) + mutate_mask[k] = 0; + for (k = 0; k < 4; ++k) + mutated[k] = (mutated[k] & mutate_mask[k]) | + (~mutate_mask[k] & + prandom_u32_max(256)); + cidr = prandom_u32_max(128) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v6(&t, + (struct in6_addr *)mutated, + cidr, peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v6( + &h, (struct in6_addr *)mutated, cidr, + peer)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + } + } + + mutex_unlock(&mutex); + + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { + print_tree(t.root4, 32); + print_tree(t.root6, 128); + } + + for (i = 0; i < NUM_QUERIES; ++i) { + prandom_bytes(ip, 4); + if (lookup(t.root4, 32, ip) != + horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { + pr_err("allowedips random self-test: FAIL\n"); + goto free; + } + } + + for (i = 0; i < NUM_QUERIES; ++i) { + prandom_bytes(ip, 16); + if (lookup(t.root6, 128, ip) != + horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { + pr_err("allowedips random self-test: FAIL\n"); + goto free; + } + } + ret = true; + +free: + mutex_lock(&mutex); +free_locked: + wg_allowedips_free(&t, &mutex); + mutex_unlock(&mutex); + horrible_allowedips_free(&h); + if (peers) { + for (i = 0; i < NUM_PEERS; ++i) + kfree(peers[i]); + } + kfree(peers); + return ret; +} + +static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d) +{ + static struct in_addr ip; + u8 *split = (u8 *)&ip; + + split[0] = a; + split[1] = b; + split[2] = c; + split[3] = d; + return &ip; +} + +static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d) +{ + static struct in6_addr ip; + __be32 *split = (__be32 *)&ip; + + split[0] = cpu_to_be32(a); + split[1] = cpu_to_be32(b); + split[2] = cpu_to_be32(c); + split[3] = cpu_to_be32(d); + return &ip; +} + +static __init struct wg_peer *init_peer(void) +{ + struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL); + + if (!peer) + return NULL; + kref_init(&peer->refcount); + INIT_LIST_HEAD(&peer->allowedips_list); + return peer; +} + +#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \ + wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \ + cidr, mem, &mutex) + +#define maybe_fail() do { \ + ++i; \ + if (!_s) { \ + pr_info("allowedips self-test %zu: FAIL\n", i); \ + success = false; \ + } \ + } while (0) + +#define test(version, mem, ipa, ipb, ipc, ipd) do { \ + bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ + ip##version(ipa, ipb, ipc, ipd)) == (mem); \ + maybe_fail(); \ + } while (0) + +#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \ + bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ + ip##version(ipa, ipb, ipc, ipd)) != (mem); \ + maybe_fail(); \ + } while (0) + +#define test_boolean(cond) do { \ + bool _s = (cond); \ + maybe_fail(); \ + } while (0) + +bool __init wg_allowedips_selftest(void) +{ + bool found_a = false, found_b = false, found_c = false, found_d = false, + found_e = false, found_other = false; + struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(), + *d = init_peer(), *e = init_peer(), *f = init_peer(), + *g = init_peer(), *h = init_peer(); + struct allowedips_node *iter_node; + bool success = false; + struct allowedips t; + DEFINE_MUTEX(mutex); + struct in6_addr ip; + size_t i = 0, count = 0; + __be64 part; + + mutex_init(&mutex); + mutex_lock(&mutex); + wg_allowedips_init(&t); + + if (!a || !b || !c || !d || !e || !f || !g || !h) { + pr_err("allowedips self-test malloc: FAIL\n"); + goto free; + } + + insert(4, a, 192, 168, 4, 0, 24); + insert(4, b, 192, 168, 4, 4, 32); + insert(4, c, 192, 168, 0, 0, 16); + insert(4, d, 192, 95, 5, 64, 27); + /* replaces previous entry, and maskself is required */ + insert(4, c, 192, 95, 5, 65, 27); + insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); + insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64); + insert(4, e, 0, 0, 0, 0, 0); + insert(6, e, 0, 0, 0, 0, 0); + /* replaces previous entry */ + insert(6, f, 0, 0, 0, 0, 0); + insert(6, g, 0x24046800, 0, 0, 0, 32); + /* maskself is required */ + insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64); + insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128); + insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128); + insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98); + insert(4, g, 64, 15, 112, 0, 20); + /* maskself is required */ + insert(4, h, 64, 15, 123, 211, 25); + insert(4, a, 10, 0, 0, 0, 25); + insert(4, b, 10, 0, 0, 128, 25); + insert(4, a, 10, 1, 0, 0, 30); + insert(4, b, 10, 1, 0, 4, 30); + insert(4, c, 10, 1, 0, 8, 29); + insert(4, d, 10, 1, 0, 16, 29); + + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { + print_tree(t.root4, 32); + print_tree(t.root6, 128); + } + + success = true; + + test(4, a, 192, 168, 4, 20); + test(4, a, 192, 168, 4, 0); + test(4, b, 192, 168, 4, 4); + test(4, c, 192, 168, 200, 182); + test(4, c, 192, 95, 5, 68); + test(4, e, 192, 95, 5, 96); + test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543); + test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee); + test(6, f, 0x26075300, 0x60006b01, 0, 0); + test(6, g, 0x24046800, 0x40040806, 0, 0x1006); + test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678); + test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678); + test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0, 0); + test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010); + test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef); + test(4, g, 64, 15, 116, 26); + test(4, g, 64, 15, 127, 3); + test(4, g, 64, 15, 123, 1); + test(4, h, 64, 15, 123, 128); + test(4, h, 64, 15, 123, 129); + test(4, a, 10, 0, 0, 52); + test(4, b, 10, 0, 0, 220); + test(4, a, 10, 1, 0, 2); + test(4, b, 10, 1, 0, 6); + test(4, c, 10, 1, 0, 10); + test(4, d, 10, 1, 0, 20); + + insert(4, a, 1, 0, 0, 0, 32); + insert(4, a, 64, 0, 0, 0, 32); + insert(4, a, 128, 0, 0, 0, 32); + insert(4, a, 192, 0, 0, 0, 32); + insert(4, a, 255, 0, 0, 0, 32); + wg_allowedips_remove_by_peer(&t, a, &mutex); + test_negative(4, a, 1, 0, 0, 0); + test_negative(4, a, 64, 0, 0, 0); + test_negative(4, a, 128, 0, 0, 0); + test_negative(4, a, 192, 0, 0, 0); + test_negative(4, a, 255, 0, 0, 0); + + wg_allowedips_free(&t, &mutex); + wg_allowedips_init(&t); + insert(4, a, 192, 168, 0, 0, 16); + insert(4, a, 192, 168, 0, 0, 24); + wg_allowedips_remove_by_peer(&t, a, &mutex); + test_negative(4, a, 192, 168, 0, 1); + + /* These will hit the WARN_ON(len >= 128) in free_node if something + * goes wrong. + */ + for (i = 0; i < 128; ++i) { + part = cpu_to_be64(~(1LLU << (i % 64))); + memset(&ip, 0xff, 16); + memcpy((u8 *)&ip + (i < 64) * 8, &part, 8); + wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex); + } + + wg_allowedips_free(&t, &mutex); + + wg_allowedips_init(&t); + insert(4, a, 192, 95, 5, 93, 27); + insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); + insert(4, a, 10, 1, 0, 20, 29); + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83); + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21); + list_for_each_entry(iter_node, &a->allowedips_list, peer_list) { + u8 cidr, ip[16] __aligned(__alignof(u64)); + int family = wg_allowedips_read_node(iter_node, ip, &cidr); + + count++; + + if (cidr == 27 && family == AF_INET && + !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr))) + found_a = true; + else if (cidr == 128 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543), + sizeof(struct in6_addr))) + found_b = true; + else if (cidr == 29 && family == AF_INET && + !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr))) + found_c = true; + else if (cidr == 83 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0), + sizeof(struct in6_addr))) + found_d = true; + else if (cidr == 21 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075000, 0, 0, 0), + sizeof(struct in6_addr))) + found_e = true; + else + found_other = true; + } + test_boolean(count == 5); + test_boolean(found_a); + test_boolean(found_b); + test_boolean(found_c); + test_boolean(found_d); + test_boolean(found_e); + test_boolean(!found_other); + + if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success) + success = randomized_test(); + + if (success) + pr_info("allowedips self-tests: pass\n"); + +free: + wg_allowedips_free(&t, &mutex); + kfree(a); + kfree(b); + kfree(c); + kfree(d); + kfree(e); + kfree(f); + kfree(g); + kfree(h); + mutex_unlock(&mutex); + + return success; +} + +#undef test_negative +#undef test +#undef remove +#undef insert +#undef init_peer + +#endif diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c new file mode 100644 index 000000000000..f4fbb9072ed7 --- /dev/null +++ b/drivers/net/wireguard/selftest/counter.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifdef DEBUG +bool __init wg_packet_counter_selftest(void) +{ + unsigned int test_num = 0, i; + union noise_counter counter; + bool success = true; + +#define T_INIT do { \ + memset(&counter, 0, sizeof(union noise_counter)); \ + spin_lock_init(&counter.receive.lock); \ + } while (0) +#define T_LIM (COUNTER_WINDOW_SIZE + 1) +#define T(n, v) do { \ + ++test_num; \ + if (counter_validate(&counter, n) != (v)) { \ + pr_err("nonce counter self-test %u: FAIL\n", \ + test_num); \ + success = false; \ + } \ + } while (0) + + T_INIT; + /* 1 */ T(0, true); + /* 2 */ T(1, true); + /* 3 */ T(1, false); + /* 4 */ T(9, true); + /* 5 */ T(8, true); + /* 6 */ T(7, true); + /* 7 */ T(7, false); + /* 8 */ T(T_LIM, true); + /* 9 */ T(T_LIM - 1, true); + /* 10 */ T(T_LIM - 1, false); + /* 11 */ T(T_LIM - 2, true); + /* 12 */ T(2, true); + /* 13 */ T(2, false); + /* 14 */ T(T_LIM + 16, true); + /* 15 */ T(3, false); + /* 16 */ T(T_LIM + 16, false); + /* 17 */ T(T_LIM * 4, true); + /* 18 */ T(T_LIM * 4 - (T_LIM - 1), true); + /* 19 */ T(10, false); + /* 20 */ T(T_LIM * 4 - T_LIM, false); + /* 21 */ T(T_LIM * 4 - (T_LIM + 1), false); + /* 22 */ T(T_LIM * 4 - (T_LIM - 2), true); + /* 23 */ T(T_LIM * 4 + 1 - T_LIM, false); + /* 24 */ T(0, false); + /* 25 */ T(REJECT_AFTER_MESSAGES, false); + /* 26 */ T(REJECT_AFTER_MESSAGES - 1, true); + /* 27 */ T(REJECT_AFTER_MESSAGES, false); + /* 28 */ T(REJECT_AFTER_MESSAGES - 1, false); + /* 29 */ T(REJECT_AFTER_MESSAGES - 2, true); + /* 30 */ T(REJECT_AFTER_MESSAGES + 1, false); + /* 31 */ T(REJECT_AFTER_MESSAGES + 2, false); + /* 32 */ T(REJECT_AFTER_MESSAGES - 2, false); + /* 33 */ T(REJECT_AFTER_MESSAGES - 3, true); + /* 34 */ T(0, false); + + T_INIT; + for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i) + T(i, true); + T(0, true); + T(0, false); + + T_INIT; + for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i) + T(i, true); + T(1, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;) + T(i, true); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;) + T(i, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) + T(i, true); + T(COUNTER_WINDOW_SIZE + 1, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) + T(i, true); + T(0, true); + T(COUNTER_WINDOW_SIZE + 1, true); + +#undef T +#undef T_LIM +#undef T_INIT + + if (success) + pr_info("nonce counter self-tests: pass\n"); + return success; +} +#endif diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c new file mode 100644 index 000000000000..bcd6462e4540 --- /dev/null +++ b/drivers/net/wireguard/selftest/ratelimiter.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifdef DEBUG + +#include <linux/jiffies.h> + +static const struct { + bool result; + unsigned int msec_to_sleep_before; +} expected_results[] __initconst = { + [0 ... PACKETS_BURSTABLE - 1] = { true, 0 }, + [PACKETS_BURSTABLE] = { false, 0 }, + [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND }, + [PACKETS_BURSTABLE + 2] = { false, 0 }, + [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, + [PACKETS_BURSTABLE + 4] = { true, 0 }, + [PACKETS_BURSTABLE + 5] = { false, 0 } +}; + +static __init unsigned int maximum_jiffies_at_index(int index) +{ + unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3; + int i; + + for (i = 0; i <= index; ++i) + total_msecs += expected_results[i].msec_to_sleep_before; + return msecs_to_jiffies(total_msecs); +} + +static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, + struct sk_buff *skb6, struct ipv6hdr *hdr6, + int *test) +{ + unsigned long loop_start_time; + int i; + + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + loop_start_time = jiffies; + + for (i = 0; i < ARRAY_SIZE(expected_results); ++i) { + if (expected_results[i].msec_to_sleep_before) + msleep(expected_results[i].msec_to_sleep_before); + + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (wg_ratelimiter_allow(skb4, &init_net) != + expected_results[i].result) + return -EXFULL; + ++(*test); + + hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (!wg_ratelimiter_allow(skb4, &init_net)) + return -EXFULL; + ++(*test); + + hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1); + +#if IS_ENABLED(CONFIG_IPV6) + hdr6->saddr.in6_u.u6_addr32[2] = htonl(i); + hdr6->saddr.in6_u.u6_addr32[3] = htonl(i); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (wg_ratelimiter_allow(skb6, &init_net) != + expected_results[i].result) + return -EXFULL; + ++(*test); + + hdr6->saddr.in6_u.u6_addr32[0] = + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (!wg_ratelimiter_allow(skb6, &init_net)) + return -EXFULL; + ++(*test); + + hdr6->saddr.in6_u.u6_addr32[0] = + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1); + + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; +#endif + } + return 0; +} + +static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4, + int *test) +{ + int i; + + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + + if (atomic_read(&total_entries)) + return -EXFULL; + ++(*test); + + for (i = 0; i <= max_entries; ++i) { + hdr4->saddr = htonl(i); + if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries)) + return -EXFULL; + ++(*test); + } + return 0; +} + +bool __init wg_ratelimiter_selftest(void) +{ + enum { TRIALS_BEFORE_GIVING_UP = 5000 }; + bool success = false; + int test = 0, trials; + struct sk_buff *skb4, *skb6; + struct iphdr *hdr4; + struct ipv6hdr *hdr6; + + if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) + return true; + + BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0); + + if (wg_ratelimiter_init()) + goto out; + ++test; + if (wg_ratelimiter_init()) { + wg_ratelimiter_uninit(); + goto out; + } + ++test; + if (wg_ratelimiter_init()) { + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + goto out; + } + ++test; + + skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL); + if (unlikely(!skb4)) + goto err_nofree; + skb4->protocol = htons(ETH_P_IP); + hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4)); + hdr4->saddr = htonl(8182); + skb_reset_network_header(skb4); + ++test; + +#if IS_ENABLED(CONFIG_IPV6) + skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL); + if (unlikely(!skb6)) { + kfree_skb(skb4); + goto err_nofree; + } + skb6->protocol = htons(ETH_P_IPV6); + hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6)); + hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212); + hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188); + skb_reset_network_header(skb6); + ++test; +#endif + + for (trials = TRIALS_BEFORE_GIVING_UP;;) { + int test_count = 0, ret; + + ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); + if (ret == -ETIMEDOUT) { + if (!trials--) { + test += test_count; + goto err; + } + msleep(500); + continue; + } else if (ret < 0) { + test += test_count; + goto err; + } else { + test += test_count; + break; + } + } + + for (trials = TRIALS_BEFORE_GIVING_UP;;) { + int test_count = 0; + + if (capacity_test(skb4, hdr4, &test_count) < 0) { + if (!trials--) { + test += test_count; + goto err; + } + msleep(50); + continue; + } + test += test_count; + break; + } + + success = true; + +err: + kfree_skb(skb4); +#if IS_ENABLED(CONFIG_IPV6) + kfree_skb(skb6); +#endif +err_nofree: + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + /* Uninit one extra time to check underflow detection. */ + wg_ratelimiter_uninit(); +out: + if (success) + pr_info("ratelimiter self-tests: pass\n"); + else + pr_err("ratelimiter self-test %d: FAIL\n", test); + + return success; +} +#endif diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c new file mode 100644 index 000000000000..c13260563446 --- /dev/null +++ b/drivers/net/wireguard/send.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "queueing.h" +#include "timers.h" +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "messages.h" +#include "cookie.h" + +#include <linux/uio.h> +#include <linux/inetdevice.h> +#include <linux/socket.h> +#include <net/ip_tunnels.h> +#include <net/udp.h> +#include <net/sock.h> + +static void wg_packet_send_handshake_initiation(struct wg_peer *peer) +{ + struct message_handshake_initiation packet; + + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), + REKEY_TIMEOUT)) + return; /* This function is rate limited. */ + + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); + net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) { + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + atomic64_set(&peer->last_sent_handshake, + ktime_get_coarse_boottime_ns()); + wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet), + HANDSHAKE_DSCP); + wg_timers_handshake_initiated(peer); + } +} + +void wg_packet_handshake_send_worker(struct work_struct *work) +{ + struct wg_peer *peer = container_of(work, struct wg_peer, + transmit_handshake_work); + + wg_packet_send_handshake_initiation(peer); + wg_peer_put(peer); +} + +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, + bool is_retry) +{ + if (!is_retry) + peer->timer_handshake_attempts = 0; + + rcu_read_lock_bh(); + /* We check last_sent_handshake here in addition to the actual function + * we're queueing up, so that we don't queue things if not strictly + * necessary: + */ + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), + REKEY_TIMEOUT) || + unlikely(READ_ONCE(peer->is_dead))) + goto out; + + wg_peer_get(peer); + /* Queues up calling packet_send_queued_handshakes(peer), where we do a + * peer_put(peer) after: + */ + if (!queue_work(peer->device->handshake_send_wq, + &peer->transmit_handshake_work)) + /* If the work was already queued, we want to drop the + * extra reference: + */ + wg_peer_put(peer); +out: + rcu_read_unlock_bh(); +} + +void wg_packet_send_handshake_response(struct wg_peer *peer) +{ + struct message_handshake_response packet; + + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); + net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + if (wg_noise_handshake_create_response(&packet, &peer->handshake)) { + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + if (wg_noise_handshake_begin_session(&peer->handshake, + &peer->keypairs)) { + wg_timers_session_derived(peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + atomic64_set(&peer->last_sent_handshake, + ktime_get_coarse_boottime_ns()); + wg_socket_send_buffer_to_peer(peer, &packet, + sizeof(packet), + HANDSHAKE_DSCP); + } + } +} + +void wg_packet_send_handshake_cookie(struct wg_device *wg, + struct sk_buff *initiating_skb, + __le32 sender_index) +{ + struct message_handshake_cookie packet; + + net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n", + wg->dev->name, initiating_skb); + wg_cookie_message_create(&packet, initiating_skb, sender_index, + &wg->cookie_checker); + wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet, + sizeof(packet)); +} + +static void keep_key_fresh(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + bool send = false; + + rcu_read_lock_bh(); + keypair = rcu_dereference_bh(peer->keypairs.current_keypair); + if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && + (unlikely(atomic64_read(&keypair->sending.counter.counter) > + REKEY_AFTER_MESSAGES) || + (keypair->i_am_the_initiator && + unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, + REKEY_AFTER_TIME))))) + send = true; + rcu_read_unlock_bh(); + + if (send) + wg_packet_send_queued_handshake_initiation(peer, false); +} + +static unsigned int calculate_skb_padding(struct sk_buff *skb) +{ + /* We do this modulo business with the MTU, just in case the networking + * layer gives us a packet that's bigger than the MTU. In that case, we + * wouldn't want the final subtraction to overflow in the case of the + * padded_size being clamped. + */ + unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu; + unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); + + if (padded_size > PACKET_CB(skb)->mtu) + padded_size = PACKET_CB(skb)->mtu; + return padded_size - last_unit; +} + +static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) +{ + unsigned int padding_len, plaintext_len, trailer_len; + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + struct message_data *header; + struct sk_buff *trailer; + int num_frags; + + /* Calculate lengths. */ + padding_len = calculate_skb_padding(skb); + trailer_len = padding_len + noise_encrypted_len(0); + plaintext_len = skb->len + padding_len; + + /* Expand data section to have room for padding and auth tag. */ + num_frags = skb_cow_data(skb, trailer_len, &trailer); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + /* Set the padding to zeros, and make sure it and the auth tag are part + * of the skb. + */ + memset(skb_tail_pointer(trailer), 0, padding_len); + + /* Expand head section to have room for our header and the network + * stack's headers. + */ + if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0)) + return false; + + /* Finalize checksum calculation for the inner packet, if required. */ + if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb))) + return false; + + /* Only after checksumming can we safely add on the padding at the end + * and the header. + */ + skb_set_inner_network_header(skb, 0); + header = (struct message_data *)skb_push(skb, sizeof(*header)); + header->header.type = cpu_to_le32(MESSAGE_DATA); + header->key_idx = keypair->remote_index; + header->counter = cpu_to_le64(PACKET_CB(skb)->nonce); + pskb_put(skb, trailer, trailer_len); + + /* Now we can encrypt the scattergather segments */ + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, sizeof(struct message_data), + noise_encrypted_len(plaintext_len)) <= 0) + return false; + return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0, + PACKET_CB(skb)->nonce, + keypair->sending.key); +} + +void wg_packet_send_keepalive(struct wg_peer *peer) +{ + struct sk_buff *skb; + + if (skb_queue_empty(&peer->staged_packet_queue)) { + skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, + GFP_ATOMIC); + if (unlikely(!skb)) + return; + skb_reserve(skb, DATA_PACKET_HEAD_ROOM); + skb->dev = peer->device->dev; + PACKET_CB(skb)->mtu = skb->dev->mtu; + skb_queue_tail(&peer->staged_packet_queue, skb); + net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + } + + wg_packet_send_staged_packets(peer); +} + +static void wg_packet_create_data_done(struct sk_buff *first, + struct wg_peer *peer) +{ + struct sk_buff *skb, *next; + bool is_keepalive, data_sent = false; + + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + skb_list_walk_safe(first, skb, next) { + is_keepalive = skb->len == message_data_len(0); + if (likely(!wg_socket_send_skb_to_peer(peer, skb, + PACKET_CB(skb)->ds) && !is_keepalive)) + data_sent = true; + } + + if (likely(data_sent)) + wg_timers_data_sent(peer); + + keep_key_fresh(peer); +} + +void wg_packet_tx_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct crypt_queue, + work); + struct noise_keypair *keypair; + enum packet_state state; + struct sk_buff *first; + struct wg_peer *peer; + + while ((first = __ptr_ring_peek(&queue->ring)) != NULL && + (state = atomic_read_acquire(&PACKET_CB(first)->state)) != + PACKET_STATE_UNCRYPTED) { + __ptr_ring_discard_one(&queue->ring); + peer = PACKET_PEER(first); + keypair = PACKET_CB(first)->keypair; + + if (likely(state == PACKET_STATE_CRYPTED)) + wg_packet_create_data_done(first, peer); + else + kfree_skb_list(first); + + wg_noise_keypair_put(keypair, false); + wg_peer_put(peer); + } +} + +void wg_packet_encrypt_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *first, *skb, *next; + + while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) { + enum packet_state state = PACKET_STATE_CRYPTED; + + skb_list_walk_safe(first, skb, next) { + if (likely(encrypt_packet(skb, + PACKET_CB(first)->keypair))) { + wg_reset_packet(skb); + } else { + state = PACKET_STATE_DEAD; + break; + } + } + wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, + state); + + } +} + +static void wg_packet_create_data(struct sk_buff *first) +{ + struct wg_peer *peer = PACKET_PEER(first); + struct wg_device *wg = peer->device; + int ret = -EINVAL; + + rcu_read_lock_bh(); + if (unlikely(READ_ONCE(peer->is_dead))) + goto err; + + ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, + &peer->tx_queue, first, + wg->packet_crypt_wq, + &wg->encrypt_queue.last_cpu); + if (unlikely(ret == -EPIPE)) + wg_queue_enqueue_per_peer(&peer->tx_queue, first, + PACKET_STATE_DEAD); +err: + rcu_read_unlock_bh(); + if (likely(!ret || ret == -EPIPE)) + return; + wg_noise_keypair_put(PACKET_CB(first)->keypair, false); + wg_peer_put(peer); + kfree_skb_list(first); +} + +void wg_packet_purge_staged_packets(struct wg_peer *peer) +{ + spin_lock_bh(&peer->staged_packet_queue.lock); + peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; + __skb_queue_purge(&peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); +} + +void wg_packet_send_staged_packets(struct wg_peer *peer) +{ + struct noise_symmetric_key *key; + struct noise_keypair *keypair; + struct sk_buff_head packets; + struct sk_buff *skb; + + /* Steal the current queue into our local one. */ + __skb_queue_head_init(&packets); + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice_init(&peer->staged_packet_queue, &packets); + spin_unlock_bh(&peer->staged_packet_queue.lock); + if (unlikely(skb_queue_empty(&packets))) + return; + + /* First we make sure we have a valid reference to a valid key. */ + rcu_read_lock_bh(); + keypair = wg_noise_keypair_get( + rcu_dereference_bh(peer->keypairs.current_keypair)); + rcu_read_unlock_bh(); + if (unlikely(!keypair)) + goto out_nokey; + key = &keypair->sending; + if (unlikely(!READ_ONCE(key->is_valid))) + goto out_nokey; + if (unlikely(wg_birthdate_has_expired(key->birthdate, + REJECT_AFTER_TIME))) + goto out_invalid; + + /* After we know we have a somewhat valid key, we now try to assign + * nonces to all of the packets in the queue. If we can't assign nonces + * for all of them, we just consider it a failure and wait for the next + * handshake. + */ + skb_queue_walk(&packets, skb) { + /* 0 for no outer TOS: no leak. TODO: at some later point, we + * might consider using flowi->tos as outer instead. + */ + PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); + PACKET_CB(skb)->nonce = + atomic64_inc_return(&key->counter.counter) - 1; + if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) + goto out_invalid; + } + + packets.prev->next = NULL; + wg_peer_get(keypair->entry.peer); + PACKET_CB(packets.next)->keypair = keypair; + wg_packet_create_data(packets.next); + return; + +out_invalid: + WRITE_ONCE(key->is_valid, false); +out_nokey: + wg_noise_keypair_put(keypair, false); + + /* We orphan the packets if we're waiting on a handshake, so that they + * don't block a socket's pool. + */ + skb_queue_walk(&packets, skb) + skb_orphan(skb); + /* Then we put them back on the top of the queue. We're not too + * concerned about accidentally getting things a little out of order if + * packets are being added really fast, because this queue is for before + * packets can even be sent and it's small anyway. + */ + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + /* If we're exiting because there's something wrong with the key, it + * means we should initiate a new handshake. + */ + wg_packet_send_queued_handshake_initiation(peer, false); +} diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c new file mode 100644 index 000000000000..c46256d0d81c --- /dev/null +++ b/drivers/net/wireguard/socket.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "queueing.h" +#include "messages.h" + +#include <linux/ctype.h> +#include <linux/net.h> +#include <linux/if_vlan.h> +#include <linux/if_ether.h> +#include <linux/inetdevice.h> +#include <net/udp_tunnel.h> +#include <net/ipv6.h> + +static int send4(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ + struct flowi4 fl = { + .saddr = endpoint->src4.s_addr, + .daddr = endpoint->addr4.sin_addr.s_addr, + .fl4_dport = endpoint->addr4.sin_port, + .flowi4_mark = wg->fwmark, + .flowi4_proto = IPPROTO_UDP + }; + struct rtable *rt = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock4); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl4_sport = inet_sk(sock)->inet_sport; + + if (cache) + rt = dst_cache_get_ip4(cache, &fl.saddr); + + if (!rt) { + security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); + if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, + fl.saddr, RT_SCOPE_HOST))) { + endpoint->src4.s_addr = 0; + *(__force __be32 *)&endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + } + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && + PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && + rt->dst.dev->ifindex != endpoint->src_if4)))) { + endpoint->src4.s_addr = 0; + *(__force __be32 *)&endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + if (!IS_ERR(rt)) + ip_rt_put(rt); + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + } + if (unlikely(IS_ERR(rt))) { + ret = PTR_ERR(rt); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + wg->dev->name, &endpoint->addr, ret); + goto err; + } else if (unlikely(rt->dst.dev == skb->dev)) { + ip_rt_put(rt); + ret = -ELOOP; + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", + wg->dev->name, &endpoint->addr); + goto err; + } + if (cache) + dst_cache_set_ip4(cache, &rt->dst, fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, + ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, + fl.fl4_dport, false, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +} + +static int send6(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 fl = { + .saddr = endpoint->src6, + .daddr = endpoint->addr6.sin6_addr, + .fl6_dport = endpoint->addr6.sin6_port, + .flowi6_mark = wg->fwmark, + .flowi6_oif = endpoint->addr6.sin6_scope_id, + .flowi6_proto = IPPROTO_UDP + /* TODO: addr->sin6_flowinfo */ + }; + struct dst_entry *dst = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock6); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl6_sport = inet_sk(sock)->inet_sport; + + if (cache) + dst = dst_cache_get_ip6(cache, &fl.saddr); + + if (!dst) { + security_sk_classify_flow(sock, flowi6_to_flowi(&fl)); + if (unlikely(!ipv6_addr_any(&fl.saddr) && + !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { + endpoint->src6 = fl.saddr = in6addr_any; + if (cache) + dst_cache_reset(cache); + } + dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, + NULL); + if (unlikely(IS_ERR(dst))) { + ret = PTR_ERR(dst); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + wg->dev->name, &endpoint->addr, ret); + goto err; + } else if (unlikely(dst->dev == skb->dev)) { + dst_release(dst); + ret = -ELOOP; + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", + wg->dev->name, &endpoint->addr); + goto err; + } + if (cache) + dst_cache_set_ip6(cache, dst, &fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, + ip6_dst_hoplimit(dst), 0, fl.fl6_sport, + fl.fl6_dport, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +#else + return -EAFNOSUPPORT; +#endif +} + +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) +{ + size_t skb_len = skb->len; + int ret = -EAFNOSUPPORT; + + read_lock_bh(&peer->endpoint_lock); + if (peer->endpoint.addr.sa_family == AF_INET) + ret = send4(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else if (peer->endpoint.addr.sa_family == AF_INET6) + ret = send6(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else + dev_kfree_skb(skb); + if (likely(!ret)) + peer->tx_bytes += skb_len; + read_unlock_bh(&peer->endpoint_lock); + + return ret; +} + +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, + size_t len, u8 ds) +{ + struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + + if (unlikely(!skb)) + return -ENOMEM; + + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + return wg_socket_send_skb_to_peer(peer, skb, ds); +} + +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, + struct sk_buff *in_skb, void *buffer, + size_t len) +{ + int ret = 0; + struct sk_buff *skb; + struct endpoint endpoint; + + if (unlikely(!in_skb)) + return -EINVAL; + ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); + if (unlikely(ret < 0)) + return ret; + + skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + if (unlikely(!skb)) + return -ENOMEM; + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + + if (endpoint.addr.sa_family == AF_INET) + ret = send4(wg, skb, &endpoint, 0, NULL); + else if (endpoint.addr.sa_family == AF_INET6) + ret = send6(wg, skb, &endpoint, 0, NULL); + /* No other possibilities if the endpoint is valid, which it is, + * as we checked above. + */ + + return ret; +} + +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, + const struct sk_buff *skb) +{ + memset(endpoint, 0, sizeof(*endpoint)); + if (skb->protocol == htons(ETH_P_IP)) { + endpoint->addr4.sin_family = AF_INET; + endpoint->addr4.sin_port = udp_hdr(skb)->source; + endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; + endpoint->src4.s_addr = ip_hdr(skb)->daddr; + endpoint->src_if4 = skb->skb_iif; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + endpoint->addr6.sin6_family = AF_INET6; + endpoint->addr6.sin6_port = udp_hdr(skb)->source; + endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; + endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( + &ipv6_hdr(skb)->saddr, skb->skb_iif); + endpoint->src6 = ipv6_hdr(skb)->daddr; + } else { + return -EINVAL; + } + return 0; +} + +static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) +{ + return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET && + a->addr4.sin_port == b->addr4.sin_port && + a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && + a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) || + (a->addr.sa_family == AF_INET6 && + b->addr.sa_family == AF_INET6 && + a->addr6.sin6_port == b->addr6.sin6_port && + ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) && + a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && + ipv6_addr_equal(&a->src6, &b->src6)) || + unlikely(!a->addr.sa_family && !b->addr.sa_family); +} + +void wg_socket_set_peer_endpoint(struct wg_peer *peer, + const struct endpoint *endpoint) +{ + /* First we check unlocked, in order to optimize, since it's pretty rare + * that an endpoint will change. If we happen to be mid-write, and two + * CPUs wind up writing the same thing or something slightly different, + * it doesn't really matter much either. + */ + if (endpoint_eq(endpoint, &peer->endpoint)) + return; + write_lock_bh(&peer->endpoint_lock); + if (endpoint->addr.sa_family == AF_INET) { + peer->endpoint.addr4 = endpoint->addr4; + peer->endpoint.src4 = endpoint->src4; + peer->endpoint.src_if4 = endpoint->src_if4; + } else if (endpoint->addr.sa_family == AF_INET6) { + peer->endpoint.addr6 = endpoint->addr6; + peer->endpoint.src6 = endpoint->src6; + } else { + goto out; + } + dst_cache_reset(&peer->endpoint_cache); +out: + write_unlock_bh(&peer->endpoint_lock); +} + +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, + const struct sk_buff *skb) +{ + struct endpoint endpoint; + + if (!wg_socket_endpoint_from_skb(&endpoint, skb)) + wg_socket_set_peer_endpoint(peer, &endpoint); +} + +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) +{ + write_lock_bh(&peer->endpoint_lock); + memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); + dst_cache_reset(&peer->endpoint_cache); + write_unlock_bh(&peer->endpoint_lock); +} + +static int wg_receive(struct sock *sk, struct sk_buff *skb) +{ + struct wg_device *wg; + + if (unlikely(!sk)) + goto err; + wg = sk->sk_user_data; + if (unlikely(!wg)) + goto err; + wg_packet_receive(wg, skb); + return 0; + +err: + kfree_skb(skb); + return 0; +} + +static void sock_free(struct sock *sock) +{ + if (unlikely(!sock)) + return; + sk_clear_memalloc(sock); + udp_tunnel_sock_release(sock->sk_socket); +} + +static void set_sock_opts(struct socket *sock) +{ + sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_sndbuf = INT_MAX; + sk_set_memalloc(sock->sk); +} + +int wg_socket_init(struct wg_device *wg, u16 port) +{ + int ret; + struct udp_tunnel_sock_cfg cfg = { + .sk_user_data = wg, + .encap_type = 1, + .encap_rcv = wg_receive + }; + struct socket *new4 = NULL, *new6 = NULL; + struct udp_port_cfg port4 = { + .family = AF_INET, + .local_ip.s_addr = htonl(INADDR_ANY), + .local_udp_port = htons(port), + .use_udp_checksums = true + }; +#if IS_ENABLED(CONFIG_IPV6) + int retries = 0; + struct udp_port_cfg port6 = { + .family = AF_INET6, + .local_ip6 = IN6ADDR_ANY_INIT, + .use_udp6_tx_checksums = true, + .use_udp6_rx_checksums = true, + .ipv6_v6only = true + }; +#endif + +#if IS_ENABLED(CONFIG_IPV6) +retry: +#endif + + ret = udp_sock_create(wg->creating_net, &port4, &new4); + if (ret < 0) { + pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); + return ret; + } + set_sock_opts(new4); + setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); + +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6_mod_enabled()) { + port6.local_udp_port = inet_sk(new4->sk)->inet_sport; + ret = udp_sock_create(wg->creating_net, &port6, &new6); + if (ret < 0) { + udp_tunnel_sock_release(new4); + if (ret == -EADDRINUSE && !port && retries++ < 100) + goto retry; + pr_err("%s: Could not create IPv6 socket\n", + wg->dev->name); + return ret; + } + set_sock_opts(new6); + setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); + } +#endif + + wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); + return 0; +} + +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, + struct sock *new6) +{ + struct sock *old4, *old6; + + mutex_lock(&wg->socket_update_lock); + old4 = rcu_dereference_protected(wg->sock4, + lockdep_is_held(&wg->socket_update_lock)); + old6 = rcu_dereference_protected(wg->sock6, + lockdep_is_held(&wg->socket_update_lock)); + rcu_assign_pointer(wg->sock4, new4); + rcu_assign_pointer(wg->sock6, new6); + if (new4) + wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); + mutex_unlock(&wg->socket_update_lock); + synchronize_rcu(); + synchronize_net(); + sock_free(old4); + sock_free(old6); +} diff --git a/drivers/net/wireguard/socket.h b/drivers/net/wireguard/socket.h new file mode 100644 index 000000000000..bab5848efbcd --- /dev/null +++ b/drivers/net/wireguard/socket.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_SOCKET_H +#define _WG_SOCKET_H + +#include <linux/netdevice.h> +#include <linux/udp.h> +#include <linux/if_vlan.h> +#include <linux/if_ether.h> + +int wg_socket_init(struct wg_device *wg, u16 port); +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, + struct sock *new6); +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data, + size_t len, u8 ds); +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, + u8 ds); +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, + struct sk_buff *in_skb, + void *out_buffer, size_t len); + +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, + const struct sk_buff *skb); +void wg_socket_set_peer_endpoint(struct wg_peer *peer, + const struct endpoint *endpoint); +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, + const struct sk_buff *skb); +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer); + +#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG) +#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { \ + struct endpoint __endpoint; \ + wg_socket_endpoint_from_skb(&__endpoint, skb); \ + net_dbg_ratelimited(fmt, dev, &__endpoint.addr, \ + ##__VA_ARGS__); \ + } while (0) +#else +#define net_dbg_skb_ratelimited(fmt, skb, ...) +#endif + +#endif /* _WG_SOCKET_H */ diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c new file mode 100644 index 000000000000..d54d32ac9bc4 --- /dev/null +++ b/drivers/net/wireguard/timers.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include "timers.h" +#include "device.h" +#include "peer.h" +#include "queueing.h" +#include "socket.h" + +/* + * - Timer for retransmitting the handshake if we don't hear back after + * `REKEY_TIMEOUT + jitter` ms. + * + * - Timer for sending empty packet if we have received a packet but after have + * not sent one for `KEEPALIVE_TIMEOUT` ms. + * + * - Timer for initiating new handshake if we have sent a packet but after have + * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) + + * jitter` ms. + * + * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms + * if no new keys have been received. + * + * - Timer for, if enabled, sending an empty authenticated packet every user- + * specified seconds. + */ + +static inline void mod_peer_timer(struct wg_peer *peer, + struct timer_list *timer, + unsigned long expires) +{ + rcu_read_lock_bh(); + if (likely(netif_running(peer->device->dev) && + !READ_ONCE(peer->is_dead))) + mod_timer(timer, expires); + rcu_read_unlock_bh(); +} + +static void wg_expired_retransmit_handshake(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, + timer_retransmit_handshake); + + if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { + pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2); + + del_timer(&peer->timer_send_keepalive); + /* We drop all packets without a keypair and don't try again, + * if we try unsuccessfully for too long to make a handshake. + */ + wg_packet_purge_staged_packets(peer); + + /* We set a timer for destroying any residue that might be left + * of a partial exchange. + */ + if (!timer_pending(&peer->timer_zero_key_material)) + mod_peer_timer(peer, &peer->timer_zero_key_material, + jiffies + REJECT_AFTER_TIME * 3 * HZ); + } else { + ++peer->timer_handshake_attempts; + pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, REKEY_TIMEOUT, + peer->timer_handshake_attempts + 1); + + /* We clear the endpoint address src address, in case this is + * the cause of trouble. + */ + wg_socket_clear_peer_endpoint_src(peer); + + wg_packet_send_queued_handshake_initiation(peer, true); + } +} + +static void wg_expired_send_keepalive(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive); + + wg_packet_send_keepalive(peer); + if (peer->timer_need_another_keepalive) { + peer->timer_need_another_keepalive = false; + mod_peer_timer(peer, &peer->timer_send_keepalive, + jiffies + KEEPALIVE_TIMEOUT * HZ); + } +} + +static void wg_expired_new_handshake(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake); + + pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT); + /* We clear the endpoint address src address, in case this is the cause + * of trouble. + */ + wg_socket_clear_peer_endpoint_src(peer); + wg_packet_send_queued_handshake_initiation(peer, false); +} + +static void wg_expired_zero_key_material(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material); + + rcu_read_lock_bh(); + if (!READ_ONCE(peer->is_dead)) { + wg_peer_get(peer); + if (!queue_work(peer->device->handshake_send_wq, + &peer->clear_peer_work)) + /* If the work was already on the queue, we want to drop + * the extra reference. + */ + wg_peer_put(peer); + } + rcu_read_unlock_bh(); +} + +static void wg_queued_expired_zero_key_material(struct work_struct *work) +{ + struct wg_peer *peer = container_of(work, struct wg_peer, + clear_peer_work); + + pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr, REJECT_AFTER_TIME * 3); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + wg_peer_put(peer); +} + +static void wg_expired_send_persistent_keepalive(struct timer_list *timer) +{ + struct wg_peer *peer = from_timer(peer, timer, + timer_persistent_keepalive); + + if (likely(peer->persistent_keepalive_interval)) + wg_packet_send_keepalive(peer); +} + +/* Should be called after an authenticated data packet is sent. */ +void wg_timers_data_sent(struct wg_peer *peer) +{ + if (!timer_pending(&peer->timer_new_handshake)) + mod_peer_timer(peer, &peer->timer_new_handshake, + jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ + + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); +} + +/* Should be called after an authenticated data packet is received. */ +void wg_timers_data_received(struct wg_peer *peer) +{ + if (likely(netif_running(peer->device->dev))) { + if (!timer_pending(&peer->timer_send_keepalive)) + mod_peer_timer(peer, &peer->timer_send_keepalive, + jiffies + KEEPALIVE_TIMEOUT * HZ); + else + peer->timer_need_another_keepalive = true; + } +} + +/* Should be called after any type of authenticated packet is sent, whether + * keepalive, data, or handshake. + */ +void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer) +{ + del_timer(&peer->timer_send_keepalive); +} + +/* Should be called after any type of authenticated packet is received, whether + * keepalive, data, or handshake. + */ +void wg_timers_any_authenticated_packet_received(struct wg_peer *peer) +{ + del_timer(&peer->timer_new_handshake); +} + +/* Should be called after a handshake initiation message is sent. */ +void wg_timers_handshake_initiated(struct wg_peer *peer) +{ + mod_peer_timer(peer, &peer->timer_retransmit_handshake, + jiffies + REKEY_TIMEOUT * HZ + + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); +} + +/* Should be called after a handshake response message is received and processed + * or when getting key confirmation via the first data message. + */ +void wg_timers_handshake_complete(struct wg_peer *peer) +{ + del_timer(&peer->timer_retransmit_handshake); + peer->timer_handshake_attempts = 0; + peer->sent_lastminute_handshake = false; + ktime_get_real_ts64(&peer->walltime_last_handshake); +} + +/* Should be called after an ephemeral key is created, which is before sending a + * handshake response or after receiving a handshake response. + */ +void wg_timers_session_derived(struct wg_peer *peer) +{ + mod_peer_timer(peer, &peer->timer_zero_key_material, + jiffies + REJECT_AFTER_TIME * 3 * HZ); +} + +/* Should be called before a packet with authentication, whether + * keepalive, data, or handshakem is sent, or after one is received. + */ +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer) +{ + if (peer->persistent_keepalive_interval) + mod_peer_timer(peer, &peer->timer_persistent_keepalive, + jiffies + peer->persistent_keepalive_interval * HZ); +} + +void wg_timers_init(struct wg_peer *peer) +{ + timer_setup(&peer->timer_retransmit_handshake, + wg_expired_retransmit_handshake, 0); + timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0); + timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0); + timer_setup(&peer->timer_zero_key_material, + wg_expired_zero_key_material, 0); + timer_setup(&peer->timer_persistent_keepalive, + wg_expired_send_persistent_keepalive, 0); + INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material); + peer->timer_handshake_attempts = 0; + peer->sent_lastminute_handshake = false; + peer->timer_need_another_keepalive = false; +} + +void wg_timers_stop(struct wg_peer *peer) +{ + del_timer_sync(&peer->timer_retransmit_handshake); + del_timer_sync(&peer->timer_send_keepalive); + del_timer_sync(&peer->timer_new_handshake); + del_timer_sync(&peer->timer_zero_key_material); + del_timer_sync(&peer->timer_persistent_keepalive); + flush_work(&peer->clear_peer_work); +} diff --git a/drivers/net/wireguard/timers.h b/drivers/net/wireguard/timers.h new file mode 100644 index 000000000000..f0653dcb1326 --- /dev/null +++ b/drivers/net/wireguard/timers.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#ifndef _WG_TIMERS_H +#define _WG_TIMERS_H + +#include <linux/ktime.h> + +struct wg_peer; + +void wg_timers_init(struct wg_peer *peer); +void wg_timers_stop(struct wg_peer *peer); +void wg_timers_data_sent(struct wg_peer *peer); +void wg_timers_data_received(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_received(struct wg_peer *peer); +void wg_timers_handshake_initiated(struct wg_peer *peer); +void wg_timers_handshake_complete(struct wg_peer *peer); +void wg_timers_session_derived(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer); + +static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds, + u64 expiration_seconds) +{ + return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC) + <= (s64)ktime_get_coarse_boottime_ns(); +} + +#endif /* _WG_TIMERS_H */ diff --git a/drivers/net/wireguard/version.h b/drivers/net/wireguard/version.h new file mode 100644 index 000000000000..a1a269a11634 --- /dev/null +++ b/drivers/net/wireguard/version.h @@ -0,0 +1 @@ +#define WIREGUARD_VERSION "1.0.0" diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index 0579554ed4b3..3c505636b7cc 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -5834,7 +5834,7 @@ static int ipw2100_close(struct net_device *dev) /* * TODO: Fix this function... its just wrong */ -static void ipw2100_tx_timeout(struct net_device *dev) +static void ipw2100_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ipw2100_priv *priv = libipw_priv(dev); diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c index 05466281afb6..de97b3304115 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_main.c +++ b/drivers/net/wireless/intersil/hostap/hostap_main.c @@ -761,7 +761,7 @@ static void hostap_set_multicast_list(struct net_device *dev) } -static void prism2_tx_timeout(struct net_device *dev) +static void prism2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct hostap_interface *iface; local_info_t *local; diff --git a/drivers/net/wireless/intersil/orinoco/main.c b/drivers/net/wireless/intersil/orinoco/main.c index 28dac36d7c4c..00264a14e52c 100644 --- a/drivers/net/wireless/intersil/orinoco/main.c +++ b/drivers/net/wireless/intersil/orinoco/main.c @@ -647,7 +647,7 @@ static void __orinoco_ev_txexc(struct net_device *dev, struct hermes *hw) netif_wake_queue(dev); } -void orinoco_tx_timeout(struct net_device *dev) +void orinoco_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct orinoco_private *priv = ndev_priv(dev); struct net_device_stats *stats = &dev->stats; diff --git a/drivers/net/wireless/intersil/orinoco/orinoco.h b/drivers/net/wireless/intersil/orinoco/orinoco.h index 430862a6a24b..cdd026af100b 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco.h +++ b/drivers/net/wireless/intersil/orinoco/orinoco.h @@ -207,7 +207,7 @@ int orinoco_open(struct net_device *dev); int orinoco_stop(struct net_device *dev); void orinoco_set_multicast_list(struct net_device *dev); int orinoco_change_mtu(struct net_device *dev, int new_mtu); -void orinoco_tx_timeout(struct net_device *dev); +void orinoco_tx_timeout(struct net_device *dev, unsigned int txqueue); /********************************************************************/ /* Locking and synchronization functions */ diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.c b/drivers/net/wireless/intersil/prism54/islpci_eth.c index 2b8fb07d07e7..8d680250a281 100644 --- a/drivers/net/wireless/intersil/prism54/islpci_eth.c +++ b/drivers/net/wireless/intersil/prism54/islpci_eth.c @@ -473,7 +473,7 @@ islpci_do_reset_and_wake(struct work_struct *work) } void -islpci_eth_tx_timeout(struct net_device *ndev) +islpci_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue) { islpci_private *priv = netdev_priv(ndev); diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.h b/drivers/net/wireless/intersil/prism54/islpci_eth.h index 61f4b43c6054..e433ccdc526b 100644 --- a/drivers/net/wireless/intersil/prism54/islpci_eth.h +++ b/drivers/net/wireless/intersil/prism54/islpci_eth.h @@ -53,7 +53,7 @@ struct avs_80211_1_header { void islpci_eth_cleanup_transmit(islpci_private *, isl38xx_control_block *); netdev_tx_t islpci_eth_transmit(struct sk_buff *, struct net_device *); int islpci_eth_receive(islpci_private *); -void islpci_eth_tx_timeout(struct net_device *); +void islpci_eth_tx_timeout(struct net_device *, unsigned int txqueue); void islpci_do_reset_and_wake(struct work_struct *); #endif /* _ISL_GEN_H */ diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index d14e55e3c9da..7d94695e7961 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1020,7 +1020,7 @@ static void mwifiex_set_multicast_list(struct net_device *dev) * CFG802.11 network device handler for transmission timeout. */ static void -mwifiex_tx_timeout(struct net_device *dev) +mwifiex_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 5fb598389487..648dfc38bd70 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -156,7 +156,7 @@ static void qtnf_netdev_get_stats64(struct net_device *ndev, /* Netdev handler for transmission timeout. */ -static void qtnf_netdev_tx_timeout(struct net_device *ndev) +static void qtnf_netdev_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct qtnf_vif *vif = qtnf_netdev_get_priv(ndev); struct qtnf_wmac *mac; diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 007bf6803293..686161db8706 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1285,7 +1285,7 @@ out: return rc; } -static void wl3501_tx_timeout(struct net_device *dev) +static void wl3501_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct net_device_stats *stats = &dev->stats; int rc; diff --git a/drivers/net/wireless/zydas/zd1201.c b/drivers/net/wireless/zydas/zd1201.c index 0db7362bedb4..41641fc2be74 100644 --- a/drivers/net/wireless/zydas/zd1201.c +++ b/drivers/net/wireless/zydas/zd1201.c @@ -830,7 +830,7 @@ static netdev_tx_t zd1201_hard_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } -static void zd1201_tx_timeout(struct net_device *dev) +static void zd1201_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct zd1201 *zd = netdev_priv(dev); diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index f533b7372d59..17b4950ec051 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -195,185 +195,6 @@ static void xenvif_debugfs_delif(struct xenvif *vif) } #endif /* CONFIG_DEBUG_FS */ -static int netback_remove(struct xenbus_device *dev) -{ - struct backend_info *be = dev_get_drvdata(&dev->dev); - - set_backend_state(be, XenbusStateClosed); - - unregister_hotplug_status_watch(be); - if (be->vif) { - kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); - xen_unregister_watchers(be->vif); - xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); - xenvif_free(be->vif); - be->vif = NULL; - } - kfree(be->hotplug_script); - kfree(be); - dev_set_drvdata(&dev->dev, NULL); - return 0; -} - - -/** - * Entry point to this code when a new device is created. Allocate the basic - * structures and switch to InitWait. - */ -static int netback_probe(struct xenbus_device *dev, - const struct xenbus_device_id *id) -{ - const char *message; - struct xenbus_transaction xbt; - int err; - int sg; - const char *script; - struct backend_info *be = kzalloc(sizeof(struct backend_info), - GFP_KERNEL); - if (!be) { - xenbus_dev_fatal(dev, -ENOMEM, - "allocating backend structure"); - return -ENOMEM; - } - - be->dev = dev; - dev_set_drvdata(&dev->dev, be); - - be->state = XenbusStateInitialising; - err = xenbus_switch_state(dev, XenbusStateInitialising); - if (err) - goto fail; - - sg = 1; - - do { - err = xenbus_transaction_start(&xbt); - if (err) { - xenbus_dev_fatal(dev, err, "starting transaction"); - goto fail; - } - - err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg); - if (err) { - message = "writing feature-sg"; - goto abort_transaction; - } - - err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", - "%d", sg); - if (err) { - message = "writing feature-gso-tcpv4"; - goto abort_transaction; - } - - err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", - "%d", sg); - if (err) { - message = "writing feature-gso-tcpv6"; - goto abort_transaction; - } - - /* We support partial checksum setup for IPv6 packets */ - err = xenbus_printf(xbt, dev->nodename, - "feature-ipv6-csum-offload", - "%d", 1); - if (err) { - message = "writing feature-ipv6-csum-offload"; - goto abort_transaction; - } - - /* We support rx-copy path. */ - err = xenbus_printf(xbt, dev->nodename, - "feature-rx-copy", "%d", 1); - if (err) { - message = "writing feature-rx-copy"; - goto abort_transaction; - } - - /* - * We don't support rx-flip path (except old guests who don't - * grok this feature flag). - */ - err = xenbus_printf(xbt, dev->nodename, - "feature-rx-flip", "%d", 0); - if (err) { - message = "writing feature-rx-flip"; - goto abort_transaction; - } - - /* We support dynamic multicast-control. */ - err = xenbus_printf(xbt, dev->nodename, - "feature-multicast-control", "%d", 1); - if (err) { - message = "writing feature-multicast-control"; - goto abort_transaction; - } - - err = xenbus_printf(xbt, dev->nodename, - "feature-dynamic-multicast-control", - "%d", 1); - if (err) { - message = "writing feature-dynamic-multicast-control"; - goto abort_transaction; - } - - err = xenbus_transaction_end(xbt, 0); - } while (err == -EAGAIN); - - if (err) { - xenbus_dev_fatal(dev, err, "completing transaction"); - goto fail; - } - - /* - * Split event channels support, this is optional so it is not - * put inside the above loop. - */ - err = xenbus_printf(XBT_NIL, dev->nodename, - "feature-split-event-channels", - "%u", separate_tx_rx_irq); - if (err) - pr_debug("Error writing feature-split-event-channels\n"); - - /* Multi-queue support: This is an optional feature. */ - err = xenbus_printf(XBT_NIL, dev->nodename, - "multi-queue-max-queues", "%u", xenvif_max_queues); - if (err) - pr_debug("Error writing multi-queue-max-queues\n"); - - err = xenbus_printf(XBT_NIL, dev->nodename, - "feature-ctrl-ring", - "%u", true); - if (err) - pr_debug("Error writing feature-ctrl-ring\n"); - - script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL); - if (IS_ERR(script)) { - err = PTR_ERR(script); - xenbus_dev_fatal(dev, err, "reading script"); - goto fail; - } - - be->hotplug_script = script; - - - /* This kicks hotplug scripts, so do it immediately. */ - err = backend_create_xenvif(be); - if (err) - goto fail; - - return 0; - -abort_transaction: - xenbus_transaction_end(xbt, 1); - xenbus_dev_fatal(dev, err, "%s", message); -fail: - pr_debug("failed\n"); - netback_remove(dev); - return err; -} - - /* * Handle the creation of the hotplug script environment. We add the script * and vif variables to the environment, for the benefit of the vif-* hotplug @@ -827,6 +648,7 @@ static void hotplug_status_changed(struct xenbus_watch *watch, /* Not interested in this watch anymore. */ unregister_hotplug_status_watch(be); + xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); } kfree(str); } @@ -1128,6 +950,176 @@ static int read_xenbus_vif_flags(struct backend_info *be) return 0; } +static int netback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + + set_backend_state(be, XenbusStateClosed); + + unregister_hotplug_status_watch(be); + if (be->vif) { + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); + xen_unregister_watchers(be->vif); + xenvif_free(be->vif); + be->vif = NULL; + } + kfree(be->hotplug_script); + kfree(be); + dev_set_drvdata(&dev->dev, NULL); + return 0; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures and switch to InitWait. + */ +static int netback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + const char *message; + struct xenbus_transaction xbt; + int err; + int sg; + const char *script; + struct backend_info *be = kzalloc(sizeof(*be), GFP_KERNEL); + + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + + be->dev = dev; + dev_set_drvdata(&dev->dev, be); + + sg = 1; + + do { + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto fail; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg); + if (err) { + message = "writing feature-sg"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", + "%d", sg); + if (err) { + message = "writing feature-gso-tcpv4"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", + "%d", sg); + if (err) { + message = "writing feature-gso-tcpv6"; + goto abort_transaction; + } + + /* We support partial checksum setup for IPv6 packets */ + err = xenbus_printf(xbt, dev->nodename, + "feature-ipv6-csum-offload", + "%d", 1); + if (err) { + message = "writing feature-ipv6-csum-offload"; + goto abort_transaction; + } + + /* We support rx-copy path. */ + err = xenbus_printf(xbt, dev->nodename, + "feature-rx-copy", "%d", 1); + if (err) { + message = "writing feature-rx-copy"; + goto abort_transaction; + } + + /* We don't support rx-flip path (except old guests who + * don't grok this feature flag). + */ + err = xenbus_printf(xbt, dev->nodename, + "feature-rx-flip", "%d", 0); + if (err) { + message = "writing feature-rx-flip"; + goto abort_transaction; + } + + /* We support dynamic multicast-control. */ + err = xenbus_printf(xbt, dev->nodename, + "feature-multicast-control", "%d", 1); + if (err) { + message = "writing feature-multicast-control"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, + "feature-dynamic-multicast-control", + "%d", 1); + if (err) { + message = "writing feature-dynamic-multicast-control"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + } while (err == -EAGAIN); + + if (err) { + xenbus_dev_fatal(dev, err, "completing transaction"); + goto fail; + } + + /* Split event channels support, this is optional so it is not + * put inside the above loop. + */ + err = xenbus_printf(XBT_NIL, dev->nodename, + "feature-split-event-channels", + "%u", separate_tx_rx_irq); + if (err) + pr_debug("Error writing feature-split-event-channels\n"); + + /* Multi-queue support: This is an optional feature. */ + err = xenbus_printf(XBT_NIL, dev->nodename, + "multi-queue-max-queues", "%u", xenvif_max_queues); + if (err) + pr_debug("Error writing multi-queue-max-queues\n"); + + err = xenbus_printf(XBT_NIL, dev->nodename, + "feature-ctrl-ring", + "%u", true); + if (err) + pr_debug("Error writing feature-ctrl-ring\n"); + + backend_switch_state(be, XenbusStateInitWait); + + script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL); + if (IS_ERR(script)) { + err = PTR_ERR(script); + xenbus_dev_fatal(dev, err, "reading script"); + goto fail; + } + + be->hotplug_script = script; + + /* This kicks hotplug scripts, so do it immediately. */ + err = backend_create_xenvif(be); + if (err) + goto fail; + + return 0; + +abort_transaction: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, err, "%s", message); +fail: + pr_debug("failed\n"); + netback_remove(dev); + return err; +} + static const struct xenbus_device_id netback_ids[] = { { "vif" }, { "" } diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index cda996f6954e..2b83156efe3f 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -693,7 +693,7 @@ static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, target->nfcid1_len != 10) return -EOPNOTSUPP; - return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, + return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, PN544_RF_READER_CMD_ACTIVATE_NEXT, target->nfcid1, target->nfcid1_len, NULL); } else if (target->supported_protocols & (NFC_PROTO_JEWEL_MASK | diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 604dba4f18af..8e4d355dc3ae 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -565,7 +565,7 @@ static void port100_tx_update_payload_len(void *_frame, int len) { struct port100_frame *frame = _frame; - frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len); + le16_add_cpu(&frame->datalen, len); } static bool port100_rx_frame_is_valid(void *_frame) diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index a577218d1ab7..b27c46ebfc8f 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -74,14 +74,13 @@ static void set_fipers(struct ptp_qoriq *ptp_qoriq) ptp_qoriq->write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); } -static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, - bool update_event) +int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event) { struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; struct ptp_clock_event event; void __iomem *reg_etts_l; void __iomem *reg_etts_h; - u32 valid, stat, lo, hi; + u32 valid, lo, hi; switch (index) { case 0: @@ -101,6 +100,10 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, event.type = PTP_CLOCK_EXTTS; event.index = index; + if (ptp_qoriq->extts_fifo_support) + if (!(ptp_qoriq->read(®s->ctrl_regs->tmr_stat) & valid)) + return 0; + do { lo = ptp_qoriq->read(reg_etts_l); hi = ptp_qoriq->read(reg_etts_h); @@ -111,11 +114,13 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, ptp_clock_event(ptp_qoriq->clock, &event); } - stat = ptp_qoriq->read(®s->ctrl_regs->tmr_stat); - } while (ptp_qoriq->extts_fifo_support && (stat & valid)); + if (!ptp_qoriq->extts_fifo_support) + break; + } while (ptp_qoriq->read(®s->ctrl_regs->tmr_stat) & valid); return 0; } +EXPORT_SYMBOL_GPL(extts_clean_up); /* * Interrupt service routine diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 871d44746f5c..6e16b19732f6 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -125,12 +125,6 @@ struct qeth_routing_info { enum qeth_routing_types type; }; -/* IPA stuff */ -struct qeth_ipa_info { - __u32 supported_funcs; - __u32 enabled_funcs; -}; - /* SETBRIDGEPORT stuff */ enum qeth_sbp_roles { QETH_SBP_ROLE_NONE = 0, @@ -169,41 +163,6 @@ struct qeth_vnicc_info { bool rx_bcast_enabled; }; -static inline int qeth_is_adp_supported(struct qeth_ipa_info *ipa, - enum qeth_ipa_setadp_cmd func) -{ - return (ipa->supported_funcs & func); -} - -static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa, - enum qeth_ipa_funcs func) -{ - return (ipa->supported_funcs & func); -} - -static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa, - enum qeth_ipa_funcs func) -{ - return (ipa->supported_funcs & ipa->enabled_funcs & func); -} - -#define qeth_adp_supported(c, f) \ - qeth_is_adp_supported(&c->options.adp, f) -#define qeth_is_supported(c, f) \ - qeth_is_ipa_supported(&c->options.ipa4, f) -#define qeth_is_enabled(c, f) \ - qeth_is_ipa_enabled(&c->options.ipa4, f) -#define qeth_is_supported6(c, f) \ - qeth_is_ipa_supported(&c->options.ipa6, f) -#define qeth_is_enabled6(c, f) \ - qeth_is_ipa_enabled(&c->options.ipa6, f) -#define qeth_is_ipafunc_supported(c, prot, f) \ - ((prot == QETH_PROT_IPV6) ? \ - qeth_is_supported6(c, f) : qeth_is_supported(c, f)) -#define qeth_is_ipafunc_enabled(c, prot, f) \ - ((prot == QETH_PROT_IPV6) ? \ - qeth_is_enabled6(c, f) : qeth_is_enabled(c, f)) - #define QETH_IDX_FUNC_LEVEL_OSD 0x0101 #define QETH_IDX_FUNC_LEVEL_IQD 0x4108 @@ -735,11 +694,11 @@ enum qeth_discipline_id { }; struct qeth_card_options { + struct qeth_ipa_caps ipa4; + struct qeth_ipa_caps ipa6; struct qeth_routing_info route4; - struct qeth_ipa_info ipa4; - struct qeth_ipa_info adp; /*Adapter parameters*/ struct qeth_routing_info route6; - struct qeth_ipa_info ipa6; + struct qeth_ipa_caps adp; /* Adapter parameters */ struct qeth_sbp_info sbp; /* SETBRIDGEPORT options */ struct qeth_vnicc_info vnicc; /* VNICC options */ int fake_broadcast; @@ -862,6 +821,13 @@ static inline bool qeth_card_hw_is_reachable(struct qeth_card *card) return card->state == CARD_STATE_SOFTSETUP; } +static inline void qeth_unlock_channel(struct qeth_card *card, + struct qeth_channel *channel) +{ + atomic_set(&channel->irq_pending, 0); + wake_up(&card->wait_q); +} + struct qeth_trap_id { __u16 lparnr; char vmname[8]; @@ -1076,7 +1042,7 @@ void qeth_clear_working_pool_list(struct qeth_card *); void qeth_drain_output_queues(struct qeth_card *card); void qeth_setadp_promisc_mode(struct qeth_card *card, bool enable); int qeth_setadpparms_change_macaddr(struct qeth_card *); -void qeth_tx_timeout(struct net_device *); +void qeth_tx_timeout(struct net_device *, unsigned int txqueue); void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob, u16 cmd_length); int qeth_query_switch_attributes(struct qeth_card *card, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index b9a2349e4b90..78349355c582 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -520,11 +520,10 @@ static int __qeth_issue_next_read(struct qeth_card *card) } else { QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n", rc, CARD_DEVID(card)); - atomic_set(&channel->irq_pending, 0); + qeth_unlock_channel(card, channel); qeth_put_cmd(iob); card->read_or_write_problem = 1; qeth_schedule_recovery(card); - wake_up(&card->wait_q); } return rc; } @@ -972,8 +971,6 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, /* while we hold the ccwdev lock, this stays valid: */ gdev = dev_get_drvdata(&cdev->dev); card = dev_get_drvdata(&gdev->dev); - if (!card) - return; QETH_CARD_TEXT(card, 5, "irq"); @@ -1003,24 +1000,25 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, } channel->active_cmd = NULL; + qeth_unlock_channel(card, channel); rc = qeth_check_irb_error(card, cdev, irb); if (rc) { /* IO was terminated, free its resources. */ if (iob) qeth_cancel_cmd(iob, rc); - atomic_set(&channel->irq_pending, 0); - wake_up(&card->wait_q); return; } - atomic_set(&channel->irq_pending, 0); - - if (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC)) + if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC) { channel->state = CH_STATE_STOPPED; + wake_up(&card->wait_q); + } - if (irb->scsw.cmd.fctl & (SCSW_FCTL_HALT_FUNC)) + if (irb->scsw.cmd.fctl & SCSW_FCTL_HALT_FUNC) { channel->state = CH_STATE_HALTED; + wake_up(&card->wait_q); + } if (iob && (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC))) { @@ -1054,7 +1052,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, qeth_cancel_cmd(iob, rc); qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); - goto out; + return; } } @@ -1062,16 +1060,12 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, /* sanity check: */ if (irb->scsw.cmd.count > iob->length) { qeth_cancel_cmd(iob, -EIO); - goto out; + return; } if (iob->callback) iob->callback(card, iob, iob->length - irb->scsw.cmd.count); } - -out: - wake_up(&card->wait_q); - return; } static void qeth_notify_skbs(struct qeth_qdio_out_q *q, @@ -1198,31 +1192,6 @@ static void qeth_free_buffer_pool(struct qeth_card *card) } } -static void qeth_clean_channel(struct qeth_channel *channel) -{ - struct ccw_device *cdev = channel->ccwdev; - - QETH_DBF_TEXT(SETUP, 2, "freech"); - - spin_lock_irq(get_ccwdev_lock(cdev)); - cdev->handler = NULL; - spin_unlock_irq(get_ccwdev_lock(cdev)); -} - -static void qeth_setup_channel(struct qeth_channel *channel) -{ - struct ccw_device *cdev = channel->ccwdev; - - QETH_DBF_TEXT(SETUP, 2, "setupch"); - - channel->state = CH_STATE_DOWN; - atomic_set(&channel->irq_pending, 0); - - spin_lock_irq(get_ccwdev_lock(cdev)); - cdev->handler = qeth_irq; - spin_unlock_irq(get_ccwdev_lock(cdev)); -} - static int qeth_osa_set_output_queues(struct qeth_card *card, bool single) { unsigned int count = single ? 1 : card->dev->num_tx_queues; @@ -1395,9 +1364,6 @@ static struct qeth_card *qeth_alloc_card(struct ccwgroup_device *gdev) if (!card->read_cmd) goto out_read_cmd; - qeth_setup_channel(&card->read); - qeth_setup_channel(&card->write); - qeth_setup_channel(&card->data); card->qeth_service_level.seq_print = qeth_core_sl_print; register_service_level(&card->qeth_service_level); return card; @@ -1467,12 +1433,38 @@ int qeth_stop_channel(struct qeth_channel *channel) channel->active_cmd); channel->active_cmd = NULL; } + cdev->handler = NULL; spin_unlock_irq(get_ccwdev_lock(cdev)); return rc; } EXPORT_SYMBOL_GPL(qeth_stop_channel); +static int qeth_start_channel(struct qeth_channel *channel) +{ + struct ccw_device *cdev = channel->ccwdev; + int rc; + + channel->state = CH_STATE_DOWN; + atomic_set(&channel->irq_pending, 0); + + spin_lock_irq(get_ccwdev_lock(cdev)); + cdev->handler = qeth_irq; + spin_unlock_irq(get_ccwdev_lock(cdev)); + + rc = ccw_device_set_online(cdev); + if (rc) + goto err; + + return 0; + +err: + spin_lock_irq(get_ccwdev_lock(cdev)); + cdev->handler = NULL; + spin_unlock_irq(get_ccwdev_lock(cdev)); + return rc; +} + static int qeth_halt_channels(struct qeth_card *card) { int rc1 = 0, rc2 = 0, rc3 = 0; @@ -1784,8 +1776,7 @@ static int qeth_send_control_data(struct qeth_card *card, QETH_CARD_TEXT_(card, 2, " err%d", rc); qeth_dequeue_cmd(card, iob); qeth_put_cmd(iob); - atomic_set(&channel->irq_pending, 0); - wake_up(&card->wait_q); + qeth_unlock_channel(card, channel); goto out; } @@ -2871,7 +2862,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card, cmd->data.setadapterparms.data.query_cmds_supp.lan_type; QETH_CARD_TEXT_(card, 2, "lnk %d", card->info.link_type); } - card->options.adp.supported_funcs = + card->options.adp.supported = cmd->data.setadapterparms.data.query_cmds_supp.supported_cmds; return 0; } @@ -2927,8 +2918,8 @@ static int qeth_query_ipassists_cb(struct qeth_card *card, case IPA_RC_NOTSUPP: case IPA_RC_L2_UNSUPPORTED_CMD: QETH_CARD_TEXT(card, 2, "ipaunsup"); - card->options.ipa4.supported_funcs |= IPA_SETADAPTERPARMS; - card->options.ipa6.supported_funcs |= IPA_SETADAPTERPARMS; + card->options.ipa4.supported |= IPA_SETADAPTERPARMS; + card->options.ipa6.supported |= IPA_SETADAPTERPARMS; return -EOPNOTSUPP; default: QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n", @@ -2936,13 +2927,11 @@ static int qeth_query_ipassists_cb(struct qeth_card *card, return -EIO; } - if (cmd->hdr.prot_version == QETH_PROT_IPV4) { - card->options.ipa4.supported_funcs = cmd->hdr.ipa_supported; - card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled; - } else if (cmd->hdr.prot_version == QETH_PROT_IPV6) { - card->options.ipa6.supported_funcs = cmd->hdr.ipa_supported; - card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled; - } else + if (cmd->hdr.prot_version == QETH_PROT_IPV4) + card->options.ipa4 = cmd->hdr.assists; + else if (cmd->hdr.prot_version == QETH_PROT_IPV6) + card->options.ipa6 = cmd->hdr.assists; + else QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Flawed LIC detected\n", CARD_DEVID(card)); return 0; @@ -3413,7 +3402,7 @@ static void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue, struct qeth_card *card = (struct qeth_card *)card_ptr; if (card->dev->flags & IFF_UP) - napi_schedule(&card->napi); + napi_schedule_irqoff(&card->napi); } int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq) @@ -4325,7 +4314,7 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback) return rc; } -void qeth_tx_timeout(struct net_device *dev) +void qeth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qeth_card *card; @@ -4706,7 +4695,7 @@ static void qeth_determine_capabilities(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "detcapab"); if (!ddev->online) { ddev_offline = 1; - rc = ccw_device_set_online(ddev); + rc = qeth_start_channel(channel); if (rc) { QETH_CARD_TEXT_(card, 2, "3err%d", rc); goto out; @@ -4881,9 +4870,6 @@ out_free_nothing: static void qeth_core_free_card(struct qeth_card *card) { QETH_CARD_TEXT(card, 2, "freecrd"); - qeth_clean_channel(&card->read); - qeth_clean_channel(&card->write); - qeth_clean_channel(&card->data); qeth_put_cmd(card->read_cmd); destroy_workqueue(card->event_wq); unregister_service_level(&card->qeth_service_level); @@ -4946,13 +4932,14 @@ retry: qeth_stop_channel(&card->write); qeth_stop_channel(&card->read); qdio_free(CARD_DDEV(card)); - rc = ccw_device_set_online(CARD_RDEV(card)); + + rc = qeth_start_channel(&card->read); if (rc) goto retriable; - rc = ccw_device_set_online(CARD_WDEV(card)); + rc = qeth_start_channel(&card->write); if (rc) goto retriable; - rc = ccw_device_set_online(CARD_DDEV(card)); + rc = qeth_start_channel(&card->data); if (rc) goto retriable; retriable: @@ -5013,9 +5000,9 @@ retriable: *carrier_ok = true; } - card->options.ipa4.supported_funcs = 0; - card->options.ipa6.supported_funcs = 0; - card->options.adp.supported_funcs = 0; + card->options.ipa4.supported = 0; + card->options.ipa6.supported = 0; + card->options.adp.supported = 0; card->options.sbp.supported_funcs = 0; card->info.diagass_support = 0; rc = qeth_query_ipassists(card, QETH_PROT_IPV4); @@ -5432,9 +5419,9 @@ int qeth_setassparms_cb(struct qeth_card *card, cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; if (cmd->hdr.prot_version == QETH_PROT_IPV4) - card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled; + card->options.ipa4.enabled = cmd->hdr.assists.enabled; if (cmd->hdr.prot_version == QETH_PROT_IPV6) - card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled; + card->options.ipa6.enabled = cmd->hdr.assists.enabled; return 0; } EXPORT_SYMBOL_GPL(qeth_setassparms_cb); diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 88f4dc140751..f4dc37e28ac7 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -53,6 +53,16 @@ static inline bool qeth_ipa_caps_enabled(struct qeth_ipa_caps *caps, u32 mask) return (caps->enabled & mask) == mask; } +#define qeth_adp_supported(c, f) \ + qeth_ipa_caps_supported(&c->options.adp, f) +#define qeth_is_supported(c, f) \ + qeth_ipa_caps_supported(&c->options.ipa4, f) +#define qeth_is_supported6(c, f) \ + qeth_ipa_caps_supported(&c->options.ipa6, f) +#define qeth_is_ipafunc_supported(c, prot, f) \ + ((prot == QETH_PROT_IPV6) ? qeth_is_supported6(c, f) : \ + qeth_is_supported(c, f)) + enum qeth_card_types { QETH_CARD_TYPE_OSD = 1, QETH_CARD_TYPE_IQD = 5, @@ -338,14 +348,14 @@ enum qeth_card_info_port_speed { /* (SET)DELIP(M) IPA stuff ***************************************************/ struct qeth_ipacmd_setdelip4 { - __u8 ip_addr[4]; - __u8 mask[4]; + __be32 addr; + __be32 mask; __u32 flags; } __attribute__ ((packed)); struct qeth_ipacmd_setdelip6 { - __u8 ip_addr[16]; - __u8 mask[16]; + struct in6_addr addr; + struct in6_addr prefix; __u32 flags; } __attribute__ ((packed)); @@ -766,8 +776,7 @@ struct qeth_ipacmd_hdr { __u8 prim_version_no; __u8 param_count; __u16 prot_version; - __u32 ipa_supported; - __u32 ipa_enabled; + struct qeth_ipa_caps assists; } __attribute__ ((packed)); /* The IPA command itself */ diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h index 5db04fe472c0..6ccfe2121095 100644 --- a/drivers/s390/net/qeth_l3.h +++ b/drivers/s390/net/qeth_l3.h @@ -23,7 +23,6 @@ struct qeth_ipaddr { struct hlist_node hnode; enum qeth_ip_types type; u8 is_multicast:1; - u8 in_progress:1; u8 disp_flag:2; u8 ipato:1; /* ucast only */ @@ -35,7 +34,7 @@ struct qeth_ipaddr { union { struct { __be32 addr; - unsigned int mask; + __be32 mask; } a4; struct { struct in6_addr addr; @@ -102,7 +101,8 @@ struct qeth_ipato_entry { extern const struct attribute_group *qeth_l3_attr_groups[]; -void qeth_l3_ipaddr_to_string(enum qeth_prot_versions, const __u8 *, char *); +int qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const u8 *addr, + char *buf); int qeth_l3_create_device_attributes(struct device *); void qeth_l3_remove_device_attributes(struct device *); int qeth_l3_setrouting_v4(struct qeth_card *); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 27126330a4b0..789d3b2ba0de 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -44,23 +44,13 @@ static int qeth_l3_register_addr_entry(struct qeth_card *, static int qeth_l3_deregister_addr_entry(struct qeth_card *, struct qeth_ipaddr *); -static void qeth_l3_ipaddr4_to_string(const __u8 *addr, char *buf) -{ - sprintf(buf, "%pI4", addr); -} - -static void qeth_l3_ipaddr6_to_string(const __u8 *addr, char *buf) -{ - sprintf(buf, "%pI6", addr); -} - -void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr, - char *buf) +int qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const u8 *addr, + char *buf) { if (proto == QETH_PROT_IPV4) - qeth_l3_ipaddr4_to_string(addr, buf); - else if (proto == QETH_PROT_IPV6) - qeth_l3_ipaddr6_to_string(addr, buf); + return sprintf(buf, "%pI4", addr); + else + return sprintf(buf, "%pI6", addr); } static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card, @@ -161,8 +151,6 @@ static int qeth_l3_delete_ip(struct qeth_card *card, addr->ref_counter--; if (addr->type == QETH_IP_TYPE_NORMAL && addr->ref_counter > 0) return rc; - if (addr->in_progress) - return -EINPROGRESS; if (qeth_card_hw_is_reachable(card)) rc = qeth_l3_deregister_addr_entry(card, addr); @@ -223,29 +211,10 @@ static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) return 0; } - /* qeth_l3_register_addr_entry can go to sleep - * if we add a IPV4 addr. It is caused by the reason - * that SETIP ipa cmd starts ARP staff for IPV4 addr. - * Thus we should unlock spinlock, and make a protection - * using in_progress variable to indicate that there is - * an hardware operation with this IPV4 address - */ - if (addr->proto == QETH_PROT_IPV4) { - addr->in_progress = 1; - mutex_unlock(&card->ip_lock); - rc = qeth_l3_register_addr_entry(card, addr); - mutex_lock(&card->ip_lock); - addr->in_progress = 0; - } else - rc = qeth_l3_register_addr_entry(card, addr); + rc = qeth_l3_register_addr_entry(card, addr); if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; - if (addr->ref_counter < 1) { - qeth_l3_deregister_addr_entry(card, addr); - hash_del(&addr->hnode); - kfree(addr); - } } else { hash_del(&addr->hnode); kfree(addr); @@ -313,19 +282,10 @@ static void qeth_l3_recover_ip(struct qeth_card *card) hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { if (addr->disp_flag == QETH_DISP_ADDR_ADD) { - if (addr->proto == QETH_PROT_IPV4) { - addr->in_progress = 1; - mutex_unlock(&card->ip_lock); - rc = qeth_l3_register_addr_entry(card, addr); - mutex_lock(&card->ip_lock); - addr->in_progress = 0; - } else - rc = qeth_l3_register_addr_entry(card, addr); + rc = qeth_l3_register_addr_entry(card, addr); if (!rc) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; - if (addr->ref_counter < 1) - qeth_l3_delete_ip(card, addr); } else { hash_del(&addr->hnode); kfree(addr); @@ -379,17 +339,16 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card, return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL); } -static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len) +static void qeth_l3_set_ipv6_prefix(struct in6_addr *prefix, unsigned int len) { - int i, j; - for (i = 0; i < 16; i++) { - j = (len) - (i * 8); - if (j >= 8) - netmask[i] = 0xff; - else if (j > 0) - netmask[i] = (u8)(0xFF00 >> j); - else - netmask[i] = 0; + unsigned int i = 0; + + while (len && i < 4) { + int mask_len = min_t(int, len, 32); + + prefix->s6_addr32[i] = inet_make_mask(mask_len); + len -= mask_len; + i++; } } @@ -412,7 +371,6 @@ static int qeth_l3_send_setdelip(struct qeth_card *card, { struct qeth_cmd_buffer *iob; struct qeth_ipa_cmd *cmd; - __u8 netmask[16]; u32 flags; QETH_CARD_TEXT(card, 4, "setdelip"); @@ -427,15 +385,13 @@ static int qeth_l3_send_setdelip(struct qeth_card *card, QETH_CARD_TEXT_(card, 4, "flags%02X", flags); if (addr->proto == QETH_PROT_IPV6) { - memcpy(cmd->data.setdelip6.ip_addr, &addr->u.a6.addr, - sizeof(struct in6_addr)); - qeth_l3_fill_netmask(netmask, addr->u.a6.pfxlen); - memcpy(cmd->data.setdelip6.mask, netmask, - sizeof(struct in6_addr)); + cmd->data.setdelip6.addr = addr->u.a6.addr; + qeth_l3_set_ipv6_prefix(&cmd->data.setdelip6.prefix, + addr->u.a6.pfxlen); cmd->data.setdelip6.flags = flags; } else { - memcpy(cmd->data.setdelip4.ip_addr, &addr->u.a4.addr, 4); - memcpy(cmd->data.setdelip4.mask, &addr->u.a4.mask, 4); + cmd->data.setdelip4.addr = addr->u.a4.addr; + cmd->data.setdelip4.mask = addr->u.a4.mask; cmd->data.setdelip4.flags = flags; } @@ -581,6 +537,7 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "addipato"); + mutex_lock(&card->conf_mutex); mutex_lock(&card->ip_lock); list_for_each_entry(ipatoe, &card->ipato.entries, entry) { @@ -600,6 +557,7 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card, } mutex_unlock(&card->ip_lock); + mutex_unlock(&card->conf_mutex); return rc; } @@ -613,6 +571,7 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card, QETH_CARD_TEXT(card, 2, "delipato"); + mutex_lock(&card->conf_mutex); mutex_lock(&card->ip_lock); list_for_each_entry_safe(ipatoe, tmp, &card->ipato.entries, entry) { @@ -629,6 +588,8 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card, } mutex_unlock(&card->ip_lock); + mutex_unlock(&card->conf_mutex); + return rc; } @@ -637,6 +598,7 @@ int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip, enum qeth_prot_versions proto) { struct qeth_ipaddr addr; + int rc; qeth_l3_init_ipaddr(&addr, type, proto); if (proto == QETH_PROT_IPV4) @@ -644,7 +606,11 @@ int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip, else memcpy(&addr.u.a6.addr, ip, 16); - return qeth_l3_modify_ip(card, &addr, add); + mutex_lock(&card->conf_mutex); + rc = qeth_l3_modify_ip(card, &addr, add); + mutex_unlock(&card->conf_mutex); + + return rc; } int qeth_l3_modify_hsuid(struct qeth_card *card, bool add) @@ -2292,12 +2258,6 @@ static int __qeth_l3_set_offline(struct ccwgroup_device *cgdev, rtnl_unlock(); qeth_l3_stop_card(card); - if (card->options.cq == QETH_CQ_ENABLED) { - rtnl_lock(); - call_netdevice_notifiers(NETDEV_REBOOT, card->dev); - rtnl_unlock(); - } - rc = qeth_stop_channel(&card->data); rc2 = qeth_stop_channel(&card->write); rc3 = qeth_stop_channel(&card->read); @@ -2436,7 +2396,7 @@ static int qeth_l3_ip_event(struct notifier_block *this, qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV4); addr.u.a4.addr = ifa->ifa_address; - addr.u.a4.mask = be32_to_cpu(ifa->ifa_mask); + addr.u.a4.mask = ifa->ifa_mask; return qeth_l3_handle_ip_event(card, &addr, event); } diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index f9067ed6c7d3..96c73965eb68 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -386,30 +386,35 @@ static ssize_t qeth_l3_dev_ipato_add_show(char *buf, struct qeth_card *card, enum qeth_prot_versions proto) { struct qeth_ipato_entry *ipatoe; - char addr_str[40]; - int entry_len; /* length of 1 entry string, differs between v4 and v6 */ - int i = 0; + int str_len = 0; - entry_len = (proto == QETH_PROT_IPV4)? 12 : 40; - /* add strlen for "/<mask>\n" */ - entry_len += (proto == QETH_PROT_IPV4)? 5 : 6; mutex_lock(&card->ip_lock); list_for_each_entry(ipatoe, &card->ipato.entries, entry) { + char addr_str[40]; + int entry_len; + if (ipatoe->proto != proto) continue; - /* String must not be longer than PAGE_SIZE. So we check if - * string length gets near PAGE_SIZE. Then we can savely display - * the next IPv6 address (worst case, compared to IPv4) */ - if ((PAGE_SIZE - i) <= entry_len) + + entry_len = qeth_l3_ipaddr_to_string(proto, ipatoe->addr, + addr_str); + if (entry_len < 0) + continue; + + /* Append /%mask to the entry: */ + entry_len += 1 + ((proto == QETH_PROT_IPV4) ? 2 : 3); + /* Enough room to format %entry\n into null terminated page? */ + if (entry_len + 1 > PAGE_SIZE - str_len - 1) break; - qeth_l3_ipaddr_to_string(proto, ipatoe->addr, addr_str); - i += snprintf(buf + i, PAGE_SIZE - i, - "%s/%i\n", addr_str, ipatoe->mask_bits); + + entry_len = scnprintf(buf, PAGE_SIZE - str_len, + "%s/%i\n", addr_str, ipatoe->mask_bits); + str_len += entry_len; + buf += entry_len; } mutex_unlock(&card->ip_lock); - i += snprintf(buf + i, PAGE_SIZE - i, "\n"); - return i; + return str_len ? str_len : scnprintf(buf, PAGE_SIZE, "\n"); } static ssize_t qeth_l3_dev_ipato_add4_show(struct device *dev, @@ -455,16 +460,14 @@ static ssize_t qeth_l3_dev_ipato_add_store(const char *buf, size_t count, int mask_bits; int rc = 0; - mutex_lock(&card->conf_mutex); rc = qeth_l3_parse_ipatoe(buf, proto, addr, &mask_bits); if (rc) - goto out; + return rc; ipatoe = kzalloc(sizeof(struct qeth_ipato_entry), GFP_KERNEL); - if (!ipatoe) { - rc = -ENOMEM; - goto out; - } + if (!ipatoe) + return -ENOMEM; + ipatoe->proto = proto; memcpy(ipatoe->addr, addr, (proto == QETH_PROT_IPV4)? 4:16); ipatoe->mask_bits = mask_bits; @@ -472,8 +475,7 @@ static ssize_t qeth_l3_dev_ipato_add_store(const char *buf, size_t count, rc = qeth_l3_add_ipato_entry(card, ipatoe); if (rc) kfree(ipatoe); -out: - mutex_unlock(&card->conf_mutex); + return rc ? rc : count; } @@ -496,11 +498,9 @@ static ssize_t qeth_l3_dev_ipato_del_store(const char *buf, size_t count, int mask_bits; int rc = 0; - mutex_lock(&card->conf_mutex); rc = qeth_l3_parse_ipatoe(buf, proto, addr, &mask_bits); if (!rc) rc = qeth_l3_del_ipato_entry(card, proto, addr, mask_bits); - mutex_unlock(&card->conf_mutex); return rc ? rc : count; } @@ -607,31 +607,34 @@ static ssize_t qeth_l3_dev_ip_add_show(struct device *dev, char *buf, { struct qeth_card *card = dev_get_drvdata(dev); struct qeth_ipaddr *ipaddr; - char addr_str[40]; int str_len = 0; - int entry_len; /* length of 1 entry string, differs between v4 and v6 */ int i; - entry_len = (proto == QETH_PROT_IPV4)? 12 : 40; - entry_len += 2; /* \n + terminator */ mutex_lock(&card->ip_lock); hash_for_each(card->ip_htable, i, ipaddr, hnode) { + char addr_str[40]; + int entry_len; + if (ipaddr->proto != proto || ipaddr->type != type) continue; - /* String must not be longer than PAGE_SIZE. So we check if - * string length gets near PAGE_SIZE. Then we can savely display - * the next IPv6 address (worst case, compared to IPv4) */ - if ((PAGE_SIZE - str_len) <= entry_len) + + entry_len = qeth_l3_ipaddr_to_string(proto, (u8 *)&ipaddr->u, + addr_str); + if (entry_len < 0) + continue; + + /* Enough room to format %addr\n into null terminated page? */ + if (entry_len + 1 > PAGE_SIZE - str_len - 1) break; - qeth_l3_ipaddr_to_string(proto, (const u8 *)&ipaddr->u, - addr_str); - str_len += snprintf(buf + str_len, PAGE_SIZE - str_len, "%s\n", - addr_str); + + entry_len = scnprintf(buf, PAGE_SIZE - str_len, "%s\n", + addr_str); + str_len += entry_len; + buf += entry_len; } mutex_unlock(&card->ip_lock); - str_len += snprintf(buf + str_len, PAGE_SIZE - str_len, "\n"); - return str_len; + return str_len ? str_len : scnprintf(buf, PAGE_SIZE, "\n"); } static ssize_t qeth_l3_dev_vipa_add4_show(struct device *dev, @@ -642,63 +645,34 @@ static ssize_t qeth_l3_dev_vipa_add4_show(struct device *dev, QETH_IP_TYPE_VIPA); } -static int qeth_l3_parse_vipae(const char *buf, enum qeth_prot_versions proto, - u8 *addr) -{ - if (qeth_l3_string_to_ipaddr(buf, proto, addr)) { - return -EINVAL; - } - return 0; -} - -static ssize_t qeth_l3_dev_vipa_add_store(const char *buf, size_t count, - struct qeth_card *card, enum qeth_prot_versions proto) +static ssize_t qeth_l3_vipa_store(struct device *dev, const char *buf, bool add, + size_t count, enum qeth_prot_versions proto) { + struct qeth_card *card = dev_get_drvdata(dev); u8 addr[16] = {0, }; int rc; - mutex_lock(&card->conf_mutex); - rc = qeth_l3_parse_vipae(buf, proto, addr); + rc = qeth_l3_string_to_ipaddr(buf, proto, addr); if (!rc) - rc = qeth_l3_modify_rxip_vipa(card, true, addr, + rc = qeth_l3_modify_rxip_vipa(card, add, addr, QETH_IP_TYPE_VIPA, proto); - mutex_unlock(&card->conf_mutex); return rc ? rc : count; } static ssize_t qeth_l3_dev_vipa_add4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_vipa_add_store(buf, count, card, QETH_PROT_IPV4); + return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(vipa_add4, add4, 0644, qeth_l3_dev_vipa_add4_show, qeth_l3_dev_vipa_add4_store); -static ssize_t qeth_l3_dev_vipa_del_store(const char *buf, size_t count, - struct qeth_card *card, enum qeth_prot_versions proto) -{ - u8 addr[16]; - int rc; - - mutex_lock(&card->conf_mutex); - rc = qeth_l3_parse_vipae(buf, proto, addr); - if (!rc) - rc = qeth_l3_modify_rxip_vipa(card, false, addr, - QETH_IP_TYPE_VIPA, proto); - mutex_unlock(&card->conf_mutex); - return rc ? rc : count; -} - static ssize_t qeth_l3_dev_vipa_del4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_vipa_del_store(buf, count, card, QETH_PROT_IPV4); + return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(vipa_del4, del4, 0200, NULL, @@ -715,9 +689,7 @@ static ssize_t qeth_l3_dev_vipa_add6_show(struct device *dev, static ssize_t qeth_l3_dev_vipa_add6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_vipa_add_store(buf, count, card, QETH_PROT_IPV6); + return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV6); } static QETH_DEVICE_ATTR(vipa_add6, add6, 0644, @@ -727,9 +699,7 @@ static QETH_DEVICE_ATTR(vipa_add6, add6, 0644, static ssize_t qeth_l3_dev_vipa_del6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_vipa_del_store(buf, count, card, QETH_PROT_IPV6); + return qeth_l3_vipa_store(dev, buf, false, count, QETH_PROT_IPV6); } static QETH_DEVICE_ATTR(vipa_del6, del6, 0200, NULL, @@ -782,54 +752,34 @@ static int qeth_l3_parse_rxipe(const char *buf, enum qeth_prot_versions proto, return 0; } -static ssize_t qeth_l3_dev_rxip_add_store(const char *buf, size_t count, - struct qeth_card *card, enum qeth_prot_versions proto) +static ssize_t qeth_l3_rxip_store(struct device *dev, const char *buf, bool add, + size_t count, enum qeth_prot_versions proto) { + struct qeth_card *card = dev_get_drvdata(dev); u8 addr[16] = {0, }; int rc; - mutex_lock(&card->conf_mutex); rc = qeth_l3_parse_rxipe(buf, proto, addr); if (!rc) - rc = qeth_l3_modify_rxip_vipa(card, true, addr, + rc = qeth_l3_modify_rxip_vipa(card, add, addr, QETH_IP_TYPE_RXIP, proto); - mutex_unlock(&card->conf_mutex); return rc ? rc : count; } static ssize_t qeth_l3_dev_rxip_add4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_rxip_add_store(buf, count, card, QETH_PROT_IPV4); + return qeth_l3_rxip_store(dev, buf, true, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(rxip_add4, add4, 0644, qeth_l3_dev_rxip_add4_show, qeth_l3_dev_rxip_add4_store); -static ssize_t qeth_l3_dev_rxip_del_store(const char *buf, size_t count, - struct qeth_card *card, enum qeth_prot_versions proto) -{ - u8 addr[16]; - int rc; - - mutex_lock(&card->conf_mutex); - rc = qeth_l3_parse_rxipe(buf, proto, addr); - if (!rc) - rc = qeth_l3_modify_rxip_vipa(card, false, addr, - QETH_IP_TYPE_RXIP, proto); - mutex_unlock(&card->conf_mutex); - return rc ? rc : count; -} - static ssize_t qeth_l3_dev_rxip_del4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_rxip_del_store(buf, count, card, QETH_PROT_IPV4); + return qeth_l3_rxip_store(dev, buf, false, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(rxip_del4, del4, 0200, NULL, @@ -846,9 +796,7 @@ static ssize_t qeth_l3_dev_rxip_add6_show(struct device *dev, static ssize_t qeth_l3_dev_rxip_add6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_rxip_add_store(buf, count, card, QETH_PROT_IPV6); + return qeth_l3_rxip_store(dev, buf, true, count, QETH_PROT_IPV6); } static QETH_DEVICE_ATTR(rxip_add6, add6, 0644, @@ -858,9 +806,7 @@ static QETH_DEVICE_ATTR(rxip_add6, add6, 0644, static ssize_t qeth_l3_dev_rxip_del6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct qeth_card *card = dev_get_drvdata(dev); - - return qeth_l3_dev_rxip_del_store(buf, count, card, QETH_PROT_IPV6); + return qeth_l3_rxip_store(dev, buf, false, count, QETH_PROT_IPV6); } static QETH_DEVICE_ATTR(rxip_del6, del6, 0200, NULL, diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c index 3cffc8be6656..211dd4a11cac 100644 --- a/drivers/staging/ks7010/ks_wlan_net.c +++ b/drivers/staging/ks7010/ks_wlan_net.c @@ -45,7 +45,7 @@ struct wep_key { * function prototypes */ static int ks_wlan_open(struct net_device *dev); -static void ks_wlan_tx_timeout(struct net_device *dev); +static void ks_wlan_tx_timeout(struct net_device *dev, unsigned int txqueue); static int ks_wlan_start_xmit(struct sk_buff *skb, struct net_device *dev); static int ks_wlan_close(struct net_device *dev); static void ks_wlan_set_rx_mode(struct net_device *dev); @@ -2498,7 +2498,7 @@ int ks_wlan_set_mac_address(struct net_device *dev, void *addr) } static -void ks_wlan_tx_timeout(struct net_device *dev) +void ks_wlan_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ks_wlan_private *priv = netdev_priv(dev); diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 6ad4515311f7..24d20f000435 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4274,7 +4274,7 @@ static int qlge_set_mac_address(struct net_device *ndev, void *p) return status; } -static void qlge_tx_timeout(struct net_device *ndev) +static void qlge_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ql_adapter *qdev = netdev_priv(ndev); ql_queue_asic_error(qdev); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index dace81a7d1ba..a51d627284d1 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -267,7 +267,7 @@ static short _rtl92e_check_nic_enough_desc(struct net_device *dev, int prio) return 0; } -static void _rtl92e_tx_timeout(struct net_device *dev) +static void _rtl92e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct r8192_priv *priv = rtllib_priv(dev); diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index 7e2cabd16e88..482382a887f8 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -640,7 +640,7 @@ short check_nic_enough_desc(struct net_device *dev, int queue_index) return (used < MAX_TX_URB); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct r8192_priv *priv = ieee80211_priv(dev); diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c index 1d1440d43002..0433536930a9 100644 --- a/drivers/staging/unisys/visornic/visornic_main.c +++ b/drivers/staging/unisys/visornic/visornic_main.c @@ -1078,7 +1078,7 @@ out_save_flags: * Queue the work and return. Make sure we have not already been informed that * the IO Partition is gone; if so, we will have already timed-out the xmits. */ -static void visornic_xmit_timeout(struct net_device *netdev) +static void visornic_xmit_timeout(struct net_device *netdev, unsigned int txqueue) { struct visornic_devdata *devdata = netdev_priv(netdev); unsigned long flags; diff --git a/drivers/staging/wlan-ng/p80211netdev.c b/drivers/staging/wlan-ng/p80211netdev.c index a70fb84f38f1..b809c0015c0c 100644 --- a/drivers/staging/wlan-ng/p80211netdev.c +++ b/drivers/staging/wlan-ng/p80211netdev.c @@ -101,7 +101,7 @@ static void p80211knetdev_set_multicast_list(struct net_device *dev); static int p80211knetdev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); static int p80211knetdev_set_mac_address(struct net_device *dev, void *addr); -static void p80211knetdev_tx_timeout(struct net_device *netdev); +static void p80211knetdev_tx_timeout(struct net_device *netdev, unsigned int txqueue); static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc); int wlan_watchdog = 5000; @@ -1074,7 +1074,7 @@ static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc) return drop; } -static void p80211knetdev_tx_timeout(struct net_device *netdev) +static void p80211knetdev_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct wlandevice *wlandev = netdev->ml_priv; diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 36a3eb4ad4c5..f1c90fa2978e 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2704,7 +2704,7 @@ static netdev_tx_t gsm_mux_net_start_xmit(struct sk_buff *skb, } /* called when a packet did not ack after watchdogtimeout */ -static void gsm_mux_net_tx_timeout(struct net_device *net) +static void gsm_mux_net_tx_timeout(struct net_device *net, unsigned int txqueue) { /* Tell syslog we are hosed. */ dev_dbg(&net->dev, "Tx timed out.\n"); diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c index 84f26e43b229..61dc6b4a43d0 100644 --- a/drivers/tty/synclink.c +++ b/drivers/tty/synclink.c @@ -7837,7 +7837,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * * dev pointer to network device structure */ -static void hdlcdev_tx_timeout(struct net_device *dev) +static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mgsl_struct *info = dev_to_port(dev); unsigned long flags; diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index e8a9047de451..5d59e2369c8a 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1682,7 +1682,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * * dev pointer to network device structure */ -static void hdlcdev_tx_timeout(struct net_device *dev) +static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct slgt_info *info = dev_to_port(dev); unsigned long flags; diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index fcb91bf7a15b..33181fa6eb18 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1807,7 +1807,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * * dev pointer to network device structure */ -static void hdlcdev_tx_timeout(struct net_device *dev) +static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { SLMP_INFO *info = dev_to_port(dev); unsigned long flags; diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 992bf9fa1729..b0b743563f43 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -192,6 +192,7 @@ int ptp_qoriq_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts); int ptp_qoriq_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on); +int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event); #ifdef CONFIG_DEBUG_FS void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq); void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq); diff --git a/include/linux/net.h b/include/linux/net.h index 9cafb5f353a9..6451425e828f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -171,6 +171,7 @@ struct proto_ops { int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); #endif + void (*show_fdinfo)(struct seq_file *m, struct socket *sock); int (*sendmsg) (struct socket *sock, struct msghdr *m, size_t total_len); /* Notes for implementing recvmsg: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9ef20389622d..7a8ed11f5d45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -849,6 +849,7 @@ enum tc_setup_type { TC_SETUP_QDISC_GRED, TC_SETUP_QDISC_TAPRIO, TC_SETUP_FT, + TC_SETUP_QDISC_ETS, }; /* These structures hold the attributes of bpf state that are being passed @@ -1014,7 +1015,7 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); * Called when a user wants to change the Maximum Transfer Unit * of a device. * - * void (*ndo_tx_timeout)(struct net_device *dev); + * void (*ndo_tx_timeout)(struct net_device *dev, unsigned int txqueue); * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * @@ -1281,7 +1282,8 @@ struct net_device_ops { int new_mtu); int (*ndo_neigh_setup)(struct net_device *dev, struct neigh_parms *); - void (*ndo_tx_timeout) (struct net_device *dev); + void (*ndo_tx_timeout) (struct net_device *dev, + unsigned int txqueue); void (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); diff --git a/include/linux/phy.h b/include/linux/phy.h index 5032d453ac66..b2105e0d72d3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1094,11 +1094,13 @@ void phy_attached_info(struct phy_device *phydev); int genphy_read_abilities(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); +int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart); int genphy_config_eee_advert(struct phy_device *phydev); int __genphy_config_aneg(struct phy_device *phydev, bool changed); int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); int genphy_read_lpa(struct phy_device *phydev); +int genphy_read_status_fixed(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 487fd9412d10..38893e4dd0f0 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -275,6 +275,61 @@ struct sfp_diag { __be16 cal_v_offset; } __packed; +/* SFF8024 defined constants */ +enum { + SFF8024_ID_UNK = 0x00, + SFF8024_ID_SFF_8472 = 0x02, + SFF8024_ID_SFP = 0x03, + SFF8024_ID_DWDM_SFP = 0x0b, + SFF8024_ID_QSFP_8438 = 0x0c, + SFF8024_ID_QSFP_8436_8636 = 0x0d, + SFF8024_ID_QSFP28_8636 = 0x11, + + SFF8024_ENCODING_UNSPEC = 0x00, + SFF8024_ENCODING_8B10B = 0x01, + SFF8024_ENCODING_4B5B = 0x02, + SFF8024_ENCODING_NRZ = 0x03, + SFF8024_ENCODING_8472_MANCHESTER= 0x04, + SFF8024_ENCODING_8472_SONET = 0x05, + SFF8024_ENCODING_8472_64B66B = 0x06, + SFF8024_ENCODING_8436_MANCHESTER= 0x06, + SFF8024_ENCODING_8436_SONET = 0x04, + SFF8024_ENCODING_8436_64B66B = 0x05, + SFF8024_ENCODING_256B257B = 0x07, + SFF8024_ENCODING_PAM4 = 0x08, + + SFF8024_CONNECTOR_UNSPEC = 0x00, + /* codes 01-05 not supportable on SFP, but some modules have single SC */ + SFF8024_CONNECTOR_SC = 0x01, + SFF8024_CONNECTOR_FIBERJACK = 0x06, + SFF8024_CONNECTOR_LC = 0x07, + SFF8024_CONNECTOR_MT_RJ = 0x08, + SFF8024_CONNECTOR_MU = 0x09, + SFF8024_CONNECTOR_SG = 0x0a, + SFF8024_CONNECTOR_OPTICAL_PIGTAIL= 0x0b, + SFF8024_CONNECTOR_MPO_1X12 = 0x0c, + SFF8024_CONNECTOR_MPO_2X16 = 0x0d, + SFF8024_CONNECTOR_HSSDC_II = 0x20, + SFF8024_CONNECTOR_COPPER_PIGTAIL= 0x21, + SFF8024_CONNECTOR_RJ45 = 0x22, + SFF8024_CONNECTOR_NOSEPARATE = 0x23, + SFF8024_CONNECTOR_MXC_2X16 = 0x24, + + SFF8024_ECC_UNSPEC = 0x00, + SFF8024_ECC_100G_25GAUI_C2M_AOC = 0x01, + SFF8024_ECC_100GBASE_SR4_25GBASE_SR = 0x02, + SFF8024_ECC_100GBASE_LR4_25GBASE_LR = 0x03, + SFF8024_ECC_100GBASE_ER4_25GBASE_ER = 0x04, + SFF8024_ECC_100GBASE_SR10 = 0x05, + SFF8024_ECC_100GBASE_CR4 = 0x0b, + SFF8024_ECC_25GBASE_CR_S = 0x0c, + SFF8024_ECC_25GBASE_CR_N = 0x0d, + SFF8024_ECC_10GBASE_T_SFI = 0x16, + SFF8024_ECC_10GBASE_T_SR = 0x1c, + SFF8024_ECC_5GBASE_T = 0x1d, + SFF8024_ECC_2_5GBASE_T = 0x1e, +}; + /* SFP EEPROM registers */ enum { SFP_PHYS_ID = 0x00, @@ -309,34 +364,7 @@ enum { SFP_SFF8472_COMPLIANCE = 0x5e, SFP_CC_EXT = 0x5f, - SFP_PHYS_ID_SFF = 0x02, - SFP_PHYS_ID_SFP = 0x03, SFP_PHYS_EXT_ID_SFP = 0x04, - SFP_CONNECTOR_UNSPEC = 0x00, - /* codes 01-05 not supportable on SFP, but some modules have single SC */ - SFP_CONNECTOR_SC = 0x01, - SFP_CONNECTOR_FIBERJACK = 0x06, - SFP_CONNECTOR_LC = 0x07, - SFP_CONNECTOR_MT_RJ = 0x08, - SFP_CONNECTOR_MU = 0x09, - SFP_CONNECTOR_SG = 0x0a, - SFP_CONNECTOR_OPTICAL_PIGTAIL = 0x0b, - SFP_CONNECTOR_MPO_1X12 = 0x0c, - SFP_CONNECTOR_MPO_2X16 = 0x0d, - SFP_CONNECTOR_HSSDC_II = 0x20, - SFP_CONNECTOR_COPPER_PIGTAIL = 0x21, - SFP_CONNECTOR_RJ45 = 0x22, - SFP_CONNECTOR_NOSEPARATE = 0x23, - SFP_CONNECTOR_MXC_2X16 = 0x24, - SFP_ENCODING_UNSPEC = 0x00, - SFP_ENCODING_8B10B = 0x01, - SFP_ENCODING_4B5B = 0x02, - SFP_ENCODING_NRZ = 0x03, - SFP_ENCODING_8472_MANCHESTER = 0x04, - SFP_ENCODING_8472_SONET = 0x05, - SFP_ENCODING_8472_64B66B = 0x06, - SFP_ENCODING_256B257B = 0x07, - SFP_ENCODING_PAM4 = 0x08, SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13), SFP_OPTIONS_PAGING_A2 = BIT(12), SFP_OPTIONS_RETIMER = BIT(11), @@ -479,6 +507,8 @@ struct sfp_bus; * @module_insert: called after a module has been detected to determine * whether the module is supported for the upstream device. * @module_remove: called after the module has been removed. + * @module_start: called after the PHY probe step + * @module_stop: called before the PHY is removed * @link_down: called when the link is non-operational for whatever * reason. * @link_up: called when the link is operational. @@ -492,6 +522,8 @@ struct sfp_upstream_ops { void (*detach)(void *priv, struct sfp_bus *bus); int (*module_insert)(void *priv, const struct sfp_eeprom_id *id); void (*module_remove)(void *priv); + int (*module_start)(void *priv); + void (*module_stop)(void *priv); void (*link_down)(void *priv); void (*link_up)(void *priv); int (*connect_phy)(void *priv, struct phy_device *); @@ -501,10 +533,10 @@ struct sfp_upstream_ops { #if IS_ENABLED(CONFIG_SFP) int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); +bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); phy_interface_t sfp_select_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, unsigned long *link_modes); int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); @@ -525,6 +557,12 @@ static inline int sfp_parse_port(struct sfp_bus *bus, return PORT_OTHER; } +static inline bool sfp_may_have_phy(struct sfp_bus *bus, + const struct sfp_eeprom_id *id) +{ + return false; +} + static inline void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) @@ -532,7 +570,6 @@ static inline void sfp_parse_support(struct sfp_bus *bus, } static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, unsigned long *link_modes) { return PHY_INTERFACE_MODE_NA; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d4bcd9387136..0531afa9b21e 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -109,6 +109,18 @@ struct stmmac_axi { bool axi_rb; }; +#define EST_GCL 1024 +struct stmmac_est { + int enable; + u32 btr_offset[2]; + u32 btr[2]; + u32 ctr[2]; + u32 ter; + u32 gcl_unaligned[EST_GCL]; + u32 gcl[EST_GCL]; + u32 gcl_size; +}; + struct stmmac_rxq_cfg { u8 mode_to_use; u32 chan; @@ -139,6 +151,7 @@ struct plat_stmmacenet_data { struct device_node *phylink_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; + struct stmmac_est *est; int clk_csr; int has_gmac; int enh_desc; diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index d8860f2d0976..b0bff3083278 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -253,7 +253,7 @@ extern int usbnet_open(struct net_device *net); extern int usbnet_stop(struct net_device *net); extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, struct net_device *net); -extern void usbnet_tx_timeout(struct net_device *net); +extern void usbnet_tx_timeout(struct net_device *net, unsigned int txqueue); extern int usbnet_change_mtu(struct net_device *net, int new_mtu); extern int usbnet_get_endpoints(struct usbnet *, struct usb_interface *); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 1bab88184d3c..a088349dd94f 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -437,7 +437,7 @@ static inline void addrconf_addr_solict_mult(const struct in6_addr *addr, static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - __be64 *p = (__be64 *)addr; + __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(1))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | @@ -449,7 +449,7 @@ static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) static inline bool ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - __be64 *p = (__be64 *)addr; + __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(2))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | @@ -466,7 +466,7 @@ static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - __be64 *p = (__be64 *)addr; + __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | ((p[1] ^ cpu_to_be64(0x00000001ff000000UL)) & cpu_to_be64(0xffffffffff000000UL))) == 0UL; @@ -481,7 +481,7 @@ static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - __be64 *p = (__be64 *)addr; + __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(0x6a))) == 0UL; diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 3426d6dacc45..17e10fba2152 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -41,6 +41,10 @@ struct unix_skb_parms { u32 consumed; } __randomize_layout; +struct scm_stat { + u32 nr_fds; +}; + #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) @@ -65,6 +69,7 @@ struct unix_sock { #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; wait_queue_entry_t peer_wake; + struct scm_stat scm_stat; }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 4206dc6d813f..b1c717286993 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -98,6 +98,8 @@ struct vsock_transport_send_notify_data { #define VSOCK_TRANSPORT_F_G2H 0x00000002 /* Transport provides DGRAM communication */ #define VSOCK_TRANSPORT_F_DGRAM 0x00000004 +/* Transport provides local (loopback) communication */ +#define VSOCK_TRANSPORT_F_LOCAL 0x00000008 struct vsock_transport { struct module *module; diff --git a/include/net/dsa.h b/include/net/dsa.h index 6767dc3f66c0..da5578db228e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -43,6 +43,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_SJA1105_VALUE 13 #define DSA_TAG_PROTO_KSZ8795_VALUE 14 #define DSA_TAG_PROTO_OCELOT_VALUE 15 +#define DSA_TAG_PROTO_AR9331_VALUE 16 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -61,6 +62,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_SJA1105 = DSA_TAG_PROTO_SJA1105_VALUE, DSA_TAG_PROTO_KSZ8795 = DSA_TAG_PROTO_KSZ8795_VALUE, DSA_TAG_PROTO_OCELOT = DSA_TAG_PROTO_OCELOT_VALUE, + DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE, }; struct packet_type; diff --git a/include/net/dsfield.h b/include/net/dsfield.h index 1a245ee10c95..a59a57ffc546 100644 --- a/include/net/dsfield.h +++ b/include/net/dsfield.h @@ -21,7 +21,7 @@ static inline __u8 ipv4_get_dsfield(const struct iphdr *iph) static inline __u8 ipv6_get_dsfield(const struct ipv6hdr *ipv6h) { - return ntohs(*(const __be16 *)ipv6h) >> 4; + return ntohs(*(__force const __be16 *)ipv6h) >> 4; } diff --git a/include/net/netlink.h b/include/net/netlink.h index b140c8f1be22..56c365dc6dc7 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1735,7 +1735,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) } /** - * nla_validate_nested - Validate a stream of nested attributes + * __nla_validate_nested - Validate a stream of nested attributes * @start: container attribute * @maxtype: maximum attribute type to be expected * @policy: validation policy @@ -1758,9 +1758,9 @@ static inline int __nla_validate_nested(const struct nlattr *start, int maxtype, } static inline int -nl80211_validate_nested(const struct nlattr *start, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +nla_validate_nested(const struct nlattr *start, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { return __nla_validate_nested(start, maxtype, policy, NL_VALIDATE_STRICT, extack); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c0c0791b1912..08b98414d94e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -154,6 +154,7 @@ struct netns_ipv4 { int sysctl_tcp_adv_win_scale; int sysctl_tcp_frto; int sysctl_tcp_nometrics_save; + int sysctl_tcp_no_ssthresh_metrics_save; int sysctl_tcp_moderate_rcvbuf; int sysctl_tcp_tso_win_divisor; int sysctl_tcp_workaround_signed_windows; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e553fc80eb23..47b115e2012a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -791,9 +791,8 @@ enum tc_prio_command { struct tc_prio_qopt_offload_params { int bands; u8 priomap[TC_PRIO_MAX + 1]; - /* In case that a prio qdisc is offloaded and now is changed to a - * non-offloadedable config, it needs to update the backlog & qlen - * values to negate the HW backlog & qlen values (and only them). + /* At the point of un-offloading the Qdisc, the reported backlog and + * qlen need to be reduced by the portion that is in HW. */ struct gnet_stats_queue *qstats; }; @@ -824,4 +823,35 @@ struct tc_root_qopt_offload { bool ingress; }; +enum tc_ets_command { + TC_ETS_REPLACE, + TC_ETS_DESTROY, + TC_ETS_STATS, + TC_ETS_GRAFT, +}; + +struct tc_ets_qopt_offload_replace_params { + unsigned int bands; + u8 priomap[TC_PRIO_MAX + 1]; + unsigned int quanta[TCQ_ETS_MAX_BANDS]; /* 0 for strict bands. */ + unsigned int weights[TCQ_ETS_MAX_BANDS]; + struct gnet_stats_queue *qstats; +}; + +struct tc_ets_qopt_offload_graft_params { + u8 band; + u32 child_handle; +}; + +struct tc_ets_qopt_offload { + enum tc_ets_command command; + u32 handle; + u32 parent; + union { + struct tc_ets_qopt_offload_replace_params replace_params; + struct tc_qopt_offload_stats stats; + struct tc_ets_qopt_offload_graft_params graft_params; + }; +}; + #endif diff --git a/include/net/tls.h b/include/net/tls.h index df630f5fc723..bf9eb4823933 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -641,6 +641,7 @@ int tls_sw_fallback_init(struct sock *sk, #ifdef CONFIG_TLS_DEVICE void tls_device_init(void); void tls_device_cleanup(void); +void tls_device_sk_destruct(struct sock *sk); int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); void tls_device_free_resources_tx(struct sock *sk); int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); @@ -649,6 +650,14 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq); void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq); int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, struct sk_buff *skb, struct strp_msg *rxm); + +static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) +{ + if (!sk_fullsock(sk) || + smp_load_acquire(&sk->sk_destruct) != tls_device_sk_destruct) + return false; + return tls_get_ctx(sk)->rx_conf == TLS_HW; +} #else static inline void tls_device_init(void) {} static inline void tls_device_cleanup(void) {} diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d4591792f0b4..f44155840b07 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -593,6 +593,7 @@ struct ethtool_pauseparam { * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS * @ETH_SS_PHY_TUNABLES: PHY tunable names + * @ETH_SS_LINK_MODES: link mode names */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -604,6 +605,7 @@ enum ethtool_stringset { ETH_SS_TUNABLES, ETH_SS_PHY_STATS, ETH_SS_PHY_TUNABLES, + ETH_SS_LINK_MODES, }; /** diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index 790585f0e61b..6829213a54c5 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -95,6 +95,16 @@ #define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */ #define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */ +/* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */ +#define AD_STATE_LACP_ACTIVITY 0x1 +#define AD_STATE_LACP_TIMEOUT 0x2 +#define AD_STATE_AGGREGATION 0x4 +#define AD_STATE_SYNCHRONIZATION 0x8 +#define AD_STATE_COLLECTING 0x10 +#define AD_STATE_DISTRIBUTING 0x20 +#define AD_STATE_DEFAULTED 0x40 +#define AD_STATE_EXPIRED 0x80 + typedef struct ifbond { __s32 bond_mode; __s32 num_slaves; diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 1b3c2b643a02..4a58e3d7de46 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -156,6 +156,15 @@ struct bridge_vlan_xstats { __u32 pad2; }; +struct bridge_stp_xstats { + __u64 transition_blk; + __u64 transition_fwd; + __u64 rx_bpdu; + __u64 tx_bpdu; + __u64 rx_tcn; + __u64 tx_tcn; +}; + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { @@ -262,6 +271,7 @@ enum { BRIDGE_XSTATS_VLAN, BRIDGE_XSTATS_MCAST, BRIDGE_XSTATS_PAD, + BRIDGE_XSTATS_STP, __BRIDGE_XSTATS_MAX }; #define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8aec8769d944..1d69f637c5d6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -169,6 +169,7 @@ enum { IFLA_MAX_MTU, IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ + IFLA_PERM_ADDRESS, __IFLA_MAX }; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 9f1a72876212..bf5a5b1dfb0b 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1187,4 +1187,21 @@ enum { #define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1) +/* ETS */ + +#define TCQ_ETS_MAX_BANDS 16 + +enum { + TCA_ETS_UNSPEC, + TCA_ETS_NBANDS, /* u8 */ + TCA_ETS_NSTRICT, /* u8 */ + TCA_ETS_QUANTA, /* nested TCA_ETS_QUANTA_BAND */ + TCA_ETS_QUANTA_BAND, /* u32 */ + TCA_ETS_PRIOMAP, /* nested TCA_ETS_PRIOMAP_BAND */ + TCA_ETS_PRIOMAP_BAND, /* u8 */ + __TCA_ETS_MAX, +}; + +#define TCA_ETS_MAX (__TCA_ETS_MAX - 1) + #endif diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 6c2194ab745b..dc0d23a50e69 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -65,6 +65,7 @@ enum { TIPC_NL_UDP_GET_REMOTEIP, TIPC_NL_KEY_SET, TIPC_NL_KEY_FLUSH, + TIPC_NL_ADDR_LEGACY_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -176,6 +177,7 @@ enum { TIPC_NLA_NET_ADDR, /* u32 */ TIPC_NLA_NET_NODEID, /* u64 */ TIPC_NLA_NET_NODEID_W1, /* u64 */ + TIPC_NLA_NET_ADDR_LEGACY, /* flag */ __TIPC_NLA_NET_MAX, TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1 diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index 68d57c5e99bc..fd0ed7221645 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -99,11 +99,13 @@ #define VMADDR_CID_HYPERVISOR 0 -/* This CID is specific to VMCI and can be considered reserved (even VMCI - * doesn't use it anymore, it's a legacy value from an older release). +/* Use this as the destination CID in an address when referring to the + * local communication (loopback). + * (This was VMADDR_CID_RESERVED, but even VMCI doesn't use it anymore, + * it was a legacy value from an older release). */ -#define VMADDR_CID_RESERVED 1 +#define VMADDR_CID_LOCAL 1 /* Use this as the destination CID in an address when referring to the host * (any process other than the hypervisor). VMCI relies on it being 2, but diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h new file mode 100644 index 000000000000..ae88be14c947 --- /dev/null +++ b/include/uapi/linux/wireguard.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Documentation + * ============= + * + * The below enums and macros are for interfacing with WireGuard, using generic + * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two + * methods: get and set. Note that while they share many common attributes, + * these two functions actually accept a slightly different set of inputs and + * outputs. + * + * WG_CMD_GET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain + * one but not both of: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * + * The kernel will then return several messages (NLM_F_MULTI) containing the + * following tree of nested items: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_LISTEN_PORT: NLA_U16 + * WGDEVICE_A_FWMARK: NLA_U32 + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 + * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec + * WGPEER_A_RX_BYTES: NLA_U64 + * WGPEER_A_TX_BYTES: NLA_U64 + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32 + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that all of the allowed IPs of a single peer will not + * fit within a single netlink message. In that case, the same peer will + * be written in the following message, except it will only contain + * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several + * times in a row for the same peer. It is then up to the receiver to + * coalesce adjacent peers. Likewise, it is possible that all peers will + * not fit within a single message. So, subsequent peers will be sent + * in following messages, except those will only contain WGDEVICE_A_IFNAME + * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these + * messages to form the complete list of peers. + * + * Since this is an NLA_F_DUMP command, the final message will always be + * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message + * contains an integer error code. It is either zero or a negative error + * code corresponding to the errno. + * + * WG_CMD_SET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST. The command should contain the + * following tree of nested items, containing one but not both of + * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current + * peers should be removed prior to adding the list below. + * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove + * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly + * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN + * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the + * specified peer should not exist at the end of the + * operation, rather than added/updated and/or + * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed + * IPs of this peer should be removed prior to adding + * the list below and/or WGPEER_F_UPDATE_ONLY if the + * peer should only be set if it already exists. + * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove + * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at + * all by most users of this API, as the + * most recent protocol will be used when + * this is unset. Otherwise, must be set + * to 1. + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that the amount of configuration data exceeds that of + * the maximum message length accepted by the kernel. In that case, several + * messages should be sent one after another, with each successive one + * filling in information not contained in the prior. Note that if + * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably + * should not be specified in fragments that come after, so that the list + * of peers is only cleared the first time but appended after. Likewise for + * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message + * of a peer, it likely should not be specified in subsequent fragments. + * + * If an error occurs, NLMSG_ERROR will reply containing an errno. + */ + +#ifndef _WG_UAPI_WIREGUARD_H +#define _WG_UAPI_WIREGUARD_H + +#define WG_GENL_NAME "wireguard" +#define WG_GENL_VERSION 1 + +#define WG_KEY_LEN 32 + +enum wg_cmd { + WG_CMD_GET_DEVICE, + WG_CMD_SET_DEVICE, + __WG_CMD_MAX +}; +#define WG_CMD_MAX (__WG_CMD_MAX - 1) + +enum wgdevice_flag { + WGDEVICE_F_REPLACE_PEERS = 1U << 0, + __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS +}; +enum wgdevice_attribute { + WGDEVICE_A_UNSPEC, + WGDEVICE_A_IFINDEX, + WGDEVICE_A_IFNAME, + WGDEVICE_A_PRIVATE_KEY, + WGDEVICE_A_PUBLIC_KEY, + WGDEVICE_A_FLAGS, + WGDEVICE_A_LISTEN_PORT, + WGDEVICE_A_FWMARK, + WGDEVICE_A_PEERS, + __WGDEVICE_A_LAST +}; +#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) + +enum wgpeer_flag { + WGPEER_F_REMOVE_ME = 1U << 0, + WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, + WGPEER_F_UPDATE_ONLY = 1U << 2, + __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | + WGPEER_F_UPDATE_ONLY +}; +enum wgpeer_attribute { + WGPEER_A_UNSPEC, + WGPEER_A_PUBLIC_KEY, + WGPEER_A_PRESHARED_KEY, + WGPEER_A_FLAGS, + WGPEER_A_ENDPOINT, + WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + WGPEER_A_LAST_HANDSHAKE_TIME, + WGPEER_A_RX_BYTES, + WGPEER_A_TX_BYTES, + WGPEER_A_ALLOWEDIPS, + WGPEER_A_PROTOCOL_VERSION, + __WGPEER_A_LAST +}; +#define WGPEER_A_MAX (__WGPEER_A_LAST - 1) + +enum wgallowedip_attribute { + WGALLOWEDIP_A_UNSPEC, + WGALLOWEDIP_A_FAMILY, + WGALLOWEDIP_A_IPADDR, + WGALLOWEDIP_A_CIDR_MASK, + __WGALLOWEDIP_A_LAST +}; +#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) + +#endif /* _WG_UAPI_WIREGUARD_H */ diff --git a/net/Makefile b/net/Makefile index 449fc0b221f8..848303d98d3d 100644 --- a/net/Makefile +++ b/net/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_TLS) += tls/ diff --git a/net/atm/lec.c b/net/atm/lec.c index 5a77c235a212..b57368e70aab 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -194,7 +194,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) dev->stats.tx_bytes += skb->len; } -static void lec_tx_timeout(struct net_device *dev) +static void lec_tx_timeout(struct net_device *dev, unsigned int txqueue) { pr_info("%s\n", dev->name); netif_trans_update(dev); diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 1d4d7d415730..cc1cff63194f 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -112,7 +112,7 @@ static int bnep_net_set_mac_addr(struct net_device *dev, void *arg) return 0; } -static void bnep_net_timeout(struct net_device *dev) +static void bnep_net_timeout(struct net_device *dev, unsigned int txqueue) { BT_DBG("net_timeout"); netif_wake_queue(dev); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a0a54482aabc..60136575aea4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1607,6 +1607,19 @@ static int br_fill_linkxstats(struct sk_buff *skb, br_multicast_get_stats(br, p, nla_data(nla)); } #endif + + if (p) { + nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_STP, + sizeof(p->stp_xstats), + BRIDGE_XSTATS_PAD); + if (!nla) + goto nla_put_failure; + + spin_lock_bh(&br->lock); + memcpy(nla_data(nla), &p->stp_xstats, sizeof(p->stp_xstats)); + spin_unlock_bh(&br->lock); + } + nla_nest_end(skb, nest); *prividx = 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 36b0367ca1e0..f540f3bdf294 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -283,6 +283,8 @@ struct net_bridge_port { #endif u16 group_fwd_mask; u16 backup_redirected_cnt; + + struct bridge_stp_xstats stp_xstats; }; #define kobj_to_brport(obj) container_of(obj, struct net_bridge_port, kobj) diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 1f1410f8d312..6856a6d9282b 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -45,6 +45,17 @@ void br_set_state(struct net_bridge_port *p, unsigned int state) br_info(p->br, "port %u(%s) entered %s state\n", (unsigned int) p->port_no, p->dev->name, br_port_state_names[p->state]); + + if (p->br->stp_enabled == BR_KERNEL_STP) { + switch (p->state) { + case BR_STATE_BLOCKING: + p->stp_xstats.transition_blk++; + break; + case BR_STATE_FORWARDING: + p->stp_xstats.transition_fwd++; + break; + } + } } /* called under bridge lock */ @@ -484,6 +495,8 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct net_bridge *br; int was_root; + p->stp_xstats.rx_bpdu++; + br = p->br; was_root = br_is_root_bridge(br); @@ -517,6 +530,8 @@ void br_received_config_bpdu(struct net_bridge_port *p, /* called under bridge lock */ void br_received_tcn_bpdu(struct net_bridge_port *p) { + p->stp_xstats.rx_tcn++; + if (br_is_designated_port(p)) { br_info(p->br, "port %u(%s) received tcn bpdu\n", (unsigned int) p->port_no, p->dev->name); diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 7796dd9d42d7..0e4572f31330 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -118,6 +118,8 @@ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu) br_set_ticks(buf+33, bpdu->forward_delay); br_send_bpdu(p, buf, 35); + + p->stp_xstats.tx_bpdu++; } /* called under bridge lock */ @@ -133,6 +135,8 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) buf[2] = 0; buf[3] = BPDU_TYPE_TCN; br_send_bpdu(p, buf, 4); + + p->stp_xstats.tx_tcn++; } /* diff --git a/net/core/Makefile b/net/core/Makefile index a104dc8faafc..3e2c378e5f31 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -8,7 +8,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ +obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ fib_notifier.o xdp.o flow_offload.o diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02916f43bf63..20bc406f3871 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1041,6 +1041,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MIN_MTU */ + nla_total_size(4) /* IFLA_MAX_MTU */ + rtnl_prop_list_size(dev) + + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + 0; } @@ -1757,6 +1758,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0) goto nla_put_failure; + if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) && + nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr)) + goto nla_put_failure; rcu_read_lock(); if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) @@ -1822,6 +1826,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PROP_LIST] = { .type = NLA_NESTED }, [IFLA_ALT_IFNAME] = { .type = NLA_STRING, .len = ALTIFNAMSIZ - 1 }, + [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 1e6c3cac11e6..92663dcb3aa2 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -29,6 +29,12 @@ config NET_DSA_TAG_8021Q Drivers which use these helpers should select this as dependency. +config NET_DSA_TAG_AR9331 + tristate "Tag driver for Atheros AR9331 SoC with built-in switch" + help + Say Y or M if you want to enable support for tagging frames for + the Atheros AR9331 SoC with built-in switch. + config NET_DSA_TAG_BRCM_COMMON tristate default n diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 9a482c38bdb1..108486cfdeef 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -5,6 +5,7 @@ dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o # tagging formats obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o +obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 2dd86d9bcda9..09ea2fd78c74 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -150,22 +150,6 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags); int dsa_port_vid_del(struct dsa_port *dp, u16 vid); int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); -void dsa_port_phylink_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state); -void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config, - struct phylink_link_state *state); -void dsa_port_phylink_mac_config(struct phylink_config *config, - unsigned int mode, - const struct phylink_link_state *state); -void dsa_port_phylink_mac_an_restart(struct phylink_config *config); -void dsa_port_phylink_mac_link_down(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface); -void dsa_port_phylink_mac_link_up(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface, - struct phy_device *phydev); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; /* slave.c */ diff --git a/net/dsa/port.c b/net/dsa/port.c index 46ac9ba21987..ffb5601f7ed6 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -415,9 +415,9 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) return phydev; } -void dsa_port_phylink_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state) +static void dsa_port_phylink_validate(struct phylink_config *config, + unsigned long *supported, + struct phylink_link_state *state) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; @@ -427,10 +427,9 @@ void dsa_port_phylink_validate(struct phylink_config *config, ds->ops->phylink_validate(ds, dp->index, supported, state); } -EXPORT_SYMBOL_GPL(dsa_port_phylink_validate); -void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config, - struct phylink_link_state *state) +static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config, + struct phylink_link_state *state) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; @@ -444,11 +443,10 @@ void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config, if (ds->ops->phylink_mac_link_state(ds, dp->index, state) < 0) state->link = 0; } -EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_pcs_get_state); -void dsa_port_phylink_mac_config(struct phylink_config *config, - unsigned int mode, - const struct phylink_link_state *state) +static void dsa_port_phylink_mac_config(struct phylink_config *config, + unsigned int mode, + const struct phylink_link_state *state) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; @@ -458,9 +456,8 @@ void dsa_port_phylink_mac_config(struct phylink_config *config, ds->ops->phylink_mac_config(ds, dp->index, mode, state); } -EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_config); -void dsa_port_phylink_mac_an_restart(struct phylink_config *config) +static void dsa_port_phylink_mac_an_restart(struct phylink_config *config) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; @@ -470,11 +467,10 @@ void dsa_port_phylink_mac_an_restart(struct phylink_config *config) ds->ops->phylink_mac_an_restart(ds, dp->index); } -EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_an_restart); -void dsa_port_phylink_mac_link_down(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface) +static void dsa_port_phylink_mac_link_down(struct phylink_config *config, + unsigned int mode, + phy_interface_t interface) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct phy_device *phydev = NULL; @@ -491,12 +487,11 @@ void dsa_port_phylink_mac_link_down(struct phylink_config *config, ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface); } -EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_down); -void dsa_port_phylink_mac_link_up(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface, - struct phy_device *phydev) +static void dsa_port_phylink_mac_link_up(struct phylink_config *config, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; @@ -509,7 +504,6 @@ void dsa_port_phylink_mac_link_up(struct phylink_config *config, ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev); } -EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_up); const struct phylink_mac_ops dsa_port_phylink_mac_ops = { .validate = dsa_port_phylink_validate, diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c new file mode 100644 index 000000000000..466ffa92a474 --- /dev/null +++ b/net/dsa/tag_ar9331.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> + */ + + +#include <linux/bitfield.h> +#include <linux/etherdevice.h> + +#include "dsa_priv.h" + +#define AR9331_HDR_LEN 2 +#define AR9331_HDR_VERSION 1 + +#define AR9331_HDR_VERSION_MASK GENMASK(15, 14) +#define AR9331_HDR_PRIORITY_MASK GENMASK(13, 12) +#define AR9331_HDR_TYPE_MASK GENMASK(10, 8) +#define AR9331_HDR_BROADCAST BIT(7) +#define AR9331_HDR_FROM_CPU BIT(6) +/* AR9331_HDR_RESERVED - not used or may be version field. + * According to the AR8216 doc it should 0b10. On AR9331 it is 0b11 on RX path + * and should be set to 0b11 to make it work. + */ +#define AR9331_HDR_RESERVED_MASK GENMASK(5, 4) +#define AR9331_HDR_PORT_NUM_MASK GENMASK(3, 0) + +static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + __le16 *phdr; + u16 hdr; + + if (skb_cow_head(skb, 0) < 0) + return NULL; + + phdr = skb_push(skb, AR9331_HDR_LEN); + + hdr = FIELD_PREP(AR9331_HDR_VERSION_MASK, AR9331_HDR_VERSION); + hdr |= AR9331_HDR_FROM_CPU | dp->index; + /* 0b10 for AR8216 and 0b11 for AR9331 */ + hdr |= AR9331_HDR_RESERVED_MASK; + + phdr[0] = cpu_to_le16(hdr); + + return skb; +} + +static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt) +{ + u8 ver, port; + u16 hdr; + + if (unlikely(!pskb_may_pull(skb, AR9331_HDR_LEN))) + return NULL; + + hdr = le16_to_cpu(*(__le16 *)skb_mac_header(skb)); + + ver = FIELD_GET(AR9331_HDR_VERSION_MASK, hdr); + if (unlikely(ver != AR9331_HDR_VERSION)) { + netdev_warn_once(ndev, "%s:%i wrong header version 0x%2x\n", + __func__, __LINE__, hdr); + return NULL; + } + + if (unlikely(hdr & AR9331_HDR_FROM_CPU)) { + netdev_warn_once(ndev, "%s:%i packet should not be from cpu 0x%2x\n", + __func__, __LINE__, hdr); + return NULL; + } + + skb_pull_rcsum(skb, AR9331_HDR_LEN); + + /* Get source port information */ + port = FIELD_GET(AR9331_HDR_PORT_NUM_MASK, hdr); + + skb->dev = dsa_master_find_slave(ndev, 0, port); + if (!skb->dev) + return NULL; + + return skb; +} + +static const struct dsa_device_ops ar9331_netdev_ops = { + .name = "ar9331", + .proto = DSA_TAG_PROTO_AR9331, + .xmit = ar9331_tag_xmit, + .rcv = ar9331_tag_rcv, + .overhead = AR9331_HDR_LEN, +}; + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_AR9331); +module_dsa_tag_driver(ar9331_netdev_ops); diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile new file mode 100644 index 000000000000..f68387618973 --- /dev/null +++ b/net/ethtool/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-y += ioctl.o common.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c new file mode 100644 index 000000000000..0a8728565356 --- /dev/null +++ b/net/ethtool/common.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "common.h" + +const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { + [NETIF_F_SG_BIT] = "tx-scatter-gather", + [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4", + [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic", + [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", + [NETIF_F_HIGHDMA_BIT] = "highdma", + [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", + [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert", + + [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse", + [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter", + [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert", + [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse", + [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", + [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", + [NETIF_F_GSO_BIT] = "tx-generic-segmentation", + [NETIF_F_LLTX_BIT] = "tx-lockless", + [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", + [NETIF_F_GRO_BIT] = "rx-gro", + [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", + [NETIF_F_LRO_BIT] = "rx-lro", + + [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", + [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", + [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", + [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", + [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", + [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", + [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", + [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation", + [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", + [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", + [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", + [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", + [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", + + [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", + [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", + [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", + [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", + [NETIF_F_RXHASH_BIT] = "rx-hashing", + [NETIF_F_RXCSUM_BIT] = "rx-checksum", + [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy", + [NETIF_F_LOOPBACK_BIT] = "loopback", + [NETIF_F_RXFCS_BIT] = "rx-fcs", + [NETIF_F_RXALL_BIT] = "rx-all", + [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", + [NETIF_F_HW_TC_BIT] = "hw-tc-offload", + [NETIF_F_HW_ESP_BIT] = "esp-hw-offload", + [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", + [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload", + [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record", + [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload", + [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload", +}; + +const char +rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { + [ETH_RSS_HASH_TOP_BIT] = "toeplitz", + [ETH_RSS_HASH_XOR_BIT] = "xor", + [ETH_RSS_HASH_CRC32_BIT] = "crc32", +}; + +const char +tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { + [ETHTOOL_ID_UNSPEC] = "Unspec", + [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", + [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", + [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout", +}; + +const char +phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = { + [ETHTOOL_ID_UNSPEC] = "Unspec", + [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift", + [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down", + [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down", +}; + +#define __LINK_MODE_NAME(speed, type, duplex) \ + #speed "base" #type "/" #duplex +#define __DEFINE_LINK_MODE_NAME(speed, type, duplex) \ + [ETHTOOL_LINK_MODE(speed, type, duplex)] = \ + __LINK_MODE_NAME(speed, type, duplex) +#define __DEFINE_SPECIAL_MODE_NAME(_mode, _name) \ + [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = _name + +const char link_mode_names[][ETH_GSTRING_LEN] = { + __DEFINE_LINK_MODE_NAME(10, T, Half), + __DEFINE_LINK_MODE_NAME(10, T, Full), + __DEFINE_LINK_MODE_NAME(100, T, Half), + __DEFINE_LINK_MODE_NAME(100, T, Full), + __DEFINE_LINK_MODE_NAME(1000, T, Half), + __DEFINE_LINK_MODE_NAME(1000, T, Full), + __DEFINE_SPECIAL_MODE_NAME(Autoneg, "Autoneg"), + __DEFINE_SPECIAL_MODE_NAME(TP, "TP"), + __DEFINE_SPECIAL_MODE_NAME(AUI, "AUI"), + __DEFINE_SPECIAL_MODE_NAME(MII, "MII"), + __DEFINE_SPECIAL_MODE_NAME(FIBRE, "FIBRE"), + __DEFINE_SPECIAL_MODE_NAME(BNC, "BNC"), + __DEFINE_LINK_MODE_NAME(10000, T, Full), + __DEFINE_SPECIAL_MODE_NAME(Pause, "Pause"), + __DEFINE_SPECIAL_MODE_NAME(Asym_Pause, "Asym_Pause"), + __DEFINE_LINK_MODE_NAME(2500, X, Full), + __DEFINE_SPECIAL_MODE_NAME(Backplane, "Backplane"), + __DEFINE_LINK_MODE_NAME(1000, KX, Full), + __DEFINE_LINK_MODE_NAME(10000, KX4, Full), + __DEFINE_LINK_MODE_NAME(10000, KR, Full), + __DEFINE_SPECIAL_MODE_NAME(10000baseR_FEC, "10000baseR_FEC"), + __DEFINE_LINK_MODE_NAME(20000, MLD2, Full), + __DEFINE_LINK_MODE_NAME(20000, KR2, Full), + __DEFINE_LINK_MODE_NAME(40000, KR4, Full), + __DEFINE_LINK_MODE_NAME(40000, CR4, Full), + __DEFINE_LINK_MODE_NAME(40000, SR4, Full), + __DEFINE_LINK_MODE_NAME(40000, LR4, Full), + __DEFINE_LINK_MODE_NAME(56000, KR4, Full), + __DEFINE_LINK_MODE_NAME(56000, CR4, Full), + __DEFINE_LINK_MODE_NAME(56000, SR4, Full), + __DEFINE_LINK_MODE_NAME(56000, LR4, Full), + __DEFINE_LINK_MODE_NAME(25000, CR, Full), + __DEFINE_LINK_MODE_NAME(25000, KR, Full), + __DEFINE_LINK_MODE_NAME(25000, SR, Full), + __DEFINE_LINK_MODE_NAME(50000, CR2, Full), + __DEFINE_LINK_MODE_NAME(50000, KR2, Full), + __DEFINE_LINK_MODE_NAME(100000, KR4, Full), + __DEFINE_LINK_MODE_NAME(100000, SR4, Full), + __DEFINE_LINK_MODE_NAME(100000, CR4, Full), + __DEFINE_LINK_MODE_NAME(100000, LR4_ER4, Full), + __DEFINE_LINK_MODE_NAME(50000, SR2, Full), + __DEFINE_LINK_MODE_NAME(1000, X, Full), + __DEFINE_LINK_MODE_NAME(10000, CR, Full), + __DEFINE_LINK_MODE_NAME(10000, SR, Full), + __DEFINE_LINK_MODE_NAME(10000, LR, Full), + __DEFINE_LINK_MODE_NAME(10000, LRM, Full), + __DEFINE_LINK_MODE_NAME(10000, ER, Full), + __DEFINE_LINK_MODE_NAME(2500, T, Full), + __DEFINE_LINK_MODE_NAME(5000, T, Full), + __DEFINE_SPECIAL_MODE_NAME(FEC_NONE, "None"), + __DEFINE_SPECIAL_MODE_NAME(FEC_RS, "RS"), + __DEFINE_SPECIAL_MODE_NAME(FEC_BASER, "BASER"), + __DEFINE_LINK_MODE_NAME(50000, KR, Full), + __DEFINE_LINK_MODE_NAME(50000, SR, Full), + __DEFINE_LINK_MODE_NAME(50000, CR, Full), + __DEFINE_LINK_MODE_NAME(50000, LR_ER_FR, Full), + __DEFINE_LINK_MODE_NAME(50000, DR, Full), + __DEFINE_LINK_MODE_NAME(100000, KR2, Full), + __DEFINE_LINK_MODE_NAME(100000, SR2, Full), + __DEFINE_LINK_MODE_NAME(100000, CR2, Full), + __DEFINE_LINK_MODE_NAME(100000, LR2_ER2_FR2, Full), + __DEFINE_LINK_MODE_NAME(100000, DR2, Full), + __DEFINE_LINK_MODE_NAME(200000, KR4, Full), + __DEFINE_LINK_MODE_NAME(200000, SR4, Full), + __DEFINE_LINK_MODE_NAME(200000, LR4_ER4_FR4, Full), + __DEFINE_LINK_MODE_NAME(200000, DR4, Full), + __DEFINE_LINK_MODE_NAME(200000, CR4, Full), + __DEFINE_LINK_MODE_NAME(100, T1, Full), + __DEFINE_LINK_MODE_NAME(1000, T1, Full), + __DEFINE_LINK_MODE_NAME(400000, KR8, Full), + __DEFINE_LINK_MODE_NAME(400000, SR8, Full), + __DEFINE_LINK_MODE_NAME(400000, LR8_ER8_FR8, Full), + __DEFINE_LINK_MODE_NAME(400000, DR8, Full), + __DEFINE_LINK_MODE_NAME(400000, CR8, Full), +}; +static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/common.h b/net/ethtool/common.h new file mode 100644 index 000000000000..bbb788908cb1 --- /dev/null +++ b/net/ethtool/common.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ETHTOOL_COMMON_H +#define _ETHTOOL_COMMON_H + +#include <linux/ethtool.h> + +/* compose link mode index from speed, type and duplex */ +#define ETHTOOL_LINK_MODE(speed, type, duplex) \ + ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT + +extern const char +netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]; +extern const char +rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN]; +extern const char +tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN]; +extern const char +phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN]; +extern const char link_mode_names[][ETH_GSTRING_LEN]; + +#endif /* _ETHTOOL_COMMON_H */ diff --git a/net/core/ethtool.c b/net/ethtool/ioctl.c index cd9bc67381b2..aed2c2cf1623 100644 --- a/net/core/ethtool.c +++ b/net/ethtool/ioctl.c @@ -27,6 +27,8 @@ #include <net/xdp_sock.h> #include <net/flow_offload.h> +#include "common.h" + /* * Some useful ethtool_ops methods that're device independent. * If we find that all drivers want to do the same thing here, @@ -54,88 +56,6 @@ EXPORT_SYMBOL(ethtool_op_get_ts_info); #define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32) -static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { - [NETIF_F_SG_BIT] = "tx-scatter-gather", - [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4", - [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic", - [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", - [NETIF_F_HIGHDMA_BIT] = "highdma", - [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", - [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert", - - [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse", - [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter", - [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert", - [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse", - [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", - [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", - [NETIF_F_GSO_BIT] = "tx-generic-segmentation", - [NETIF_F_LLTX_BIT] = "tx-lockless", - [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", - [NETIF_F_GRO_BIT] = "rx-gro", - [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", - [NETIF_F_LRO_BIT] = "rx-lro", - - [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", - [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", - [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", - [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", - [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", - [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", - [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", - [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", - [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation", - [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation", - [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", - [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", - [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", - [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", - [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", - [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", - - [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", - [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", - [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", - [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", - [NETIF_F_RXHASH_BIT] = "rx-hashing", - [NETIF_F_RXCSUM_BIT] = "rx-checksum", - [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy", - [NETIF_F_LOOPBACK_BIT] = "loopback", - [NETIF_F_RXFCS_BIT] = "rx-fcs", - [NETIF_F_RXALL_BIT] = "rx-all", - [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", - [NETIF_F_HW_TC_BIT] = "hw-tc-offload", - [NETIF_F_HW_ESP_BIT] = "esp-hw-offload", - [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", - [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload", - [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record", - [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload", - [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload", -}; - -static const char -rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { - [ETH_RSS_HASH_TOP_BIT] = "toeplitz", - [ETH_RSS_HASH_XOR_BIT] = "xor", - [ETH_RSS_HASH_CRC32_BIT] = "crc32", -}; - -static const char -tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { - [ETHTOOL_ID_UNSPEC] = "Unspec", - [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", - [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", - [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout", -}; - -static const char -phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = { - [ETHTOOL_ID_UNSPEC] = "Unspec", - [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift", - [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down", - [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down", -}; - static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { @@ -234,6 +154,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) !ops->get_ethtool_phy_stats) return phy_ethtool_get_sset_count(dev->phydev); + if (sset == ETH_SS_LINK_MODES) + return __ETHTOOL_LINK_MODE_MASK_NBITS; + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -258,6 +181,9 @@ static void __ethtool_get_strings(struct net_device *dev, else if (stringset == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats) phy_ethtool_get_strings(dev->phydev, data); + else if (stringset == ETH_SS_LINK_MODES) + memcpy(data, link_mode_names, + __ETHTOOL_LINK_MODE_MASK_NBITS * ETH_GSTRING_LEN); else /* ops->get_strings is valid because checked earlier */ ops->get_strings(dev, stringset, data); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b9df9c09b84e..b92a42433a7d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -980,9 +980,12 @@ static struct key_vector *fib_find_node(struct trie *t, /* Return the first fib alias matching TOS with * priority less than or equal to PRIO. + * If 'find_first' is set, return the first matching + * fib alias, regardless of TOS and priority. */ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, - u8 tos, u32 prio, u32 tb_id) + u8 tos, u32 prio, u32 tb_id, + bool find_first) { struct fib_alias *fa; @@ -998,6 +1001,8 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen, continue; if (fa->tb_id != tb_id) break; + if (find_first) + return fa; if (fa->fa_tos > tos) continue; if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) @@ -1063,9 +1068,6 @@ noleaf: return -ENOMEM; } -/* fib notifier for ADD is sent before calling fib_insert_alias with - * the expectation that the only possible failure ENOMEM - */ static int fib_insert_alias(struct trie *t, struct key_vector *tp, struct key_vector *l, struct fib_alias *new, struct fib_alias *fa, t_key key) @@ -1118,11 +1120,13 @@ static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack) return true; } +static void fib_remove_alias(struct trie *t, struct key_vector *tp, + struct key_vector *l, struct fib_alias *old); + /* Caller must hold RTNL. */ int fib_table_insert(struct net *net, struct fib_table *tb, struct fib_config *cfg, struct netlink_ext_ack *extack) { - enum fib_event_type event = FIB_EVENT_ENTRY_ADD; struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; struct key_vector *l, *tp; @@ -1149,7 +1153,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, l = fib_find_node(t, &tp, key); fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority, - tb->tb_id) : NULL; + tb->tb_id, false) : NULL; /* Now fa, if non-NULL, points to the first fib alias * with the same keys [prefix,tos,priority], if such key already @@ -1217,12 +1221,17 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - err = call_fib_entry_notifiers(net, - FIB_EVENT_ENTRY_REPLACE, - key, plen, new_fa, - extack); - if (err) - goto out_free_new_fa; + if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0, + tb->tb_id, true) == fa) { + enum fib_event_type fib_event; + + fib_event = FIB_EVENT_ENTRY_REPLACE; + err = call_fib_entry_notifiers(net, fib_event, + key, plen, + new_fa, extack); + if (err) + goto out_free_new_fa; + } rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1244,12 +1253,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, if (fa_match) goto out; - if (cfg->fc_nlflags & NLM_F_APPEND) { - event = FIB_EVENT_ENTRY_APPEND; + if (cfg->fc_nlflags & NLM_F_APPEND) nlflags |= NLM_F_APPEND; - } else { + else fa = fa_first; - } } err = -ENOENT; if (!(cfg->fc_nlflags & NLM_F_CREATE)) @@ -1269,14 +1276,26 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack); - if (err) - goto out_free_new_fa; - /* Insert new entry to the list. */ err = fib_insert_alias(t, tp, l, new_fa, fa, key); if (err) - goto out_fib_notif; + goto out_free_new_fa; + + /* The alias was already inserted, so the node must exist. */ + l = l ? l : fib_find_node(t, &tp, key); + if (WARN_ON_ONCE(!l)) + goto out_free_new_fa; + + if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == + new_fa) { + enum fib_event_type fib_event; + + fib_event = FIB_EVENT_ENTRY_REPLACE; + err = call_fib_entry_notifiers(net, fib_event, key, plen, + new_fa, extack); + if (err) + goto out_remove_new_fa; + } if (!plen) tb->tb_num_default++; @@ -1287,14 +1306,8 @@ int fib_table_insert(struct net *net, struct fib_table *tb, succeeded: return 0; -out_fib_notif: - /* notifier was sent that entry would be added to trie, but - * the add failed and need to recover. Only failure for - * fib_insert_alias is ENOMEM. - */ - NL_SET_ERR_MSG(extack, "Failed to insert route into trie"); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, - plen, new_fa, NULL); +out_remove_new_fa: + fib_remove_alias(t, tp, l, new_fa); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1545,6 +1558,36 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, node_pull_suffix(tp, fa->fa_slen); } +static void fib_notify_alias_delete(struct net *net, u32 key, + struct hlist_head *fah, + struct fib_alias *fa_to_delete, + struct netlink_ext_ack *extack) +{ + struct fib_alias *fa_next, *fa_to_notify; + u32 tb_id = fa_to_delete->tb_id; + u8 slen = fa_to_delete->fa_slen; + enum fib_event_type fib_event; + + /* Do not notify if we do not care about the route. */ + if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete) + return; + + /* Determine if the route should be replaced by the next route in the + * list. + */ + fa_next = hlist_entry_safe(fa_to_delete->fa_list.next, + struct fib_alias, fa_list); + if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) { + fib_event = FIB_EVENT_ENTRY_REPLACE; + fa_to_notify = fa_next; + } else { + fib_event = FIB_EVENT_ENTRY_DEL; + fa_to_notify = fa_to_delete; + } + call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen, + fa_to_notify, extack); +} + /* Caller must hold RTNL. */ int fib_table_delete(struct net *net, struct fib_table *tb, struct fib_config *cfg, struct netlink_ext_ack *extack) @@ -1566,7 +1609,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if (!l) return -ESRCH; - fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id); + fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false); if (!fa) return -ESRCH; @@ -1598,8 +1641,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if (!fa_to_delete) return -ESRCH; - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, - fa_to_delete, extack); + fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1923,10 +1965,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) continue; } - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, - n->key, - KEYLENGTH - fa->fa_slen, fa, - NULL); + fib_notify_alias_delete(net, n->key, &n->leaf, fa, + NULL); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); @@ -2022,6 +2062,7 @@ static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb, struct netlink_ext_ack *extack) { struct fib_alias *fa; + int last_slen = -1; int err; hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { @@ -2036,8 +2077,12 @@ static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb, if (tb->tb_id != fa->tb_id) continue; - err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key, - KEYLENGTH - fa->fa_slen, + if (fa->fa_slen == last_slen) + continue; + + last_slen = fa->fa_slen; + err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE, + l->key, KEYLENGTH - fa->fa_slen, fa, extack); if (err) return err; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index fcb2cd167f64..9684af02e0a5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1193,6 +1193,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "tcp_no_ssthresh_metrics_save", + .data = &init_net.ipv4.sysctl_tcp_no_ssthresh_metrics_save, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { .procname = "tcp_moderate_rcvbuf", .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a39ee794891..93fe77c5b02d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -443,8 +443,6 @@ void tcp_init_sock(struct sock *sk) tp->tsoffset = 0; tp->rack.reo_wnd_steps = 1; - sk->sk_state = TCP_CLOSE; - sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -1780,6 +1778,8 @@ static int tcp_zerocopy_receive(struct sock *sk, while (length + PAGE_SIZE <= zc->length) { if (zc->recv_skip_hint < PAGE_SIZE) { if (skb) { + if (zc->recv_skip_hint > 0) + break; skb = skb->next; offset = seq - TCP_SKB_CB(skb)->seq; } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 92282f98dc82..26637fce324d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2674,6 +2674,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; net->ipv4.sysctl_tcp_tw_reuse = 2; + net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1; cnt = tcp_hashinfo.ehash_mask + 1; net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index c4848e7a0aad..279db8822439 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -385,7 +385,8 @@ void tcp_update_metrics(struct sock *sk) if (tcp_in_initial_slowstart(tp)) { /* Slow start still did not finish. */ - if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && (tp->snd_cwnd >> 1) > val) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, @@ -400,7 +401,8 @@ void tcp_update_metrics(struct sock *sk) } else if (!tcp_in_slow_start(tp) && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ - if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) + if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { @@ -416,7 +418,8 @@ void tcp_update_metrics(struct sock *sk) tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_ssthresh) >> 1); } - if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val && tp->snd_ssthresh > val) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, @@ -441,6 +444,7 @@ void tcp_init_metrics(struct sock *sk) { struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct tcp_metrics_block *tm; u32 val, crtt = 0; /* cached RTT scaled by 8 */ @@ -458,7 +462,8 @@ void tcp_init_metrics(struct sock *sk) if (tcp_metric_locked(tm, TCP_METRIC_CWND)) tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); - val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ? + 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); if (val) { tp->snd_ssthresh = val; if (tp->snd_ssthresh > tp->snd_cwnd_clamp) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2985509147a2..b1e7ec726958 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -409,6 +409,23 @@ config NET_SCH_PLUG To compile this code as a module, choose M here: the module will be called sch_plug. +config NET_SCH_ETS + tristate "Enhanced transmission selection scheduler (ETS)" + help + The Enhanced Transmission Selection scheduler is a classful + queuing discipline that merges functionality of PRIO and DRR + qdiscs in one scheduler. ETS makes it easy to configure a set of + strict and bandwidth-sharing bands to implement the transmission + selection described in 802.1Qaz. + + Say Y here if you want to use the ETS packet scheduling + algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_ets. + + If unsure, say N. + menuconfig NET_SCH_DEFAULT bool "Allow override default queue discipline" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 415d1e1f237e..bc8856b865ff 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o +obj-$(CONFIG_NET_SCH_ETS) += sch_ets.o obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o obj-$(CONFIG_NET_SCH_SKBPRIO) += sch_skbprio.o obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index e0f40400f679..6cc3ab145513 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -173,8 +173,7 @@ struct cake_tin_data { u64 tin_rate_bps; u16 tin_rate_shft; - u16 tin_quantum_prio; - u16 tin_quantum_band; + u16 tin_quantum; s32 tin_deficit; u32 tin_backlog; u32 tin_dropped; @@ -1919,7 +1918,7 @@ begin: while (b->tin_deficit < 0 || !(b->sparse_flow_count + b->bulk_flow_count)) { if (b->tin_deficit <= 0) - b->tin_deficit += b->tin_quantum_band; + b->tin_deficit += b->tin_quantum; if (b->sparse_flow_count + b->bulk_flow_count) empty = false; @@ -2241,8 +2240,7 @@ static int cake_config_besteffort(struct Qdisc *sch) cake_set_rate(b, rate, mtu, us_to_ns(q->target), us_to_ns(q->interval)); - b->tin_quantum_band = 65535; - b->tin_quantum_prio = 65535; + b->tin_quantum = 65535; return 0; } @@ -2253,8 +2251,7 @@ static int cake_config_precedence(struct Qdisc *sch) struct cake_sched_data *q = qdisc_priv(sch); u32 mtu = psched_mtu(qdisc_dev(sch)); u64 rate = q->rate_bps; - u32 quantum1 = 256; - u32 quantum2 = 256; + u32 quantum = 256; u32 i; q->tin_cnt = 8; @@ -2267,18 +2264,14 @@ static int cake_config_precedence(struct Qdisc *sch) cake_set_rate(b, rate, mtu, us_to_ns(q->target), us_to_ns(q->interval)); - b->tin_quantum_prio = max_t(u16, 1U, quantum1); - b->tin_quantum_band = max_t(u16, 1U, quantum2); + b->tin_quantum = max_t(u16, 1U, quantum); /* calculate next class's parameters */ rate *= 7; rate >>= 3; - quantum1 *= 3; - quantum1 >>= 1; - - quantum2 *= 7; - quantum2 >>= 3; + quantum *= 7; + quantum >>= 3; } return 0; @@ -2347,8 +2340,7 @@ static int cake_config_diffserv8(struct Qdisc *sch) struct cake_sched_data *q = qdisc_priv(sch); u32 mtu = psched_mtu(qdisc_dev(sch)); u64 rate = q->rate_bps; - u32 quantum1 = 256; - u32 quantum2 = 256; + u32 quantum = 256; u32 i; q->tin_cnt = 8; @@ -2364,18 +2356,14 @@ static int cake_config_diffserv8(struct Qdisc *sch) cake_set_rate(b, rate, mtu, us_to_ns(q->target), us_to_ns(q->interval)); - b->tin_quantum_prio = max_t(u16, 1U, quantum1); - b->tin_quantum_band = max_t(u16, 1U, quantum2); + b->tin_quantum = max_t(u16, 1U, quantum); /* calculate next class's parameters */ rate *= 7; rate >>= 3; - quantum1 *= 3; - quantum1 >>= 1; - - quantum2 *= 7; - quantum2 >>= 3; + quantum *= 7; + quantum >>= 3; } return 0; @@ -2414,17 +2402,11 @@ static int cake_config_diffserv4(struct Qdisc *sch) cake_set_rate(&q->tins[3], rate >> 2, mtu, us_to_ns(q->target), us_to_ns(q->interval)); - /* priority weights */ - q->tins[0].tin_quantum_prio = quantum; - q->tins[1].tin_quantum_prio = quantum >> 4; - q->tins[2].tin_quantum_prio = quantum << 2; - q->tins[3].tin_quantum_prio = quantum << 4; - /* bandwidth-sharing weights */ - q->tins[0].tin_quantum_band = quantum; - q->tins[1].tin_quantum_band = quantum >> 4; - q->tins[2].tin_quantum_band = quantum >> 1; - q->tins[3].tin_quantum_band = quantum >> 2; + q->tins[0].tin_quantum = quantum; + q->tins[1].tin_quantum = quantum >> 4; + q->tins[2].tin_quantum = quantum >> 1; + q->tins[3].tin_quantum = quantum >> 2; return 0; } @@ -2455,15 +2437,10 @@ static int cake_config_diffserv3(struct Qdisc *sch) cake_set_rate(&q->tins[2], rate >> 2, mtu, us_to_ns(q->target), us_to_ns(q->interval)); - /* priority weights */ - q->tins[0].tin_quantum_prio = quantum; - q->tins[1].tin_quantum_prio = quantum >> 4; - q->tins[2].tin_quantum_prio = quantum << 4; - /* bandwidth-sharing weights */ - q->tins[0].tin_quantum_band = quantum; - q->tins[1].tin_quantum_band = quantum >> 4; - q->tins[2].tin_quantum_band = quantum >> 2; + q->tins[0].tin_quantum = quantum; + q->tins[1].tin_quantum = quantum >> 4; + q->tins[2].tin_quantum = quantum >> 2; return 0; } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c new file mode 100644 index 000000000000..a87e9159338c --- /dev/null +++ b/net/sched/sch_ets.c @@ -0,0 +1,828 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * net/sched/sch_ets.c Enhanced Transmission Selection scheduler + * + * Description + * ----------- + * + * The Enhanced Transmission Selection scheduler is a classful queuing + * discipline that merges functionality of PRIO and DRR qdiscs in one scheduler. + * ETS makes it easy to configure a set of strict and bandwidth-sharing bands to + * implement the transmission selection described in 802.1Qaz. + * + * Although ETS is technically classful, it's not possible to add and remove + * classes at will. Instead one specifies number of classes, how many are + * PRIO-like and how many DRR-like, and quanta for the latter. + * + * Algorithm + * --------- + * + * The strict classes, if any, are tried for traffic first: first band 0, if it + * has no traffic then band 1, etc. + * + * When there is no traffic in any of the strict queues, the bandwidth-sharing + * ones are tried next. Each band is assigned a deficit counter, initialized to + * "quantum" of that band. ETS maintains a list of active bandwidth-sharing + * bands whose qdiscs are non-empty. A packet is dequeued from the band at the + * head of the list if the packet size is smaller or equal to the deficit + * counter. If the counter is too small, it is increased by "quantum" and the + * scheduler moves on to the next band in the active list. + */ + +#include <linux/module.h> +#include <net/gen_stats.h> +#include <net/netlink.h> +#include <net/pkt_cls.h> +#include <net/pkt_sched.h> +#include <net/sch_generic.h> + +struct ets_class { + struct list_head alist; /* In struct ets_sched.active. */ + struct Qdisc *qdisc; + u32 quantum; + u32 deficit; + struct gnet_stats_basic_packed bstats; + struct gnet_stats_queue qstats; +}; + +struct ets_sched { + struct list_head active; + struct tcf_proto __rcu *filter_list; + struct tcf_block *block; + unsigned int nbands; + unsigned int nstrict; + u8 prio2band[TC_PRIO_MAX + 1]; + struct ets_class classes[TCQ_ETS_MAX_BANDS]; +}; + +static const struct nla_policy ets_policy[TCA_ETS_MAX + 1] = { + [TCA_ETS_NBANDS] = { .type = NLA_U8 }, + [TCA_ETS_NSTRICT] = { .type = NLA_U8 }, + [TCA_ETS_QUANTA] = { .type = NLA_NESTED }, + [TCA_ETS_PRIOMAP] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy ets_priomap_policy[TCA_ETS_MAX + 1] = { + [TCA_ETS_PRIOMAP_BAND] = { .type = NLA_U8 }, +}; + +static const struct nla_policy ets_quanta_policy[TCA_ETS_MAX + 1] = { + [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 }, +}; + +static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = { + [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 }, +}; + +static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr, + unsigned int *quantum, + struct netlink_ext_ack *extack) +{ + *quantum = nla_get_u32(attr); + if (!*quantum) { + NL_SET_ERR_MSG(extack, "ETS quantum cannot be zero"); + return -EINVAL; + } + return 0; +} + +static struct ets_class * +ets_class_from_arg(struct Qdisc *sch, unsigned long arg) +{ + struct ets_sched *q = qdisc_priv(sch); + + return &q->classes[arg - 1]; +} + +static u32 ets_class_id(struct Qdisc *sch, const struct ets_class *cl) +{ + struct ets_sched *q = qdisc_priv(sch); + int band = cl - q->classes; + + return TC_H_MAKE(sch->handle, band + 1); +} + +static void ets_offload_change(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct ets_sched *q = qdisc_priv(sch); + struct tc_ets_qopt_offload qopt; + unsigned int w_psum_prev = 0; + unsigned int q_psum = 0; + unsigned int q_sum = 0; + unsigned int quantum; + unsigned int w_psum; + unsigned int weight; + unsigned int i; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return; + + qopt.command = TC_ETS_REPLACE; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + qopt.replace_params.bands = q->nbands; + qopt.replace_params.qstats = &sch->qstats; + memcpy(&qopt.replace_params.priomap, + q->prio2band, sizeof(q->prio2band)); + + for (i = 0; i < q->nbands; i++) + q_sum += q->classes[i].quantum; + + for (i = 0; i < q->nbands; i++) { + quantum = q->classes[i].quantum; + q_psum += quantum; + w_psum = quantum ? q_psum * 100 / q_sum : 0; + weight = w_psum - w_psum_prev; + w_psum_prev = w_psum; + + qopt.replace_params.quanta[i] = quantum; + qopt.replace_params.weights[i] = weight; + } + + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt); +} + +static void ets_offload_destroy(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_ets_qopt_offload qopt; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return; + + qopt.command = TC_ETS_DESTROY; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt); +} + +static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new, + struct Qdisc *old, unsigned long arg, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_ets_qopt_offload qopt; + + qopt.command = TC_ETS_GRAFT; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + qopt.graft_params.band = arg - 1; + qopt.graft_params.child_handle = new->handle; + + qdisc_offload_graft_helper(dev, sch, new, old, TC_SETUP_QDISC_ETS, + &qopt, extack); +} + +static int ets_offload_dump(struct Qdisc *sch) +{ + struct tc_ets_qopt_offload qopt; + + qopt.command = TC_ETS_STATS; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + qopt.stats.bstats = &sch->bstats; + qopt.stats.qstats = &sch->qstats; + + return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_ETS, &qopt); +} + +static bool ets_class_is_strict(struct ets_sched *q, const struct ets_class *cl) +{ + unsigned int band = cl - q->classes; + + return band < q->nstrict; +} + +static int ets_class_change(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg, + struct netlink_ext_ack *extack) +{ + struct ets_class *cl = ets_class_from_arg(sch, *arg); + struct ets_sched *q = qdisc_priv(sch); + struct nlattr *opt = tca[TCA_OPTIONS]; + struct nlattr *tb[TCA_ETS_MAX + 1]; + unsigned int quantum; + int err; + + /* Classes can be added and removed only through Qdisc_ops.change + * interface. + */ + if (!cl) { + NL_SET_ERR_MSG(extack, "Fine-grained class addition and removal is not supported"); + return -EOPNOTSUPP; + } + + if (!opt) { + NL_SET_ERR_MSG(extack, "ETS options are required for this operation"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_class_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_ETS_QUANTA_BAND]) + /* Nothing to configure. */ + return 0; + + if (ets_class_is_strict(q, cl)) { + NL_SET_ERR_MSG(extack, "Strict bands do not have a configurable quantum"); + return -EINVAL; + } + + err = ets_quantum_parse(sch, tb[TCA_ETS_QUANTA_BAND], &quantum, + extack); + if (err) + return err; + + sch_tree_lock(sch); + cl->quantum = quantum; + sch_tree_unlock(sch); + + ets_offload_change(sch); + return 0; +} + +static int ets_class_graft(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old, + struct netlink_ext_ack *extack) +{ + struct ets_class *cl = ets_class_from_arg(sch, arg); + + if (!new) { + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + ets_class_id(sch, cl), NULL); + if (!new) + new = &noop_qdisc; + else + qdisc_hash_add(new, true); + } + + *old = qdisc_replace(sch, new, &cl->qdisc); + ets_offload_graft(sch, new, *old, arg, extack); + return 0; +} + +static struct Qdisc *ets_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct ets_class *cl = ets_class_from_arg(sch, arg); + + return cl->qdisc; +} + +static unsigned long ets_class_find(struct Qdisc *sch, u32 classid) +{ + unsigned long band = TC_H_MIN(classid); + struct ets_sched *q = qdisc_priv(sch); + + if (band - 1 >= q->nbands) + return 0; + return band; +} + +static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct ets_class *cl = ets_class_from_arg(sch, arg); + struct ets_sched *q = qdisc_priv(sch); + + /* We get notified about zero-length child Qdiscs as well if they are + * offloaded. Those aren't on the active list though, so don't attempt + * to remove them. + */ + if (!ets_class_is_strict(q, cl) && sch->q.qlen) + list_del(&cl->alist); +} + +static int ets_class_dump(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct ets_class *cl = ets_class_from_arg(sch, arg); + struct ets_sched *q = qdisc_priv(sch); + struct nlattr *nest; + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = ets_class_id(sch, cl); + tcm->tcm_info = cl->qdisc->handle; + + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + if (!ets_class_is_strict(q, cl)) { + if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, cl->quantum)) + goto nla_put_failure; + } + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) +{ + struct ets_class *cl = ets_class_from_arg(sch, arg); + struct Qdisc *cl_q = cl->qdisc; + + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl_q->bstats) < 0 || + qdisc_qstats_copy(d, cl_q) < 0) + return -1; + + return 0; +} + +static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct ets_sched *q = qdisc_priv(sch); + int i; + + if (arg->stop) + return; + + for (i = 0; i < q->nbands; i++) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, i + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static struct tcf_block * +ets_qdisc_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) +{ + struct ets_sched *q = qdisc_priv(sch); + + if (cl) { + NL_SET_ERR_MSG(extack, "ETS classid must be zero"); + return NULL; + } + + return q->block; +} + +static unsigned long ets_qdisc_bind_tcf(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return ets_class_find(sch, classid); +} + +static void ets_qdisc_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ +} + +static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct ets_sched *q = qdisc_priv(sch); + u32 band = skb->priority; + struct tcf_result res; + struct tcf_proto *fl; + int err; + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + if (TC_H_MAJ(skb->priority) != sch->handle) { + fl = rcu_dereference_bh(q->filter_list); + err = tcf_classify(skb, fl, &res, false); +#ifdef CONFIG_NET_CLS_ACT + switch (err) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + case TC_ACT_TRAP: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ + case TC_ACT_SHOT: + return NULL; + } +#endif + if (!fl || err < 0) { + if (TC_H_MAJ(band)) + band = 0; + return &q->classes[q->prio2band[band & TC_PRIO_MAX]]; + } + band = res.classid; + } + band = TC_H_MIN(band) - 1; + if (band >= q->nbands) + return &q->classes[q->prio2band[0]]; + return &q->classes[band]; +} + +static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + unsigned int len = qdisc_pkt_len(skb); + struct ets_sched *q = qdisc_priv(sch); + struct ets_class *cl; + int err = 0; + bool first; + + cl = ets_classify(skb, sch, &err); + if (!cl) { + if (err & __NET_XMIT_BYPASS) + qdisc_qstats_drop(sch); + __qdisc_drop(skb, to_free); + return err; + } + + first = !cl->qdisc->q.qlen; + err = qdisc_enqueue(skb, cl->qdisc, to_free); + if (unlikely(err != NET_XMIT_SUCCESS)) { + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + qdisc_qstats_drop(sch); + } + return err; + } + + if (first && !ets_class_is_strict(q, cl)) { + list_add_tail(&cl->alist, &q->active); + cl->deficit = cl->quantum; + } + + sch->qstats.backlog += len; + sch->q.qlen++; + return err; +} + +static struct sk_buff * +ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb) +{ + qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + return skb; +} + +static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch) +{ + struct ets_sched *q = qdisc_priv(sch); + struct ets_class *cl; + struct sk_buff *skb; + unsigned int band; + unsigned int len; + + while (1) { + for (band = 0; band < q->nstrict; band++) { + cl = &q->classes[band]; + skb = qdisc_dequeue_peeked(cl->qdisc); + if (skb) + return ets_qdisc_dequeue_skb(sch, skb); + } + + if (list_empty(&q->active)) + goto out; + + cl = list_first_entry(&q->active, struct ets_class, alist); + skb = cl->qdisc->ops->peek(cl->qdisc); + if (!skb) { + qdisc_warn_nonwc(__func__, cl->qdisc); + goto out; + } + + len = qdisc_pkt_len(skb); + if (len <= cl->deficit) { + cl->deficit -= len; + skb = qdisc_dequeue_peeked(cl->qdisc); + if (unlikely(!skb)) + goto out; + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + return ets_qdisc_dequeue_skb(sch, skb); + } + + cl->deficit += cl->quantum; + list_move_tail(&cl->alist, &q->active); + } +out: + return NULL; +} + +static int ets_qdisc_priomap_parse(struct nlattr *priomap_attr, + unsigned int nbands, u8 *priomap, + struct netlink_ext_ack *extack) +{ + const struct nlattr *attr; + int prio = 0; + u8 band; + int rem; + int err; + + err = __nla_validate_nested(priomap_attr, TCA_ETS_MAX, + ets_priomap_policy, NL_VALIDATE_STRICT, + extack); + if (err) + return err; + + nla_for_each_nested(attr, priomap_attr, rem) { + switch (nla_type(attr)) { + case TCA_ETS_PRIOMAP_BAND: + if (prio > TC_PRIO_MAX) { + NL_SET_ERR_MSG_MOD(extack, "Too many priorities in ETS priomap"); + return -EINVAL; + } + band = nla_get_u8(attr); + if (band >= nbands) { + NL_SET_ERR_MSG_MOD(extack, "Invalid band number in ETS priomap"); + return -EINVAL; + } + priomap[prio++] = band; + break; + default: + WARN_ON_ONCE(1); /* Validate should have caught this. */ + return -EINVAL; + } + } + + return 0; +} + +static int ets_qdisc_quanta_parse(struct Qdisc *sch, struct nlattr *quanta_attr, + unsigned int nbands, unsigned int nstrict, + unsigned int *quanta, + struct netlink_ext_ack *extack) +{ + const struct nlattr *attr; + int band = nstrict; + int rem; + int err; + + err = __nla_validate_nested(quanta_attr, TCA_ETS_MAX, + ets_quanta_policy, NL_VALIDATE_STRICT, + extack); + if (err < 0) + return err; + + nla_for_each_nested(attr, quanta_attr, rem) { + switch (nla_type(attr)) { + case TCA_ETS_QUANTA_BAND: + if (band >= nbands) { + NL_SET_ERR_MSG_MOD(extack, "ETS quanta has more values than bands"); + return -EINVAL; + } + err = ets_quantum_parse(sch, attr, &quanta[band++], + extack); + if (err) + return err; + break; + default: + WARN_ON_ONCE(1); /* Validate should have caught this. */ + return -EINVAL; + } + } + + return 0; +} + +static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + unsigned int quanta[TCQ_ETS_MAX_BANDS] = {0}; + struct Qdisc *queues[TCQ_ETS_MAX_BANDS]; + struct ets_sched *q = qdisc_priv(sch); + struct nlattr *tb[TCA_ETS_MAX + 1]; + unsigned int oldbands = q->nbands; + u8 priomap[TC_PRIO_MAX + 1]; + unsigned int nstrict = 0; + unsigned int nbands; + unsigned int i; + int err; + + if (!opt) { + NL_SET_ERR_MSG(extack, "ETS options are required for this operation"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_ETS_NBANDS]) { + NL_SET_ERR_MSG_MOD(extack, "Number of bands is a required argument"); + return -EINVAL; + } + nbands = nla_get_u8(tb[TCA_ETS_NBANDS]); + if (nbands < 1 || nbands > TCQ_ETS_MAX_BANDS) { + NL_SET_ERR_MSG_MOD(extack, "Invalid number of bands"); + return -EINVAL; + } + /* Unless overridden, traffic goes to the last band. */ + memset(priomap, nbands - 1, sizeof(priomap)); + + if (tb[TCA_ETS_NSTRICT]) { + nstrict = nla_get_u8(tb[TCA_ETS_NSTRICT]); + if (nstrict > nbands) { + NL_SET_ERR_MSG_MOD(extack, "Invalid number of strict bands"); + return -EINVAL; + } + } + + if (tb[TCA_ETS_PRIOMAP]) { + err = ets_qdisc_priomap_parse(tb[TCA_ETS_PRIOMAP], + nbands, priomap, extack); + if (err) + return err; + } + + if (tb[TCA_ETS_QUANTA]) { + err = ets_qdisc_quanta_parse(sch, tb[TCA_ETS_QUANTA], + nbands, nstrict, quanta, extack); + if (err) + return err; + } + /* If there are more bands than strict + quanta provided, the remaining + * ones are ETS with quantum of MTU. Initialize the missing values here. + */ + for (i = nstrict; i < nbands; i++) { + if (!quanta[i]) + quanta[i] = psched_mtu(qdisc_dev(sch)); + } + + /* Before commit, make sure we can allocate all new qdiscs */ + for (i = oldbands; i < nbands; i++) { + queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + ets_class_id(sch, &q->classes[i]), + extack); + if (!queues[i]) { + while (i > oldbands) + qdisc_put(queues[--i]); + return -ENOMEM; + } + } + + sch_tree_lock(sch); + + q->nbands = nbands; + q->nstrict = nstrict; + memcpy(q->prio2band, priomap, sizeof(priomap)); + + for (i = q->nbands; i < oldbands; i++) + qdisc_tree_flush_backlog(q->classes[i].qdisc); + + for (i = 0; i < q->nbands; i++) + q->classes[i].quantum = quanta[i]; + + for (i = oldbands; i < q->nbands; i++) { + q->classes[i].qdisc = queues[i]; + if (q->classes[i].qdisc != &noop_qdisc) + qdisc_hash_add(q->classes[i].qdisc, true); + } + + sch_tree_unlock(sch); + + ets_offload_change(sch); + for (i = q->nbands; i < oldbands; i++) { + qdisc_put(q->classes[i].qdisc); + memset(&q->classes[i], 0, sizeof(q->classes[i])); + } + return 0; +} + +static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct ets_sched *q = qdisc_priv(sch); + int err; + + if (!opt) + return -EINVAL; + + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); + if (err) + return err; + + INIT_LIST_HEAD(&q->active); + return ets_qdisc_change(sch, opt, extack); +} + +static void ets_qdisc_reset(struct Qdisc *sch) +{ + struct ets_sched *q = qdisc_priv(sch); + int band; + + for (band = q->nstrict; band < q->nbands; band++) { + if (q->classes[band].qdisc->q.qlen) + list_del(&q->classes[band].alist); + } + for (band = 0; band < q->nbands; band++) + qdisc_reset(q->classes[band].qdisc); + sch->qstats.backlog = 0; + sch->q.qlen = 0; +} + +static void ets_qdisc_destroy(struct Qdisc *sch) +{ + struct ets_sched *q = qdisc_priv(sch); + int band; + + ets_offload_destroy(sch); + tcf_block_put(q->block); + for (band = 0; band < q->nbands; band++) + qdisc_put(q->classes[band].qdisc); +} + +static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct ets_sched *q = qdisc_priv(sch); + struct nlattr *opts; + struct nlattr *nest; + int band; + int prio; + int err; + + err = ets_offload_dump(sch); + if (err) + return err; + + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); + if (!opts) + goto nla_err; + + if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands)) + goto nla_err; + + if (q->nstrict && + nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict)) + goto nla_err; + + if (q->nbands > q->nstrict) { + nest = nla_nest_start(skb, TCA_ETS_QUANTA); + if (!nest) + goto nla_err; + + for (band = q->nstrict; band < q->nbands; band++) { + if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, + q->classes[band].quantum)) + goto nla_err; + } + + nla_nest_end(skb, nest); + } + + nest = nla_nest_start(skb, TCA_ETS_PRIOMAP); + if (!nest) + goto nla_err; + + for (prio = 0; prio <= TC_PRIO_MAX; prio++) { + if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio])) + goto nla_err; + } + + nla_nest_end(skb, nest); + + return nla_nest_end(skb, opts); + +nla_err: + nla_nest_cancel(skb, opts); + return -EMSGSIZE; +} + +static const struct Qdisc_class_ops ets_class_ops = { + .change = ets_class_change, + .graft = ets_class_graft, + .leaf = ets_class_leaf, + .find = ets_class_find, + .qlen_notify = ets_class_qlen_notify, + .dump = ets_class_dump, + .dump_stats = ets_class_dump_stats, + .walk = ets_qdisc_walk, + .tcf_block = ets_qdisc_tcf_block, + .bind_tcf = ets_qdisc_bind_tcf, + .unbind_tcf = ets_qdisc_unbind_tcf, +}; + +static struct Qdisc_ops ets_qdisc_ops __read_mostly = { + .cl_ops = &ets_class_ops, + .id = "ets", + .priv_size = sizeof(struct ets_sched), + .enqueue = ets_qdisc_enqueue, + .dequeue = ets_qdisc_dequeue, + .peek = qdisc_peek_dequeued, + .change = ets_qdisc_change, + .init = ets_qdisc_init, + .reset = ets_qdisc_reset, + .destroy = ets_qdisc_destroy, + .dump = ets_qdisc_dump, + .owner = THIS_MODULE, +}; + +static int __init ets_init(void) +{ + return register_qdisc(&ets_qdisc_ops); +} + +static void __exit ets_exit(void) +{ + unregister_qdisc(&ets_qdisc_ops); +} + +module_init(ets_init); +module_exit(ets_exit); +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5ab696efca95..6c9595f1048a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -441,7 +441,7 @@ static void dev_watchdog(struct timer_list *t) trace_net_dev_xmit_timeout(dev, i); WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", dev->name, netdev_drivername(dev), i); - dev->netdev_ops->ndo_tx_timeout(dev); + dev->netdev_ops->ndo_tx_timeout(dev, i); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + diff --git a/net/sctp/associola.c b/net/sctp/associola.c index bbd5004a5d09..437079a4883d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -584,7 +584,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, const gfp_t gfp, const int peer_state) { - struct net *net = sock_net(asoc->base.sk); struct sctp_transport *peer; struct sctp_sock *sp; unsigned short port; @@ -614,7 +613,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, return peer; } - peer = sctp_transport_new(net, addr, gfp); + peer = sctp_transport_new(asoc->base.net, addr, gfp); if (!peer) return NULL; @@ -974,7 +973,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) struct sctp_association *asoc = container_of(work, struct sctp_association, base.inqueue.immediate); - struct net *net = sock_net(asoc->base.sk); + struct net *net = asoc->base.net; union sctp_subtype subtype; struct sctp_endpoint *ep; struct sctp_chunk *chunk; @@ -1442,7 +1441,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Should we send a SACK to update our peer? */ static inline bool sctp_peer_needs_update(struct sctp_association *asoc) { - struct net *net = sock_net(asoc->base.sk); + struct net *net = asoc->base.net; + switch (asoc->state) { case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: @@ -1576,7 +1576,7 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, if (asoc->peer.ipv6_address) flags |= SCTP_ADDR6_PEERSUPP; - return sctp_bind_addr_copy(sock_net(asoc->base.sk), + return sctp_bind_addr_copy(asoc->base.net, &asoc->base.bind_addr, &asoc->ep->base.bind_addr, scope, gfp, flags); diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index cc3ce5d80b08..ab6a997e222f 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -225,7 +225,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (msg_len >= first_len) { msg->can_delay = 0; if (msg_len > first_len) - SCTP_INC_STATS(sock_net(asoc->base.sk), + SCTP_INC_STATS(asoc->base.net, SCTP_MIB_FRAGUSRMSGS); } else { /* Which may be the only one... */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 3ccab7440c9e..48c9c2c7602f 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -244,7 +244,7 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, struct sctp_endpoint *retval = NULL; if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) && - net_eq(sock_net(ep->base.sk), net)) { + net_eq(ep->base.net, net)) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, sctp_sk(ep->base.sk))) retval = ep; @@ -292,8 +292,8 @@ bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr) { struct sctp_sockaddr_entry *addr; + struct net *net = ep->base.net; struct sctp_bind_addr *bp; - struct net *net = sock_net(ep->base.sk); bp = &ep->base.bind_addr; /* This function is called with the socket lock held, @@ -384,7 +384,7 @@ normal: if (asoc && sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else { - SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(ep->base.net, SCTP_MIB_INCTRLCHUNKS); if (asoc) asoc->stats.ictrlchunks++; } diff --git a/net/sctp/input.c b/net/sctp/input.c index 4d2bcfc9d7f8..efaaefc3bb1c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -937,7 +937,7 @@ int sctp_hash_transport(struct sctp_transport *t) if (t->asoc->temp) return 0; - arg.net = sock_net(t->asoc->base.sk); + arg.net = t->asoc->base.net; arg.paddr = &t->ipaddr; arg.lport = htons(t->asoc->base.bind_addr.port); @@ -1004,12 +1004,11 @@ struct sctp_transport *sctp_epaddr_lookup_transport( const struct sctp_endpoint *ep, const union sctp_addr *paddr) { - struct net *net = sock_net(ep->base.sk); struct rhlist_head *tmp, *list; struct sctp_transport *t; struct sctp_hash_cmp_arg arg = { .paddr = paddr, - .net = net, + .net = ep->base.net, .lport = htons(ep->base.bind_addr.port), }; diff --git a/net/sctp/output.c b/net/sctp/output.c index dbda7e7927fd..1441eaf460bb 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -282,7 +282,7 @@ static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt, sctp_chunk_free(sack); goto out; } - SCTP_INC_STATS(sock_net(asoc->base.sk), + SCTP_INC_STATS(asoc->base.net, SCTP_MIB_OUTCTRLCHUNKS); asoc->stats.octrlchunks++; asoc->peer.sack_needed = 0; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 0dab62b67b9a..a031d1134c60 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -279,7 +279,7 @@ void sctp_outq_free(struct sctp_outq *q) /* Put a new chunk in an sctp_outq. */ void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) { - struct net *net = sock_net(q->asoc->base.sk); + struct net *net = q->asoc->base.net; pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk, chunk && chunk->chunk_hdr ? @@ -533,7 +533,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, enum sctp_retransmit_reason reason) { - struct net *net = sock_net(q->asoc->base.sk); + struct net *net = q->asoc->base.net; switch (reason) { case SCTP_RTXR_T3_RTX: @@ -1884,6 +1884,6 @@ void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) if (ftsn_chunk) { list_add_tail(&ftsn_chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(asoc->base.net, SCTP_MIB_OUTCTRLCHUNKS); } } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 48d63956a68c..09050c1d5517 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2307,7 +2307,6 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, const union sctp_addr *peer_addr, struct sctp_init_chunk *peer_init, gfp_t gfp) { - struct net *net = sock_net(asoc->base.sk); struct sctp_transport *transport; struct list_head *pos, *temp; union sctp_params param; @@ -2363,8 +2362,8 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * also give us an option to silently ignore the packet, which * is what we'll do here. */ - if (!net->sctp.addip_noauth && - (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) { + if (!asoc->base.net->sctp.addip_noauth && + (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) { asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | SCTP_PARAM_DEL_IP | SCTP_PARAM_SET_PRIMARY); @@ -2491,9 +2490,9 @@ static int sctp_process_param(struct sctp_association *asoc, const union sctp_addr *peer_addr, gfp_t gfp) { - struct net *net = sock_net(asoc->base.sk); struct sctp_endpoint *ep = asoc->ep; union sctp_addr_param *addr_param; + struct net *net = asoc->base.net; struct sctp_transport *t; enum sctp_scope scope; union sctp_addr addr; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index acd737d4c0e0..ce82699d0dca 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -516,8 +516,6 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands, struct sctp_transport *transport, int is_hb) { - struct net *net = sock_net(asoc->base.sk); - /* The check for association's overall error counter exceeding the * threshold is done in the state function. */ @@ -544,10 +542,10 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands, * is SCTP_ACTIVE, then mark this transport as Partially Failed, * see SCTP Quick Failover Draft, section 5.1 */ - if (net->sctp.pf_enable && - (transport->state == SCTP_ACTIVE) && - (transport->error_count < transport->pathmaxrxt) && - (transport->error_count > transport->pf_retrans)) { + if (asoc->base.net->sctp.pf_enable && + transport->state == SCTP_ACTIVE && + transport->error_count < transport->pathmaxrxt && + transport->error_count > transport->pf_retrans) { sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_PF, @@ -798,10 +796,8 @@ static int sctp_cmd_process_sack(struct sctp_cmd_seq *cmds, int err = 0; if (sctp_outq_sack(&asoc->outqueue, chunk)) { - struct net *net = sock_net(asoc->base.sk); - /* There are no more TSNs awaiting SACK. */ - err = sctp_do_sm(net, SCTP_EVENT_T_OTHER, + err = sctp_do_sm(asoc->base.net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN), asoc->state, asoc->ep, asoc, NULL, GFP_ATOMIC); @@ -834,7 +830,7 @@ static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_association *new) { - struct net *net = sock_net(asoc->base.sk); + struct net *net = asoc->base.net; struct sctp_chunk *abort; if (!sctp_assoc_update(asoc, new)) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 4ab8208a2dd4..42558fa3f3e4 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1320,7 +1320,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, struct sctp_chunk *init, struct sctp_cmd_seq *commands) { - struct net *net = sock_net(new_asoc->base.sk); + struct net *net = new_asoc->base.net; struct sctp_transport *new_addr; int ret = 1; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0b485952a71c..1b56fc440606 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -436,8 +436,7 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) static int sctp_send_asconf(struct sctp_association *asoc, struct sctp_chunk *chunk) { - struct net *net = sock_net(asoc->base.sk); - int retval = 0; + int retval = 0; /* If there is an outstanding ASCONF chunk, queue it for later * transmission. @@ -449,7 +448,7 @@ static int sctp_send_asconf(struct sctp_association *asoc, /* Hold the chunk until an ASCONF_ACK is received. */ sctp_chunk_hold(chunk); - retval = sctp_primitive_ASCONF(net, asoc, chunk); + retval = sctp_primitive_ASCONF(asoc->base.net, asoc, chunk); if (retval) sctp_chunk_free(chunk); else @@ -2428,9 +2427,8 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, int error; if (params->spp_flags & SPP_HB_DEMAND && trans) { - struct net *net = sock_net(trans->asoc->base.sk); - - error = sctp_primitive_REQUESTHEARTBEAT(net, trans->asoc, trans); + error = sctp_primitive_REQUESTHEARTBEAT(trans->asoc->base.net, + trans->asoc, trans); if (error) return error; } @@ -5364,7 +5362,7 @@ struct sctp_transport *sctp_transport_get_next(struct net *net, if (!sctp_transport_hold(t)) continue; - if (net_eq(sock_net(t->asoc->base.sk), net) && + if (net_eq(t->asoc->base.net, net) && t->asoc->peer.primary_path == t) break; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index e83cdaa2ab76..df60b5ef24cb 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -218,10 +218,9 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) static int sctp_send_reconf(struct sctp_association *asoc, struct sctp_chunk *chunk) { - struct net *net = sock_net(asoc->base.sk); int retval = 0; - retval = sctp_primitive_RECONF(net, asoc, chunk); + retval = sctp_primitive_RECONF(asoc->base.net, asoc, chunk); if (retval) sctp_chunk_free(chunk); diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 40c40be23fcb..6b13f737ebf2 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -241,9 +241,8 @@ out: if (!first_frag) return NULL; - retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), - &ulpq->reasm, first_frag, - last_frag); + retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, + first_frag, last_frag); if (retval) { sin->fsn = next_fsn; if (is_last) { @@ -326,7 +325,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled( pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { - retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm, pd_first, pd_last); if (retval) { @@ -337,8 +336,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled( goto out; found: - retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), - &ulpq->reasm, + retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; @@ -630,7 +628,7 @@ out: if (!first_frag) return NULL; - retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm_uo, first_frag, last_frag); if (retval) { @@ -716,7 +714,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo( pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { - retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm_uo, pd_first, pd_last); if (retval) { @@ -727,8 +725,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo( goto out; found: - retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), - &ulpq->reasm_uo, + retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm_uo, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; @@ -814,7 +811,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_first_uo(struct sctp_ulpq *ulpq) return NULL; out: - retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm_uo, first_frag, last_frag); if (retval) { @@ -921,7 +918,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_first(struct sctp_ulpq *ulpq) return NULL; out: - retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, last_frag); if (retval) { @@ -1159,7 +1156,7 @@ static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn) if (ftsn_chunk) { list_add_tail(&ftsn_chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(asoc->base.net, SCTP_MIB_OUTCTRLCHUNKS); } } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 7235a6032671..f4de064598f8 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -334,7 +334,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp); if (tp->rttvar || tp->srtt) { - struct net *net = sock_net(tp->asoc->base.sk); + struct net *net = tp->asoc->base.net; /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index b6536b7f14c0..1c6c640607c5 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -486,10 +486,9 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul cevent = sctp_skb2event(pd_first); pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { - retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm, - pd_first, - pd_last); + pd_first, pd_last); if (retval) sctp_ulpq_set_pd(ulpq); } @@ -497,7 +496,7 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul done: return retval; found: - retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; @@ -563,8 +562,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), - &ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, + first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; @@ -664,8 +663,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), - &ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, + first_frag, last_frag); return retval; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index bb92c7c6214c..c16b8d3e200b 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -41,7 +41,7 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */ .num = 0, }; -static atomic_t lgr_cnt; /* number of existing link groups */ +static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, @@ -1297,7 +1297,6 @@ static struct notifier_block smc_reboot_notifier = { int __init smc_core_init(void) { - atomic_set(&lgr_cnt, 0); return register_reboot_notifier(&smc_reboot_notifier); } diff --git a/net/socket.c b/net/socket.c index 4d38d49d6ad9..532f123e0459 100644 --- a/net/socket.c +++ b/net/socket.c @@ -128,6 +128,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, static ssize_t sock_splice_read(struct file *file, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +static void sock_show_fdinfo(struct seq_file *m, struct file *f); /* * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear @@ -150,6 +151,9 @@ static const struct file_operations socket_file_ops = { .sendpage = sock_sendpage, .splice_write = generic_splice_sendpage, .splice_read = sock_splice_read, +#ifdef CONFIG_PROC_FS + .show_fdinfo = sock_show_fdinfo, +#endif }; /* @@ -993,6 +997,14 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) return res; } +static void sock_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct socket *sock = f->private_data; + + if (sock->ops->show_fdinfo) + sock->ops->show_fdinfo(m, sock); +} + /* * Atomic setting of ioctl hooks to avoid race * with module unload. diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 55aeba681cf4..42e01e9cf893 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -562,18 +562,18 @@ int tipc_bclink_reset_stats(struct net *net) return 0; } -static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) +static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win) { struct tipc_link *l = tipc_bc_sndlink(net); if (!l) return -ENOPROTOOPT; - if (limit < BCLINK_WIN_MIN) - limit = BCLINK_WIN_MIN; - if (limit > TIPC_MAX_LINK_WIN) + if (max_win < BCLINK_WIN_MIN) + max_win = BCLINK_WIN_MIN; + if (max_win > TIPC_MAX_LINK_WIN) return -EINVAL; tipc_bcast_lock(net); - tipc_link_set_queue_limits(l, limit); + tipc_link_set_queue_limits(l, BCLINK_WIN_MIN, max_win); tipc_bcast_unlock(net); return 0; } @@ -683,6 +683,7 @@ int tipc_bcast_init(struct net *net) if (!tipc_link_bc_create(net, 0, 0, FB_MTU, BCLINK_WIN_DEFAULT, + BCLINK_WIN_DEFAULT, 0, &bb->inputq, NULL, diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d7ec26bd739d..34ca7b789eba 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -311,7 +311,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, b->identity = bearer_id; b->tolerance = m->tolerance; - b->window = m->window; + b->min_win = m->min_win; + b->max_win = m->max_win; b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = prio; @@ -796,7 +797,7 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance)) goto prop_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window)) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win)) goto prop_msg_full; if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu)) @@ -1088,7 +1089,7 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_PRIO]) b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) - b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) return -EINVAL; @@ -1142,7 +1143,7 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg, goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance)) goto prop_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window)) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win)) goto prop_msg_full; if (media->type_id == TIPC_MEDIA_TYPE_UDP) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu)) @@ -1275,7 +1276,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_PRIO]) m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (props[TIPC_NLA_PROP_WIN]) - m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { if (m->type_id != TIPC_MEDIA_TYPE_UDP) return -EINVAL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index d0c79cc6c0c2..bc0023119da2 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -119,7 +119,8 @@ struct tipc_media { char *raw); u32 priority; u32 tolerance; - u32 window; + u32 min_win; + u32 max_win; u32 mtu; u32 type_id; u32 hwaddr_len; @@ -158,7 +159,8 @@ struct tipc_bearer { struct packet_type pt; struct rcu_head rcu; u32 priority; - u32 window; + u32 min_win; + u32 max_win; u32 tolerance; u32 domain; u32 identity; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index f69a2fde9f4a..8b0bb600602d 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -92,7 +92,8 @@ struct tipc_media eth_media_info = { .raw2addr = tipc_eth_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, - .window = TIPC_DEF_LINK_WIN, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_MAX_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_ETH, .hwaddr_len = ETH_ALEN, .name = "eth" diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index e8c16718e3fa..7aa9ff88458d 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -42,6 +42,8 @@ #include "core.h" #include "bearer.h" +#define TIPC_MAX_IB_LINK_WIN 500 + /* convert InfiniBand address (media address format) media address to string */ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) @@ -94,7 +96,8 @@ struct tipc_media ib_media_info = { .raw2addr = tipc_ib_raw2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, - .window = TIPC_DEF_LINK_WIN, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_MAX_IB_LINK_WIN, .type_id = TIPC_MEDIA_TYPE_IB, .hwaddr_len = INFINIBAND_ALEN, .name = "ib" diff --git a/net/tipc/link.c b/net/tipc/link.c index 24d4d10756d3..467c53a1fb5c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -164,7 +164,6 @@ struct tipc_link { struct sk_buff *target_bskb; } backlog[5]; u16 snd_nxt; - u16 window; /* Reception */ u16 rcv_nxt; @@ -175,6 +174,12 @@ struct tipc_link { /* Congestion handling */ struct sk_buff_head wakeupq; + u16 window; + u16 min_win; + u16 ssthresh; + u16 max_win; + u16 cong_acks; + u16 checkpoint; /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; @@ -244,12 +249,13 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); -static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); -static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data); +static int tipc_link_release_pkts(struct tipc_link *l, u16 to); +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap); static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, struct sk_buff_head *xmitq); - +static void tipc_link_update_cwin(struct tipc_link *l, int released, + bool retransmitted); /* * Simple non-static link routines (i.e. referenced outside this file) */ @@ -308,9 +314,14 @@ u32 tipc_link_id(struct tipc_link *l) return l->peer_bearer_id << 16 | l->bearer_id; } -int tipc_link_window(struct tipc_link *l) +int tipc_link_min_win(struct tipc_link *l) { - return l->window; + return l->min_win; +} + +int tipc_link_max_win(struct tipc_link *l) +{ + return l->max_win; } int tipc_link_prio(struct tipc_link *l) @@ -436,7 +447,8 @@ u32 tipc_link_state(struct tipc_link *l) * @net_plane: network plane (A,B,c..) this link belongs to * @mtu: mtu to be advertised by link * @priority: priority to be used by link - * @window: send window to be used by link + * @min_win: minimal send window to be used by link + * @max_win: maximal send window to be used by link * @session: session to be used by link * @ownnode: identity of own node * @peer: node id of peer node @@ -451,7 +463,7 @@ u32 tipc_link_state(struct tipc_link *l) */ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, - int window, u32 session, u32 self, + u32 min_win, u32 max_win, u32 session, u32 self, u32 peer, u8 *peer_id, u16 peer_caps, struct tipc_link *bc_sndlink, struct tipc_link *bc_rcvlink, @@ -495,7 +507,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, l->advertised_mtu = mtu; l->mtu = mtu; l->priority = priority; - tipc_link_set_queue_limits(l, window); + tipc_link_set_queue_limits(l, min_win, max_win); l->ackers = 1; l->bc_sndlink = bc_sndlink; l->bc_rcvlink = bc_rcvlink; @@ -523,7 +535,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, * Returns true if link was created, otherwise false */ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, - int mtu, int window, u16 peer_caps, + int mtu, u32 min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, @@ -531,9 +543,9 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, { struct tipc_link *l; - if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window, - 0, ownnode, peer, NULL, peer_caps, bc_sndlink, - NULL, inputq, namedq, link)) + if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, min_win, + max_win, 0, ownnode, peer, NULL, peer_caps, + bc_sndlink, NULL, inputq, namedq, link)) return false; l = *link; @@ -772,6 +784,8 @@ bool tipc_link_too_silent(struct tipc_link *l) return (l->silent_intv_cnt + 2 > l->abort_limit); } +static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, + u16 from, u16 to, struct sk_buff_head *xmitq); /* tipc_link_timeout - perform periodic task as instructed from node timeout */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) @@ -804,6 +818,11 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) probe |= l->silent_intv_cnt; if (probe || mstate->monitoring) l->silent_intv_cnt++; + if (l->snd_nxt == l->checkpoint) { + tipc_link_update_cwin(l, 0, 0); + probe = true; + } + l->checkpoint = l->snd_nxt; break; case LINK_RESET: setup = l->rst_cnt++ <= 4; @@ -959,7 +978,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, int pkt_cnt = skb_queue_len(list); int imp = msg_importance(hdr); unsigned int mss = tipc_link_mss(l); - unsigned int maxwin = l->window; + unsigned int cwin = l->window; unsigned int mtu = l->mtu; bool new_bundle; int rc = 0; @@ -988,7 +1007,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, /* Prepare each packet for sending, and add to relevant queue: */ while ((skb = __skb_dequeue(list))) { - if (likely(skb_queue_len(transmq) < maxwin)) { + if (likely(skb_queue_len(transmq) < cwin)) { hdr = buf_msg(skb); msg_set_seqno(hdr, seqno); msg_set_ack(hdr, ack); @@ -1035,17 +1054,61 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return rc; } +static void tipc_link_update_cwin(struct tipc_link *l, int released, + bool retransmitted) +{ + int bklog_len = skb_queue_len(&l->backlogq); + struct sk_buff_head *txq = &l->transmq; + int txq_len = skb_queue_len(txq); + u16 cwin = l->window; + + /* Enter fast recovery */ + if (unlikely(retransmitted)) { + l->ssthresh = max_t(u16, l->window / 2, 300); + l->window = l->ssthresh; + return; + } + /* Enter slow start */ + if (unlikely(!released)) { + l->ssthresh = max_t(u16, l->window / 2, 300); + l->window = l->min_win; + return; + } + /* Don't increase window if no pressure on the transmit queue */ + if (txq_len + bklog_len < cwin) + return; + + /* Don't increase window if there are holes the transmit queue */ + if (txq_len && l->snd_nxt - buf_seqno(skb_peek(txq)) != txq_len) + return; + + l->cong_acks += released; + + /* Slow start */ + if (cwin <= l->ssthresh) { + l->window = min_t(u16, cwin + released, l->max_win); + return; + } + /* Congestion avoidance */ + if (l->cong_acks < cwin) + return; + l->window = min_t(u16, ++cwin, l->max_win); + l->cong_acks = 0; +} + static void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) { + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + struct sk_buff_head *txq = &l->transmq; struct sk_buff *skb, *_skb; - struct tipc_msg *hdr; - u16 seqno = l->snd_nxt; u16 ack = l->rcv_nxt - 1; - u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u16 seqno = l->snd_nxt; + struct tipc_msg *hdr; + u16 cwin = l->window; u32 imp; - while (skb_queue_len(&l->transmq) < l->window) { + while (skb_queue_len(txq) < cwin) { skb = skb_peek(&l->backlogq); if (!skb) break; @@ -1141,6 +1204,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, struct sk_buff *_skb, *skb = skb_peek(&l->transmq); u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; + int retransmitted = 0; struct tipc_msg *hdr; int rc = 0; @@ -1160,7 +1224,6 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, continue; if (more(msg_seqno(hdr), to)) break; - if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; @@ -1173,11 +1236,12 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; - + retransmitted++; /* Increase actual retrans counter & mark first time */ if (!TIPC_SKB_CB(skb)->retr_cnt++) TIPC_SKB_CB(skb)->retr_stamp = jiffies; } + tipc_link_update_cwin(l, 0, retransmitted); return 0; } @@ -1338,9 +1402,9 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, return rc; } -static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) +static int tipc_link_release_pkts(struct tipc_link *l, u16 acked) { - bool released = false; + int released = 0; struct sk_buff *skb, *tmp; skb_queue_walk_safe(&l->transmq, skb, tmp) { @@ -1348,7 +1412,7 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) break; __skb_unlink(skb, &l->transmq); kfree_skb(skb); - released = true; + released++; } return released; } @@ -1359,14 +1423,14 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) * * returns the actual allocated memory size */ -static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data) +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap) { struct sk_buff *skb = skb_peek(&l->deferdq); struct tipc_gap_ack_blks *ga = data; u16 len, expect, seqno = 0; u8 n = 0; - if (!skb) + if (!skb || !gap) goto exit; expect = buf_seqno(skb); @@ -1417,8 +1481,10 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, struct sk_buff *skb, *_skb, *tmp; struct tipc_msg *hdr; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + bool retransmitted = false; u16 ack = l->rcv_nxt - 1; bool passed = false; + u16 released = 0; u16 seqno, n = 0; int rc = 0; @@ -1430,6 +1496,7 @@ next_gap_ack: /* release skb */ __skb_unlink(skb, &l->transmq); kfree_skb(skb); + released++; } else if (less_eq(seqno, acked + gap)) { /* First, check if repeated retrans failures occurs? */ if (!passed && link_retransmit_failure(l, l, &rc)) @@ -1449,7 +1516,7 @@ next_gap_ack: _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; - + retransmitted = true; /* Increase actual retrans counter & mark first time */ if (!TIPC_SKB_CB(skb)->retr_cnt++) TIPC_SKB_CB(skb)->retr_stamp = jiffies; @@ -1463,7 +1530,10 @@ next_gap_ack: goto next_gap_ack; } } - + if (released || retransmitted) + tipc_link_update_cwin(l, released, retransmitted); + if (released) + tipc_link_advance_backlog(l, xmitq); return 0; } @@ -1487,7 +1557,6 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) l->snd_nxt = l->rcv_nxt; return TIPC_LINK_SND_STATE; } - /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; @@ -1521,7 +1590,8 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { u32 def_cnt = ++l->stats.deferred_recv; - u32 defq_len = skb_queue_len(&l->deferdq); + struct sk_buff_head *dfq = &l->deferdq; + u32 defq_len = skb_queue_len(dfq); int match1, match2; if (link_is_bc_rcvlink(l)) { @@ -1532,8 +1602,12 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, return 0; } - if (defq_len >= 3 && !((defq_len - 3) % 16)) - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); + if (defq_len >= 3 && !((defq_len - 3) % 16)) { + u16 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; + + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, + rcvgap, 0, 0, xmitq); + } return 0; } @@ -1548,6 +1622,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *defq = &l->deferdq; struct tipc_msg *hdr = buf_msg(skb); u16 seqno, rcv_nxt, win_lim; + int released = 0; int rc = 0; /* Verify and update link state */ @@ -1566,21 +1641,17 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, if (unlikely(!link_is_up(l))) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; - goto drop; + kfree_skb(skb); + break; } /* Drop if outside receive window */ if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { l->stats.duplicates++; - goto drop; - } - - /* Forward queues and wake up waiting users */ - if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) { - tipc_link_advance_backlog(l, xmitq); - if (unlikely(!skb_queue_empty(&l->wakeupq))) - link_prepare_wakeup(l); + kfree_skb(skb); + break; } + released += tipc_link_release_pkts(l, msg_ack(hdr)); /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { @@ -1603,9 +1674,13 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, break; } while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt))); - return rc; -drop: - kfree_skb(skb); + /* Forward queues and wake up waiting users */ + if (released) { + tipc_link_update_cwin(l, released, 0); + tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); + } return rc; } @@ -1631,7 +1706,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (!tipc_link_is_up(l) && (mtyp == STATE_MSG)) return; - if (!skb_queue_empty(dfq)) + if ((probe || probe_reply) && !skb_queue_empty(dfq)) rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, @@ -1664,7 +1739,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_probe(hdr, probe); msg_set_is_keepalive(hdr, probe || probe_reply); if (l->peer_caps & TIPC_GAP_ACK_BLOCK) - glen = tipc_build_gap_ack_blks(l, data); + glen = tipc_build_gap_ack_blks(l, data, rcvgap); tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + glen + dlen); skb_trim(skb, INT_H_SIZE + glen + dlen); @@ -2074,19 +2149,18 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, &l->mon_state, l->bearer_id); /* Send NACK if peer has sent pkts we haven't received yet */ - if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) + if ((reply || msg_is_keepalive(hdr)) && + more(peers_snd_nxt, rcv_nxt) && + !tipc_link_is_synching(l) && + skb_queue_empty(&l->deferdq)) rcvgap = peers_snd_nxt - l->rcv_nxt; if (rcvgap || reply) tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, rcvgap, 0, 0, xmitq); rc |= tipc_link_advance_transmq(l, ack, gap, ga, xmitq); - - /* If NACK, retransmit will now start at right position */ if (gap) l->stats.recv_nacks++; - - tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); } @@ -2305,15 +2379,18 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, return 0; } -void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) +void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win) { int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE); - l->window = win; - l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win); - l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = max_t(u16, 100, win * 2); - l->backlog[TIPC_HIGH_IMPORTANCE].limit = max_t(u16, 150, win * 3); - l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4); + l->min_win = min_win; + l->ssthresh = max_win; + l->max_win = max_win; + l->window = min_win; + l->backlog[TIPC_LOW_IMPORTANCE].limit = min_win * 2; + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = min_win * 4; + l->backlog[TIPC_HIGH_IMPORTANCE].limit = min_win * 6; + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = min_win * 8; l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } @@ -2366,10 +2443,10 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) } if (props[TIPC_NLA_PROP_WIN]) { - u32 win; + u32 max_win; - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - if ((win < TIPC_MIN_LINK_WIN) || (win > TIPC_MAX_LINK_WIN)) + max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (max_win < TIPC_DEF_LINK_WIN || max_win > TIPC_MAX_LINK_WIN) return -EINVAL; } @@ -2605,7 +2682,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->max_win)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode)) goto prop_msg_full; diff --git a/net/tipc/link.h b/net/tipc/link.h index c09e9d49d0a3..d3c1c3fc1659 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -73,7 +73,7 @@ enum { bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, - int window, u32 session, u32 ownnode, + u32 min_win, u32 max_win, u32 session, u32 ownnode, u32 peer, u8 *peer_id, u16 peer_caps, struct tipc_link *bc_sndlink, struct tipc_link *bc_rcvlink, @@ -81,7 +81,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, struct sk_buff_head *namedq, struct tipc_link **link); bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, - int mtu, int window, u16 peer_caps, + int mtu, u32 min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, @@ -115,7 +115,8 @@ char *tipc_link_name_ext(struct tipc_link *l, char *buf); u32 tipc_link_state(struct tipc_link *l); char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); -int tipc_link_window(struct tipc_link *l); +int tipc_link_min_win(struct tipc_link *l); +int tipc_link_max_win(struct tipc_link *l); void tipc_link_update_caps(struct tipc_link *l, u16 capabilities); bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr); unsigned long tipc_link_tolerance(struct tipc_link *l); @@ -124,7 +125,7 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, void tipc_link_set_prio(struct tipc_link *l, u32 prio, struct sk_buff_head *xmitq); void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); -void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); +void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win); int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *link, int nlflags); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); diff --git a/net/tipc/net.c b/net/tipc/net.c index 2de3cec9929d..85400e4242de 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -302,3 +302,59 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) return err; } + +static int __tipc_nl_addr_legacy_get(struct net *net, struct tipc_nl_msg *msg) +{ + struct tipc_net *tn = tipc_net(net); + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + 0, TIPC_NL_ADDR_LEGACY_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); + if (!attrs) + goto msg_full; + + if (tn->legacy_addr_format) + if (nla_put_flag(msg->skb, TIPC_NLA_NET_ADDR_LEGACY)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + struct sk_buff *rep; + int err; + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + err = __tipc_nl_addr_legacy_get(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + + return genlmsg_reply(msg.skb, info); +} diff --git a/net/tipc/net.h b/net/tipc/net.h index b7f2e364eb99..6740d97c706e 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -47,5 +47,6 @@ void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index e53231bd23b4..7c35094c20b8 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -83,6 +83,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 }, [TIPC_NLA_NET_NODEID] = { .type = NLA_U64 }, [TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 }, + [TIPC_NLA_NET_ADDR_LEGACY] = { .type = NLA_FLAG } }; const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { @@ -273,6 +274,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .doit = tipc_nl_node_flush_key, }, #endif + { + .cmd = TIPC_NL_ADDR_LEGACY_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_net_addr_legacy_get, + }, }; struct genl_family tipc_genl_family __ro_after_init = { diff --git a/net/tipc/node.c b/net/tipc/node.c index ab04e00cb95b..99b28b69fc17 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1139,7 +1139,8 @@ void tipc_node_check_dest(struct net *net, u32 addr, snd_l = tipc_bc_sndlink(net); if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, U16_MAX, - tipc_link_window(snd_l), + tipc_link_min_win(snd_l), + tipc_link_max_win(snd_l), n->capabilities, &n->bc_entry.inputq1, &n->bc_entry.namedq, snd_l, @@ -1233,7 +1234,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, get_random_bytes(&session, sizeof(u16)); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, - b->window, session, + b->min_win, b->max_win, session, tipc_own_addr(net), addr, peer_id, n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, @@ -2360,8 +2361,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) if (attrs[TIPC_NLA_LINK_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; - err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], - props); + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); if (err) { res = err; goto out; @@ -2380,10 +2380,12 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) tipc_link_set_prio(link, prio, &xmitq); } if (props[TIPC_NLA_PROP_WIN]) { - u32 win; + u32 max_win; - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - tipc_link_set_queue_limits(link, win); + max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, + tipc_link_min_win(link), + max_win); } } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ed113735c019..d6620ad53546 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -828,7 +828,8 @@ struct tipc_media udp_media_info = { .msg2addr = tipc_udp_msg2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, - .window = TIPC_DEF_LINK_WIN, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_DEF_LINK_WIN, .mtu = TIPC_DEF_LINK_UDP_MTU, .type_id = TIPC_MEDIA_TYPE_UDP, .hwaddr_len = 0, diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index cd91ad812291..1ba5a92832bb 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -178,7 +178,7 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) * socket and no in-flight SKBs associated with this * socket, so it is safe to free all the resources. */ -static void tls_device_sk_destruct(struct sock *sk) +void tls_device_sk_destruct(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); @@ -196,6 +196,7 @@ static void tls_device_sk_destruct(struct sock *sk) if (refcount_dec_and_test(&tls_ctx->refcount)) tls_device_queue_ctx_destruction(tls_ctx); } +EXPORT_SYMBOL_GPL(tls_device_sk_destruct); void tls_device_free_resources_tx(struct sock *sk) { @@ -903,7 +904,7 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk, spin_unlock_irq(&tls_device_lock); ctx->sk_destruct = sk->sk_destruct; - sk->sk_destruct = tls_device_sk_destruct; + smp_store_release(&sk->sk_destruct, tls_device_sk_destruct); } } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7cfdce10de36..050eed3b30dc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -676,6 +676,16 @@ static int unix_set_peek_off(struct sock *sk, int val) return 0; } +static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) +{ + struct sock *sk = sock->sk; + struct unix_sock *u; + + if (sk) { + u = unix_sk(sock->sk); + seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds)); + } +} static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, @@ -701,6 +711,7 @@ static const struct proto_ops unix_stream_ops = { .sendpage = unix_stream_sendpage, .splice_read = unix_stream_splice_read, .set_peek_off = unix_set_peek_off, + .show_fdinfo = unix_show_fdinfo, }; static const struct proto_ops unix_dgram_ops = { @@ -726,6 +737,7 @@ static const struct proto_ops unix_dgram_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_peek_off = unix_set_peek_off, + .show_fdinfo = unix_show_fdinfo, }; static const struct proto_ops unix_seqpacket_ops = { @@ -751,6 +763,7 @@ static const struct proto_ops unix_seqpacket_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_peek_off = unix_set_peek_off, + .show_fdinfo = unix_show_fdinfo, }; static struct proto unix_proto = { @@ -788,6 +801,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); + memset(&u->scm_stat, 0, sizeof(struct scm_stat)); unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) @@ -1572,6 +1586,28 @@ static bool unix_skb_scm_eq(struct sk_buff *skb, unix_secdata_eq(scm, skb); } +static void scm_stat_add(struct sock *sk, struct sk_buff *skb) +{ + struct scm_fp_list *fp = UNIXCB(skb).fp; + struct unix_sock *u = unix_sk(sk); + + lockdep_assert_held(&sk->sk_receive_queue.lock); + + if (unlikely(fp && fp->count)) + u->scm_stat.nr_fds += fp->count; +} + +static void scm_stat_del(struct sock *sk, struct sk_buff *skb) +{ + struct scm_fp_list *fp = UNIXCB(skb).fp; + struct unix_sock *u = unix_sk(sk); + + lockdep_assert_held(&sk->sk_receive_queue.lock); + + if (unlikely(fp && fp->count)) + u->scm_stat.nr_fds -= fp->count; +} + /* * Send AF_UNIX data. */ @@ -1757,7 +1793,10 @@ restart_locked: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); - skb_queue_tail(&other->sk_receive_queue, skb); + spin_lock(&other->sk_receive_queue.lock); + scm_stat_add(other, skb); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); @@ -1859,7 +1898,10 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto pipe_err_free; maybe_add_creds(skb, sock, other); - skb_queue_tail(&other->sk_receive_queue, skb); + spin_lock(&other->sk_receive_queue.lock); + scm_stat_add(other, skb); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2058,8 +2100,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); - skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err, - &last); + skb = __skb_try_recv_datagram(sk, flags, scm_stat_del, + &skip, &err, &last); if (skb) break; @@ -2353,8 +2395,12 @@ unlock: sk_peek_offset_bwd(sk, chunk); - if (UNIXCB(skb).fp) + if (UNIXCB(skb).fp) { + spin_lock(&sk->sk_receive_queue.lock); + scm_stat_del(sk, skb); + spin_unlock(&sk->sk_receive_queue.lock); unix_detach_fds(&scm, skb); + } if (unix_skb_len(skb)) break; diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 8abcb815af2d..56356d2980c8 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -26,6 +26,18 @@ config VSOCKETS_DIAG Enable this module so userspace applications can query open sockets. +config VSOCKETS_LOOPBACK + tristate "Virtual Sockets loopback transport" + depends on VSOCKETS + default y + select VIRTIO_VSOCKETS_COMMON + help + This module implements a loopback transport for Virtual Sockets, + using vmw_vsock_virtio_transport_common. + + To compile this driver as a module, choose M here: the module + will be called vsock_loopback. If unsure, say N. + config VMWARE_VMCI_VSOCKETS tristate "VMware VMCI transport for Virtual Sockets" depends on VSOCKETS && VMWARE_VMCI diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index 7c6f9a0b67b0..6a943ec95c4a 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o +obj-$(CONFIG_VSOCKETS_LOOPBACK) += vsock_loopback.o vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 74db4cd637a7..9c5b2a91baad 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -136,6 +136,8 @@ static const struct vsock_transport *transport_h2g; static const struct vsock_transport *transport_g2h; /* Transport used for DGRAM communication */ static const struct vsock_transport *transport_dgram; +/* Transport used for local communication */ +static const struct vsock_transport *transport_local; static DEFINE_MUTEX(vsock_register_mutex); /**** UTILS ****/ @@ -386,6 +388,21 @@ void vsock_enqueue_accept(struct sock *listener, struct sock *connected) } EXPORT_SYMBOL_GPL(vsock_enqueue_accept); +static bool vsock_use_local_transport(unsigned int remote_cid) +{ + if (!transport_local) + return false; + + if (remote_cid == VMADDR_CID_LOCAL) + return true; + + if (transport_g2h) { + return remote_cid == transport_g2h->get_local_cid(); + } else { + return remote_cid == VMADDR_CID_HOST; + } +} + static void vsock_deassign_transport(struct vsock_sock *vsk) { if (!vsk->transport) @@ -402,9 +419,9 @@ static void vsock_deassign_transport(struct vsock_sock *vsk) * (e.g. during the connect() or when a connection request on a listener * socket is received). * The vsk->remote_addr is used to decide which transport to use: + * - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if + * g2h is not loaded, will use local transport; * - remote CID <= VMADDR_CID_HOST will use guest->host transport; - * - remote CID == local_cid (guest->host transport) will use guest->host - * transport for loopback (host->guest transports don't support loopback); * - remote CID > VMADDR_CID_HOST will use host->guest transport; */ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) @@ -419,9 +436,9 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_dgram; break; case SOCK_STREAM: - if (remote_cid <= VMADDR_CID_HOST || - (transport_g2h && - remote_cid == transport_g2h->get_local_cid())) + if (vsock_use_local_transport(remote_cid)) + new_transport = transport_local; + else if (remote_cid <= VMADDR_CID_HOST) new_transport = transport_g2h; else new_transport = transport_h2g; @@ -464,6 +481,9 @@ bool vsock_find_cid(unsigned int cid) if (transport_h2g && cid == VMADDR_CID_HOST) return true; + if (transport_local && cid == VMADDR_CID_LOCAL) + return true; + return false; } EXPORT_SYMBOL_GPL(vsock_find_cid); @@ -2137,7 +2157,7 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport); int vsock_core_register(const struct vsock_transport *t, int features) { - const struct vsock_transport *t_h2g, *t_g2h, *t_dgram; + const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local; int err = mutex_lock_interruptible(&vsock_register_mutex); if (err) @@ -2146,6 +2166,7 @@ int vsock_core_register(const struct vsock_transport *t, int features) t_h2g = transport_h2g; t_g2h = transport_g2h; t_dgram = transport_dgram; + t_local = transport_local; if (features & VSOCK_TRANSPORT_F_H2G) { if (t_h2g) { @@ -2171,9 +2192,18 @@ int vsock_core_register(const struct vsock_transport *t, int features) t_dgram = t; } + if (features & VSOCK_TRANSPORT_F_LOCAL) { + if (t_local) { + err = -EBUSY; + goto err_busy; + } + t_local = t; + } + transport_h2g = t_h2g; transport_g2h = t_g2h; transport_dgram = t_dgram; + transport_local = t_local; err_busy: mutex_unlock(&vsock_register_mutex); @@ -2194,6 +2224,9 @@ void vsock_core_unregister(const struct vsock_transport *t) if (transport_dgram == t) transport_dgram = NULL; + if (transport_local == t) + transport_local = NULL; + mutex_unlock(&vsock_register_mutex); } EXPORT_SYMBOL_GPL(vsock_core_unregister); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 1458c5c8b64d..dfbaf6bd8b1c 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -44,10 +44,6 @@ struct virtio_vsock { spinlock_t send_pkt_list_lock; struct list_head send_pkt_list; - struct work_struct loopback_work; - spinlock_t loopback_list_lock; /* protects loopback_list */ - struct list_head loopback_list; - atomic_t queued_replies; /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] @@ -86,20 +82,6 @@ out_rcu: return ret; } -static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock, - struct virtio_vsock_pkt *pkt) -{ - int len = pkt->len; - - spin_lock_bh(&vsock->loopback_list_lock); - list_add_tail(&pkt->list, &vsock->loopback_list); - spin_unlock_bh(&vsock->loopback_list_lock); - - queue_work(virtio_vsock_workqueue, &vsock->loopback_work); - - return len; -} - static void virtio_transport_send_pkt_work(struct work_struct *work) { @@ -194,7 +176,8 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) } if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) { - len = virtio_transport_send_pkt_loopback(vsock, pkt); + virtio_transport_free_pkt(pkt); + len = -ENODEV; goto out_rcu; } @@ -502,33 +485,6 @@ static struct virtio_transport virtio_transport = { .send_pkt = virtio_transport_send_pkt, }; -static void virtio_transport_loopback_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, loopback_work); - LIST_HEAD(pkts); - - spin_lock_bh(&vsock->loopback_list_lock); - list_splice_init(&vsock->loopback_list, &pkts); - spin_unlock_bh(&vsock->loopback_list_lock); - - mutex_lock(&vsock->rx_lock); - - if (!vsock->rx_run) - goto out; - - while (!list_empty(&pkts)) { - struct virtio_vsock_pkt *pkt; - - pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list); - list_del_init(&pkt->list); - - virtio_transport_recv_pkt(&virtio_transport, pkt); - } -out: - mutex_unlock(&vsock->rx_lock); -} - static void virtio_transport_rx_work(struct work_struct *work) { struct virtio_vsock *vsock = @@ -633,13 +589,10 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_init(&vsock->event_lock); spin_lock_init(&vsock->send_pkt_list_lock); INIT_LIST_HEAD(&vsock->send_pkt_list); - spin_lock_init(&vsock->loopback_list_lock); - INIT_LIST_HEAD(&vsock->loopback_list); INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); INIT_WORK(&vsock->event_work, virtio_transport_event_work); INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); - INIT_WORK(&vsock->loopback_work, virtio_transport_loopback_work); mutex_lock(&vsock->tx_lock); vsock->tx_run = true; @@ -720,22 +673,12 @@ static void virtio_vsock_remove(struct virtio_device *vdev) } spin_unlock_bh(&vsock->send_pkt_list_lock); - spin_lock_bh(&vsock->loopback_list_lock); - while (!list_empty(&vsock->loopback_list)) { - pkt = list_first_entry(&vsock->loopback_list, - struct virtio_vsock_pkt, list); - list_del(&pkt->list); - virtio_transport_free_pkt(pkt); - } - spin_unlock_bh(&vsock->loopback_list_lock); - /* Delete virtqueues and flush outstanding callbacks if any */ vdev->config->del_vqs(vdev); /* Other works can be queued before 'config->del_vqs()', so we flush * all works before to free the vsock object to avoid use after free. */ - flush_work(&vsock->loopback_work); flush_work(&vsock->rx_work); flush_work(&vsock->tx_work); flush_work(&vsock->event_work); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index e5ea29c6bca7..0e20b0f6eb65 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -11,9 +11,6 @@ #include <linux/sched/signal.h> #include <linux/ctype.h> #include <linux/list.h> -#include <linux/virtio.h> -#include <linux/virtio_ids.h> -#include <linux/virtio_config.h> #include <linux/virtio_vsock.h> #include <uapi/linux/vsockmon.h> diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 644d32e43d23..4b8b1150a738 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -648,7 +648,7 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) static bool vmci_transport_stream_allow(u32 cid, u32 port) { static const u32 non_socket_contexts[] = { - VMADDR_CID_RESERVED, + VMADDR_CID_LOCAL, }; int i; diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c new file mode 100644 index 000000000000..a45f7ffca8c5 --- /dev/null +++ b/net/vmw_vsock/vsock_loopback.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* loopback transport for vsock using virtio_transport_common APIs + * + * Copyright (C) 2013-2019 Red Hat, Inc. + * Authors: Asias He <asias@redhat.com> + * Stefan Hajnoczi <stefanha@redhat.com> + * Stefano Garzarella <sgarzare@redhat.com> + * + */ +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/virtio_vsock.h> + +struct vsock_loopback { + struct workqueue_struct *workqueue; + + spinlock_t pkt_list_lock; /* protects pkt_list */ + struct list_head pkt_list; + struct work_struct pkt_work; +}; + +static struct vsock_loopback the_vsock_loopback; + +static u32 vsock_loopback_get_local_cid(void) +{ + return VMADDR_CID_LOCAL; +} + +static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt) +{ + struct vsock_loopback *vsock = &the_vsock_loopback; + int len = pkt->len; + + spin_lock_bh(&vsock->pkt_list_lock); + list_add_tail(&pkt->list, &vsock->pkt_list); + spin_unlock_bh(&vsock->pkt_list_lock); + + queue_work(vsock->workqueue, &vsock->pkt_work); + + return len; +} + +static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk) +{ + struct vsock_loopback *vsock = &the_vsock_loopback; + struct virtio_vsock_pkt *pkt, *n; + LIST_HEAD(freeme); + + spin_lock_bh(&vsock->pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + return 0; +} + +static struct virtio_transport loopback_transport = { + .transport = { + .module = THIS_MODULE, + + .get_local_cid = vsock_loopback_get_local_cid, + + .init = virtio_transport_do_socket_init, + .destruct = virtio_transport_destruct, + .release = virtio_transport_release, + .connect = virtio_transport_connect, + .shutdown = virtio_transport_shutdown, + .cancel_pkt = vsock_loopback_cancel_pkt, + + .dgram_bind = virtio_transport_dgram_bind, + .dgram_dequeue = virtio_transport_dgram_dequeue, + .dgram_enqueue = virtio_transport_dgram_enqueue, + .dgram_allow = virtio_transport_dgram_allow, + + .stream_dequeue = virtio_transport_stream_dequeue, + .stream_enqueue = virtio_transport_stream_enqueue, + .stream_has_data = virtio_transport_stream_has_data, + .stream_has_space = virtio_transport_stream_has_space, + .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, + .stream_is_active = virtio_transport_stream_is_active, + .stream_allow = virtio_transport_stream_allow, + + .notify_poll_in = virtio_transport_notify_poll_in, + .notify_poll_out = virtio_transport_notify_poll_out, + .notify_recv_init = virtio_transport_notify_recv_init, + .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, + .notify_send_init = virtio_transport_notify_send_init, + .notify_send_pre_block = virtio_transport_notify_send_pre_block, + .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, + .notify_buffer_size = virtio_transport_notify_buffer_size, + }, + + .send_pkt = vsock_loopback_send_pkt, +}; + +static void vsock_loopback_work(struct work_struct *work) +{ + struct vsock_loopback *vsock = + container_of(work, struct vsock_loopback, pkt_work); + LIST_HEAD(pkts); + + spin_lock_bh(&vsock->pkt_list_lock); + list_splice_init(&vsock->pkt_list, &pkts); + spin_unlock_bh(&vsock->pkt_list_lock); + + while (!list_empty(&pkts)) { + struct virtio_vsock_pkt *pkt; + + pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + + virtio_transport_deliver_tap_pkt(pkt); + virtio_transport_recv_pkt(&loopback_transport, pkt); + } +} + +static int __init vsock_loopback_init(void) +{ + struct vsock_loopback *vsock = &the_vsock_loopback; + int ret; + + vsock->workqueue = alloc_workqueue("vsock-loopback", 0, 0); + if (!vsock->workqueue) + return -ENOMEM; + + spin_lock_init(&vsock->pkt_list_lock); + INIT_LIST_HEAD(&vsock->pkt_list); + INIT_WORK(&vsock->pkt_work, vsock_loopback_work); + + ret = vsock_core_register(&loopback_transport.transport, + VSOCK_TRANSPORT_F_LOCAL); + if (ret) + goto out_wq; + + return 0; + +out_wq: + destroy_workqueue(vsock->workqueue); + return ret; +} + +static void __exit vsock_loopback_exit(void) +{ + struct vsock_loopback *vsock = &the_vsock_loopback; + struct virtio_vsock_pkt *pkt; + + vsock_core_unregister(&loopback_transport.transport); + + flush_work(&vsock->pkt_work); + + spin_lock_bh(&vsock->pkt_list_lock); + while (!list_empty(&vsock->pkt_list)) { + pkt = list_first_entry(&vsock->pkt_list, + struct virtio_vsock_pkt, list); + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + spin_unlock_bh(&vsock->pkt_list_lock); + + destroy_workqueue(vsock->workqueue); +} + +module_init(vsock_loopback_init); +module_exit(vsock_loopback_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Stefano Garzarella <sgarzare@redhat.com>"); +MODULE_DESCRIPTION("loopback transport for vsock"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index da5262b2298b..fa3526592c51 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12900,8 +12900,7 @@ static int nl80211_vendor_check_policy(const struct wiphy_vendor_command *vcmd, return -EINVAL; } - return nl80211_validate_nested(attr, vcmd->maxattr, vcmd->policy, - extack); + return nla_validate_nested(attr, vcmd->maxattr, vcmd->policy, extack); } static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index e80be65799ad..a5937069ac16 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -24,24 +24,6 @@ rate() echo $((8 * (t1 - t0) / interval)) } -start_traffic() -{ - local h_in=$1; shift # Where the traffic egresses the host - local sip=$1; shift - local dip=$1; shift - local dmac=$1; shift - - $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ - -a own -b $dmac -t udp -q & - sleep 1 -} - -stop_traffic() -{ - # Suppress noise from killing mausezahn. - { kill %% && wait %%; } 2>/dev/null -} - check_rate() { local rate=$1; shift @@ -96,3 +78,31 @@ measure_rate() echo $ir $er return $ret } + +bail_on_lldpad() +{ + if systemctl is-active --quiet lldpad; then + + cat >/dev/stderr <<-EOF + WARNING: lldpad is running + + lldpad will likely configure DCB, and this test will + configure Qdiscs. mlxsw does not support both at the + same time, one of them is arbitrarily going to overwrite + the other. That will cause spurious failures (or, + unlikely, passes) of this test. + EOF + + if [[ -z $ALLOW_LLDPAD ]]; then + cat >/dev/stderr <<-EOF + + If you want to run the test anyway, please set + an environment variable ALLOW_LLDPAD to a + non-empty string. + EOF + exit 1 + else + return + fi + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh new file mode 100755 index 000000000000..c9fc4d4885c1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A driver for the ETS selftest that implements testing in offloaded datapath. +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/sch_ets_core.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +ALL_TESTS=" + ping_ipv4 + priomap_mode + ets_test_strict + ets_test_mixed + ets_test_dwrr +" + +switch_create() +{ + ets_switch_create + + # Create a bottleneck so that the DWRR process can kick in. + ethtool -s $h2 speed 1000 autoneg off + ethtool -s $swp2 speed 1000 autoneg off + + # Set the ingress quota high and use the three egress TCs to limit the + # amount of traffic that is admitted to the shared buffers. This makes + # sure that there is always enough traffic of all types to select from + # for the DWRR process. + devlink_port_pool_th_set $swp1 0 12 + devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12 + devlink_port_pool_th_set $swp2 4 12 + devlink_tc_bind_pool_th_set $swp2 7 egress 4 5 + devlink_tc_bind_pool_th_set $swp2 6 egress 4 5 + devlink_tc_bind_pool_th_set $swp2 5 egress 4 5 + + # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet + # priorities at $swp1 based on their 802.1p headers. ingress-qos-map is + # not offloaded by mlxsw as of this writing, but the mapping used is + # 1:1, which is the mapping currently hard-coded by the driver. +} + +switch_destroy() +{ + devlink_tc_bind_pool_th_restore $swp2 5 egress + devlink_tc_bind_pool_th_restore $swp2 6 egress + devlink_tc_bind_pool_th_restore $swp2 7 egress + devlink_port_pool_th_restore $swp2 4 + devlink_tc_bind_pool_th_restore $swp1 0 ingress + devlink_port_pool_th_restore $swp1 0 + + ethtool -s $swp2 autoneg on + ethtool -s $h2 autoneg on + + ets_switch_destroy +} + +# Callback from sch_ets_tests.sh +get_stats() +{ + local band=$1; shift + + ethtool_stats_get "$h2" rx_octets_prio_$band +} + +bail_on_lldpad +ets_run diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 1f64e7348f69..a0b09bb6995e 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1065,3 +1065,21 @@ flood_test() flood_unicast_test $br_port $host1_if $host2_if flood_multicast_test $br_port $host1_if $host2_if } + +start_traffic() +{ + local h_in=$1; shift # Where the traffic egresses the host + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + + $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ + -a own -b $dmac -t udp -q & + sleep 1 +} + +stop_traffic() +{ + # Suppress noise from killing mausezahn. + { kill %% && wait %%; } 2>/dev/null +} diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh new file mode 100755 index 000000000000..40e0ad1bc4f2 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A driver for the ETS selftest that implements testing in slowpath. +lib_dir=. +source sch_ets_core.sh + +ALL_TESTS=" + ping_ipv4 + priomap_mode + ets_test_strict + ets_test_mixed + ets_test_dwrr + classifier_mode + ets_test_strict + ets_test_mixed + ets_test_dwrr +" + +switch_create() +{ + ets_switch_create + + # Create a bottleneck so that the DWRR process can kick in. + tc qdisc add dev $swp2 root handle 1: tbf \ + rate 1Gbit burst 1Mbit latency 100ms + PARENT="parent 1:" +} + +switch_destroy() +{ + ets_switch_destroy + tc qdisc del dev $swp2 root +} + +# Callback from sch_ets_tests.sh +get_stats() +{ + local stream=$1; shift + + link_stats_get $h2.1$stream rx bytes +} + +ets_run diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh new file mode 100644 index 000000000000..f906fcc66572 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -0,0 +1,300 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This is a template for ETS Qdisc test. +# +# This test sends from H1 several traffic streams with 802.1p-tagged packets. +# The tags are used at $swp1 to prioritize the traffic. Each stream is then +# queued at a different ETS band according to the assigned priority. After +# runnig for a while, counters at H2 are consulted to determine whether the +# traffic scheduling was according to the ETS configuration. +# +# This template is supposed to be embedded by a test driver, which implements +# statistics collection, any HW-specific stuff, and prominently configures the +# system to assure that there is overcommitment at $swp2. That is necessary so +# that the ETS traffic selection algorithm kicks in and has to schedule some +# traffic at the expense of other. +# +# A driver for veth-based testing is in sch_ets.sh, an example of a driver for +# an offloaded data path is in selftests/drivers/net/mlxsw/sch_ets.sh. +# +# +---------------------------------------------------------------------+ +# | H1 | +# | + $h1.10 + $h1.11 + $h1.12 | +# | | 192.0.2.1/28 | 192.0.2.17/28 | 192.0.2.33/28 | +# | | egress-qos-map | egress-qos-map | egress-qos-map | +# | | 0:0 | 0:1 | 0:2 | +# | \____________________ | ____________________/ | +# | \|/ | +# | + $h1 | +# +---------------------------|-----------------------------------------+ +# | +# +---------------------------|-----------------------------------------+ +# | SW + $swp1 | +# | | >1Gbps | +# | ____________________/|\____________________ | +# | / | \ | +# | +--|----------------+ +--|----------------+ +--|----------------+ | +# | | + $swp1.10 | | + $swp1.11 | | + $swp1.12 | | +# | | ingress-qos-map| | ingress-qos-map| | ingress-qos-map| | +# | | 0:0 1:1 2:2 | | 0:0 1:1 2:2 | | 0:0 1:1 2:2 | | +# | | | | | | | | +# | | BR10 | | BR11 | | BR12 | | +# | | | | | | | | +# | | + $swp2.10 | | + $swp2.11 | | + $swp2.12 | | +# | +--|----------------+ +--|----------------+ +--|----------------+ | +# | \____________________ | ____________________/ | +# | \|/ | +# | + $swp2 | +# | | 1Gbps (ethtool or HTB qdisc) | +# | | qdisc ets quanta $W0 $W1 $W2 | +# | | priomap 0 1 2 | +# +---------------------------|-----------------------------------------+ +# | +# +---------------------------|-----------------------------------------+ +# | H2 + $h2 | +# | ____________________/|\____________________ | +# | / | \ | +# | + $h2.10 + $h2.11 + $h2.12 | +# | 192.0.2.2/28 192.0.2.18/28 192.0.2.34/28 | +# +---------------------------------------------------------------------+ + +NUM_NETIFS=4 +CHECK_TC=yes +source $lib_dir/lib.sh +source $lib_dir/sch_ets_tests.sh + +PARENT=root +QDISC_DEV= + +sip() +{ + echo 192.0.2.$((16 * $1 + 1)) +} + +dip() +{ + echo 192.0.2.$((16 * $1 + 2)) +} + +# Callback from sch_ets_tests.sh +ets_start_traffic() +{ + local dst_mac=$(mac_get $h2) + local i=$1; shift + + start_traffic $h1.1$i $(sip $i) $(dip $i) $dst_mac +} + +ETS_CHANGE_QDISC= + +priomap_mode() +{ + echo "Running in priomap mode" + ets_delete_qdisc + ETS_CHANGE_QDISC=ets_change_qdisc_priomap +} + +classifier_mode() +{ + echo "Running in classifier mode" + ets_delete_qdisc + ETS_CHANGE_QDISC=ets_change_qdisc_classifier +} + +ets_change_qdisc_priomap() +{ + local dev=$1; shift + local nstrict=$1; shift + local priomap=$1; shift + local quanta=("${@}") + + local op=$(if [[ -n $QDISC_DEV ]]; then echo change; else echo add; fi) + + tc qdisc $op dev $dev $PARENT handle 10: ets \ + $(if ((nstrict)); then echo strict $nstrict; fi) \ + $(if ((${#quanta[@]})); then echo quanta ${quanta[@]}; fi) \ + priomap $priomap + QDISC_DEV=$dev +} + +ets_change_qdisc_classifier() +{ + local dev=$1; shift + local nstrict=$1; shift + local priomap=$1; shift + local quanta=("${@}") + + local op=$(if [[ -n $QDISC_DEV ]]; then echo change; else echo add; fi) + + tc qdisc $op dev $dev $PARENT handle 10: ets \ + $(if ((nstrict)); then echo strict $nstrict; fi) \ + $(if ((${#quanta[@]})); then echo quanta ${quanta[@]}; fi) + + if [[ $op == add ]]; then + local prio=0 + local band + + for band in $priomap; do + tc filter add dev $dev parent 10: basic \ + match "meta(priority eq $prio)" \ + flowid 10:$((band + 1)) + ((prio++)) + done + fi + QDISC_DEV=$dev +} + +# Callback from sch_ets_tests.sh +ets_change_qdisc() +{ + if [[ -z "$ETS_CHANGE_QDISC" ]]; then + exit 1 + fi + $ETS_CHANGE_QDISC "$@" +} + +ets_delete_qdisc() +{ + if [[ -n $QDISC_DEV ]]; then + tc qdisc del dev $QDISC_DEV $PARENT + QDISC_DEV= + fi +} + +h1_create() +{ + local i; + + simple_if_init $h1 + mtu_set $h1 9900 + for i in {0..2}; do + vlan_create $h1 1$i v$h1 $(sip $i)/28 + ip link set dev $h1.1$i type vlan egress 0:$i + done +} + +h1_destroy() +{ + local i + + for i in {0..2}; do + vlan_destroy $h1 1$i + done + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + local i + + simple_if_init $h2 + mtu_set $h2 9900 + for i in {0..2}; do + vlan_create $h2 1$i v$h2 $(dip $i)/28 + done +} + +h2_destroy() +{ + local i + + for i in {0..2}; do + vlan_destroy $h2 1$i + done + mtu_restore $h2 + simple_if_fini $h2 +} + +ets_switch_create() +{ + local i + + ip link set dev $swp1 up + mtu_set $swp1 9900 + + ip link set dev $swp2 up + mtu_set $swp2 9900 + + for i in {0..2}; do + vlan_create $swp1 1$i + ip link set dev $swp1.1$i type vlan ingress 0:0 1:1 2:2 + + vlan_create $swp2 1$i + + ip link add dev br1$i type bridge + ip link set dev $swp1.1$i master br1$i + ip link set dev $swp2.1$i master br1$i + + ip link set dev br1$i up + ip link set dev $swp1.1$i up + ip link set dev $swp2.1$i up + done +} + +ets_switch_destroy() +{ + local i + + ets_delete_qdisc + + for i in {0..2}; do + ip link del dev br1$i + vlan_destroy $swp2 1$i + vlan_destroy $swp1 1$i + done + + mtu_restore $swp2 + ip link set dev $swp2 down + + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + put=$swp2 + hut=$h2 + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 $(dip 0) " vlan 10" + ping_test $h1.11 $(dip 1) " vlan 11" + ping_test $h1.12 $(dip 2) " vlan 12" +} + +ets_run() +{ + trap cleanup EXIT + + setup_prepare + setup_wait + + tests_run + + exit $EXIT_STATUS +} diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh new file mode 100644 index 000000000000..3c3b204d47e8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -0,0 +1,227 @@ +# SPDX-License-Identifier: GPL-2.0 + +# Global interface: +# $put -- port under test (e.g. $swp2) +# get_stats($band) -- A function to collect stats for band +# ets_start_traffic($band) -- Start traffic for this band +# ets_change_qdisc($op, $dev, $nstrict, $quanta...) -- Add or change qdisc + +# WS describes the Qdisc configuration. It has one value per band (so the +# number of array elements indicates the number of bands). If the value is +# 0, it is a strict band, otherwise the it's a DRR band and the value is +# that band's quantum. +declare -a WS + +qdisc_describe() +{ + local nbands=${#WS[@]} + local nstrict=0 + local i + + for ((i = 0; i < nbands; i++)); do + if ((!${WS[$i]})); then + : $((nstrict++)) + fi + done + + echo -n "ets bands $nbands" + if ((nstrict)); then + echo -n " strict $nstrict" + fi + if ((nstrict < nbands)); then + echo -n " quanta" + for ((i = nstrict; i < nbands; i++)); do + echo -n " ${WS[$i]}" + done + fi +} + +__strict_eval() +{ + local desc=$1; shift + local d=$1; shift + local total=$1; shift + local above=$1; shift + + RET=0 + + if ((! total)); then + check_err 1 "No traffic observed" + log_test "$desc" + return + fi + + local ratio=$(echo "scale=2; 100 * $d / $total" | bc -l) + if ((above)); then + test $(echo "$ratio > 95.0" | bc -l) -eq 1 + check_err $? "Not enough traffic" + log_test "$desc" + log_info "Expected ratio >95% Measured ratio $ratio" + else + test $(echo "$ratio < 5" | bc -l) -eq 1 + check_err $? "Too much traffic" + log_test "$desc" + log_info "Expected ratio <5% Measured ratio $ratio" + fi +} + +strict_eval() +{ + __strict_eval "$@" 1 +} + +notraf_eval() +{ + __strict_eval "$@" 0 +} + +__ets_dwrr_test() +{ + local -a streams=("$@") + + local low_stream=${streams[0]} + local seen_strict=0 + local -a t0 t1 d + local stream + local total + local i + + echo "Testing $(qdisc_describe), streams ${streams[@]}" + + for stream in ${streams[@]}; do + ets_start_traffic $stream + done + + sleep 10 + + t0=($(for stream in ${streams[@]}; do + get_stats $stream + done)) + + sleep 10 + + t1=($(for stream in ${streams[@]}; do + get_stats $stream + done)) + d=($(for ((i = 0; i < ${#streams[@]}; i++)); do + echo $((${t1[$i]} - ${t0[$i]})) + done)) + total=$(echo ${d[@]} | sed 's/ /+/g' | bc) + + for ((i = 0; i < ${#streams[@]}; i++)); do + local stream=${streams[$i]} + if ((seen_strict)); then + notraf_eval "band $stream" ${d[$i]} $total + elif ((${WS[$stream]} == 0)); then + strict_eval "band $stream" ${d[$i]} $total + seen_strict=1 + elif ((stream == low_stream)); then + # Low stream is used as DWRR evaluation reference. + continue + else + multipath_eval "bands $low_stream:$stream" \ + ${WS[$low_stream]} ${WS[$stream]} \ + ${d[0]} ${d[$i]} + fi + done + + for stream in ${streams[@]}; do + stop_traffic + done +} + +ets_dwrr_test_012() +{ + __ets_dwrr_test 0 1 2 +} + +ets_dwrr_test_01() +{ + __ets_dwrr_test 0 1 +} + +ets_dwrr_test_12() +{ + __ets_dwrr_test 1 2 +} + +ets_qdisc_setup() +{ + local dev=$1; shift + local nstrict=$1; shift + local -a quanta=("$@") + + local ndwrr=${#quanta[@]} + local nbands=$((nstrict + ndwrr)) + local nstreams=$(if ((nbands > 3)); then echo 3; else echo $nbands; fi) + local priomap=$(seq 0 $((nstreams - 1))) + local i + + WS=($( + for ((i = 0; i < nstrict; i++)); do + echo 0 + done + for ((i = 0; i < ndwrr; i++)); do + echo ${quanta[$i]} + done + )) + + ets_change_qdisc $dev $nstrict "$priomap" ${quanta[@]} +} + +ets_set_dwrr_uniform() +{ + ets_qdisc_setup $put 0 3300 3300 3300 +} + +ets_set_dwrr_varying() +{ + ets_qdisc_setup $put 0 5000 3500 1500 +} + +ets_set_strict() +{ + ets_qdisc_setup $put 3 +} + +ets_set_mixed() +{ + ets_qdisc_setup $put 1 5000 2500 1500 +} + +ets_change_quantum() +{ + tc class change dev $put classid 10:2 ets quantum 8000 + WS[1]=8000 +} + +ets_set_dwrr_two_bands() +{ + ets_qdisc_setup $put 0 5000 2500 +} + +ets_test_strict() +{ + ets_set_strict + ets_dwrr_test_01 + ets_dwrr_test_12 +} + +ets_test_mixed() +{ + ets_set_mixed + ets_dwrr_test_01 + ets_dwrr_test_12 +} + +ets_test_dwrr() +{ + ets_set_dwrr_uniform + ets_dwrr_test_012 + ets_set_dwrr_varying + ets_dwrr_test_012 + ets_change_quantum + ets_dwrr_test_012 + ets_set_dwrr_two_bands + ets_dwrr_test_01 +} diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 34df4c8882af..383bac05ac32 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -12,7 +12,11 @@ #include <arpa/inet.h> #include <error.h> #include <errno.h> +#include <inttypes.h> #include <linux/net_tstamp.h> +#include <linux/errqueue.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> #include <stdbool.h> #include <stdlib.h> #include <stdio.h> @@ -28,7 +32,7 @@ static int cfg_clockid = CLOCK_TAI; static bool cfg_do_ipv4; static bool cfg_do_ipv6; static uint16_t cfg_port = 8000; -static int cfg_variance_us = 2000; +static int cfg_variance_us = 4000; static uint64_t glob_tstart; @@ -43,6 +47,9 @@ static struct timed_send cfg_in[MAX_NUM_PKT]; static struct timed_send cfg_out[MAX_NUM_PKT]; static int cfg_num_pkt; +static int cfg_errq_level; +static int cfg_errq_type; + static uint64_t gettime_ns(void) { struct timespec ts; @@ -90,13 +97,15 @@ static void do_send_one(int fdt, struct timed_send *ts) } -static void do_recv_one(int fdr, struct timed_send *ts) +static bool do_recv_one(int fdr, struct timed_send *ts) { int64_t tstop, texpect; char rbuf[2]; int ret; ret = recv(fdr, rbuf, sizeof(rbuf), 0); + if (ret == -1 && errno == EAGAIN) + return true; if (ret == -1) error(1, errno, "read"); if (ret != 1) @@ -113,6 +122,8 @@ static void do_recv_one(int fdr, struct timed_send *ts) if (labs(tstop - texpect) > cfg_variance_us) error(1, 0, "exceeds variance (%d us)", cfg_variance_us); + + return false; } static void do_recv_verify_empty(int fdr) @@ -125,12 +136,70 @@ static void do_recv_verify_empty(int fdr) error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno); } +static void do_recv_errqueue_timeout(int fdt) +{ + char control[CMSG_SPACE(sizeof(struct sock_extended_err)) + + CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; + char data[sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + 1]; + struct sock_extended_err *err; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cm; + int64_t tstamp = 0; + int ret; + + iov.iov_base = data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + while (1) { + ret = recvmsg(fdt, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "errqueue"); + if (msg.msg_flags != MSG_ERRQUEUE) + error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); + + cm = CMSG_FIRSTHDR(&msg); + if (cm->cmsg_level != cfg_errq_level || + cm->cmsg_type != cfg_errq_type) + error(1, 0, "errqueue: type 0x%x.0x%x\n", + cm->cmsg_level, cm->cmsg_type); + + err = (struct sock_extended_err *)CMSG_DATA(cm); + if (err->ee_origin != SO_EE_ORIGIN_TXTIME) + error(1, 0, "errqueue: origin 0x%x\n", err->ee_origin); + if (err->ee_code != ECANCELED) + error(1, 0, "errqueue: code 0x%x\n", err->ee_code); + + tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info; + tstamp -= (int64_t) glob_tstart; + tstamp /= 1000 * 1000; + fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped\n", + data[ret - 1], tstamp); + + msg.msg_flags = 0; + msg.msg_controllen = sizeof(control); + } + + error(1, 0, "recv: timeout"); +} + static void setsockopt_txtime(int fd) { struct sock_txtime so_txtime_val = { .clockid = cfg_clockid }; struct sock_txtime so_txtime_val_read = { 0 }; socklen_t vallen = sizeof(so_txtime_val); + so_txtime_val.flags = SOF_TXTIME_REPORT_ERRORS; + if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, &so_txtime_val, sizeof(so_txtime_val))) error(1, errno, "setsockopt txtime"); @@ -194,7 +263,8 @@ static void do_test(struct sockaddr *addr, socklen_t alen) for (i = 0; i < cfg_num_pkt; i++) do_send_one(fdt, &cfg_in[i]); for (i = 0; i < cfg_num_pkt; i++) - do_recv_one(fdr, &cfg_out[i]); + if (do_recv_one(fdr, &cfg_out[i])) + do_recv_errqueue_timeout(fdt); do_recv_verify_empty(fdr); @@ -280,6 +350,10 @@ int main(int argc, char **argv) addr6.sin6_family = AF_INET6; addr6.sin6_port = htons(cfg_port); addr6.sin6_addr = in6addr_loopback; + + cfg_errq_level = SOL_IPV6; + cfg_errq_type = IPV6_RECVERR; + do_test((void *)&addr6, sizeof(addr6)); } @@ -289,6 +363,10 @@ int main(int argc, char **argv) addr4.sin_family = AF_INET; addr4.sin_port = htons(cfg_port); addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + cfg_errq_level = SOL_IP; + cfg_errq_type = IP_RECVERR; + do_test((void *)&addr4, sizeof(addr4)); } diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh index 5aa519328a5b..3f7800eaecb1 100755 --- a/tools/testing/selftests/net/so_txtime.sh +++ b/tools/testing/selftests/net/so_txtime.sh @@ -5,7 +5,12 @@ # Run in network namespace if [[ $# -eq 0 ]]; then - ./in_netns.sh $0 __subprocess + if ! ./in_netns.sh $0 __subprocess; then + # test is time sensitive, can be flaky + echo "test failed: retry once" + ./in_netns.sh $0 __subprocess + fi + exit $? fi @@ -18,7 +23,7 @@ tc qdisc add dev lo root fq ./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20 ./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20 -if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then +if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then ! ./so_txtime -4 -6 -c tai a,-1 a,-1 ! ./so_txtime -4 -6 -c tai a,0 a,0 ./so_txtime -4 -6 -c tai a,10 a,10 diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json new file mode 100644 index 000000000000..180593010675 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json @@ -0,0 +1,940 @@ +[ + { + "id": "e90e", + "name": "Add ETS qdisc using bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .* bands 2", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "b059", + "name": "Add ETS qdisc using quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 900 800 700", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "e8e7", + "name": "Add ETS qdisc using strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 3 strict 3", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "233c", + "name": "Add ETS qdisc using bands + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 quanta 1000 900 800 700", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "3d35", + "name": "Add ETS qdisc using bands + strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 3", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 3 strict 3 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7f3b", + "name": "Add ETS qdisc using strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3 quanta 1500 750", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 5 strict 3 quanta 1500 750 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "4593", + "name": "Add ETS qdisc using strict 0 + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 0 quanta 1500 750", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 2 quanta 1500 750 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "8938", + "name": "Add ETS qdisc using bands + strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 strict 3 quanta 1500 750", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 5 .*strict 3 quanta 1500 750 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "0782", + "name": "Add ETS qdisc with more bands than quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 2 .*quanta 1000 [1-9][0-9]* priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "501b", + "name": "Add ETS qdisc with more bands than strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta ([1-9][0-9]* ){2}priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "671a", + "name": "Add ETS qdisc with more bands than strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1 quanta 1000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta 1000 [1-9][0-9]* priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "2a23", + "name": "Add ETS qdisc with 16 bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 16", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .* bands 16", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "8daf", + "name": "Add ETS qdisc with 17 bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7f95", + "name": "Add ETS qdisc with 17 strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 17", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "837a", + "name": "Add ETS qdisc with 16 quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .* bands 16", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "65b6", + "name": "Add ETS qdisc with 17 quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "b9e9", + "name": "Add ETS qdisc with 16 strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 8 quanta 1 2 3 4 5 6 7 8", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .* bands 16", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "9877", + "name": "Add ETS qdisc with 17 strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 9 quanta 1 2 3 4 5 6 7 8", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "c696", + "name": "Add ETS qdisc with priomap", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "30c4", + "name": "Add ETS qdisc with quanta + priomap", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "e8ac", + "name": "Add ETS qdisc with strict + priomap", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 5 strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "5a7e", + "name": "Add ETS qdisc with quanta + strict + priomap", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 quanta 1000 2000 3000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*strict 2 quanta 1000 2000 3000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "cb8b", + "name": "Show ETS class :1", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $DUMMY classid 1:1", + "matchPattern": "class ets 1:1 root quantum 4000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "1b4e", + "name": "Show ETS class :2", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $DUMMY classid 1:2", + "matchPattern": "class ets 1:2 root quantum 3000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "f642", + "name": "Show ETS class :3", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $DUMMY classid 1:3", + "matchPattern": "class ets 1:3 root quantum 2000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "0a5f", + "name": "Show ETS strict class", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $DUMMY classid 1:1", + "matchPattern": "class ets 1:1 root $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "f7c8", + "name": "Add ETS qdisc with too many quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000 2000 3000", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "2389", + "name": "Add ETS qdisc with too many strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 3", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "fe3c", + "name": "Add ETS qdisc with too many strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 2 quanta 1000 2000 3000", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "cb04", + "name": "Add ETS qdisc with excess priomap elements", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0 1 2", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "c32e", + "name": "Add ETS qdisc with priomap above bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 priomap 0 1 2", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "744c", + "name": "Add ETS qdisc with priomap above quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 500 priomap 0 1 2", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7b33", + "name": "Add ETS qdisc with priomap above strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 priomap 0 1 2", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "dbe6", + "name": "Add ETS qdisc with priomap above strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 1 quanta 1000 500 priomap 0 1 2 3", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "bdb2", + "name": "Add ETS qdisc with priomap within bands with strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "39a3", + "name": "Add ETS qdisc with priomap above bands with strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3 4", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "557c", + "name": "Unset priorities default to the last band", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 priomap 0 0 0 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets .*priomap 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "a347", + "name": "Unset priorities default to the last band -- no priomap", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets .*priomap 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "39c4", + "name": "Add ETS qdisc with too few bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 0", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "930b", + "name": "Add ETS qdisc with too many bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "406a", + "name": "Add ETS qdisc without parameters", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "e51a", + "name": "Zero element in quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 0 800 700", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "e7f2", + "name": "Sole zero element in quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 0", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "d6e6", + "name": "No values after the quanta keyword", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta", + "expExitCode": "255", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "28c6", + "name": "Change ETS band quantum", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000" + ], + "cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets quantum 1500", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*quanta 1500 2000 3000 priomap ", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "4714", + "name": "Change ETS band without quantum", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000" + ], + "cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 priomap ", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "6979", + "name": "Change quantum of a strict ETS band", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root ets strict 5" + ], + "cmdUnderTest": "$TC class change dev $DUMMY classid 1:2 ets quantum 1500", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets .*bands 5 .*strict 5", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "9a7d", + "name": "Change ETS strict band without quantum", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root ets strict 5" + ], + "cmdUnderTest": "$TC class change dev $DUMMY classid 1:2 ets", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets .*bands 5 .*strict 5", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + } +] diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh new file mode 100755 index 000000000000..e7310d9390f7 --- /dev/null +++ b/tools/testing/selftests/wireguard/netns.sh @@ -0,0 +1,537 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. +# +# This script tests the below topology: +# +# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐ +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ +# │ │ │ │ │ │ +# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│ +# ││ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 ││ +# │├────────┴──────────┐│ │ ┌───────┴────────┴────────┐ │ │┌──────────┴────────┤│ +# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││ +# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││ +# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│ +# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘ +# └──────────────────────────────────┘ +# +# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the +# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0 +# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further +# details on how this is accomplished. +set -e + +exec 3>&1 +export WG_HIDE_KEYS=never +netns0="wg-test-$$-0" +netns1="wg-test-$$-1" +netns2="wg-test-$$-2" +pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; } +pp() { pretty "" "$*"; "$@"; } +maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; } +n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; } +n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; } +n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; } +ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } +ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } +ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } +sleep() { read -t "$1" -N 0 || true; } +waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; } +waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } +waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } +waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } + +cleanup() { + set +e + exec 2>/dev/null + printf "$orig_message_cost" > /proc/sys/net/core/message_cost + ip0 link del dev wg0 + ip1 link del dev wg0 + ip2 link del dev wg0 + local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" + [[ -n $to_kill ]] && kill $to_kill + pp ip netns del $netns1 + pp ip netns del $netns2 + pp ip netns del $netns0 + exit +} + +orig_message_cost="$(< /proc/sys/net/core/message_cost)" +trap cleanup EXIT +printf 0 > /proc/sys/net/core/message_cost + +ip netns del $netns0 2>/dev/null || true +ip netns del $netns1 2>/dev/null || true +ip netns del $netns2 2>/dev/null || true +pp ip netns add $netns0 +pp ip netns add $netns1 +pp ip netns add $netns2 +ip0 link set up dev lo + +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns $netns1 +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns $netns2 +key1="$(pp wg genkey)" +key2="$(pp wg genkey)" +key3="$(pp wg genkey)" +pub1="$(pp wg pubkey <<<"$key1")" +pub2="$(pp wg pubkey <<<"$key2")" +pub3="$(pp wg pubkey <<<"$key3")" +psk="$(pp wg genpsk)" +[[ -n $key1 && -n $key2 && -n $psk ]] + +configure_peers() { + ip1 addr add 192.168.241.1/24 dev wg0 + ip1 addr add fd00::1/24 dev wg0 + + ip2 addr add 192.168.241.2/24 dev wg0 + ip2 addr add fd00::2/24 dev wg0 + + n1 wg set wg0 \ + private-key <(echo "$key1") \ + listen-port 1 \ + peer "$pub2" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.2/32,fd00::2/128 + n2 wg set wg0 \ + private-key <(echo "$key2") \ + listen-port 2 \ + peer "$pub1" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.1/32,fd00::1/128 + + ip1 link set up dev wg0 + ip2 link set up dev wg0 +} +configure_peers + +tests() { + # Ping over IPv4 + n2 ping -c 10 -f -W 1 192.168.241.1 + n1 ping -c 10 -f -W 1 192.168.241.2 + + # Ping over IPv6 + n2 ping6 -c 10 -f -W 1 fd00::1 + n1 ping6 -c 10 -f -W 1 fd00::2 + + # TCP over IPv4 + n2 iperf3 -s -1 -B 192.168.241.2 & + waitiperf $netns2 + n1 iperf3 -Z -t 3 -c 192.168.241.2 + + # TCP over IPv6 + n1 iperf3 -s -1 -B fd00::1 & + waitiperf $netns1 + n2 iperf3 -Z -t 3 -c fd00::1 + + # UDP over IPv4 + n1 iperf3 -s -1 -B 192.168.241.1 & + waitiperf $netns1 + n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1 + + # UDP over IPv6 + n2 iperf3 -s -1 -B fd00::2 & + waitiperf $netns2 + n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 +} + +[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" +big_mtu=$(( 34816 - 1500 + $orig_mtu )) + +# Test using IPv4 as outer transport +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +# Before calling tests, we first make sure that the stats counters and timestamper are working +n2 ping -c 10 -f -W 1 192.168.241.1 +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ timestamp < <(n1 wg show wg0 latest-handshakes) +(( timestamp != 0 )) + +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +ip1 link set wg0 mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv6 as outer transport +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n2 wg set wg0 peer "$pub1" endpoint [::1]:1 +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +# Test that route MTUs work with the padding +ip1 link set wg0 mtu 1300 +ip2 link set wg0 mtu 1300 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +n0 iptables -A INPUT -m length --length 1360 -j DROP +n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299 +n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299 +n2 ping -c 1 -W 1 -s 1269 192.168.241.1 +n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299 +n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299 +n0 iptables -F INPUT + +ip1 link set wg0 mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv4 that roaming works +ip0 -4 addr del 127.0.0.1/8 dev lo +ip0 -4 addr add 127.212.121.99/8 dev lo +n1 wg set wg0 listen-port 9999 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n1 ping6 -W 1 -c 1 fd00::2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]] + +# Test using IPv6 that roaming works +n1 wg set wg0 listen-port 9998 +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]] + +# Test that crypto-RP filter works +n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 +n2 ncat -u 192.168.241.1 1111 <<<"X" +read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]] +kill $ncat_pid +more_specific_key="$(pp wg genkey | pp wg pubkey)" +n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32 +n2 wg set wg0 listen-port 9997 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 +n2 ncat -u 192.168.241.1 1111 <<<"X" +! read -r -N 1 -t 1 out <&4 || false +kill $ncat_pid +n1 wg set wg0 peer "$more_specific_key" remove +[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]] + +# Test that we can change private keys keys and immediately handshake +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2 +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 private-key <(echo "$key3") +n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove +n1 ping -W 1 -c 1 192.168.241.2 + +ip1 link del wg0 +ip2 link del wg0 + +# Test using NAT. We now change the topology to this: +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐ +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ +# │ │ │ │ │ │ +# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │ +# │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │ +# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │ +# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │ +# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │ +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │ +# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers + +ip0 link add vethrc type veth peer name vethc +ip0 link add vethrs type veth peer name veths +ip0 link set vethc netns $netns1 +ip0 link set veths netns $netns2 +ip0 link set vethrc up +ip0 link set vethrs up +ip0 addr add 192.168.1.1/24 dev vethrc +ip0 addr add 10.0.0.1/24 dev vethrs +ip1 addr add 192.168.1.100/24 dev vethc +ip1 link set vethc up +ip1 route add default via 192.168.1.1 +ip2 addr add 10.0.0.100/24 dev veths +ip2 link set veths up +waitiface $netns0 vethrc +waitiface $netns0 vethrs +waitiface $netns1 vethc +waitiface $netns2 veths + +n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream' +n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1 + +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1 +n1 ping -W 1 -c 1 192.168.241.2 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`). +pp sleep 3 +n2 ping -W 1 -c 1 192.168.241.1 +n1 wg set wg0 peer "$pub2" persistent-keepalive 0 + +# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. +ip1 -6 addr add fc00::9/96 dev vethc +ip1 -6 route add default via fc00::1 +ip2 -4 addr add 192.168.99.7/32 dev wg0 +ip2 -6 addr add abab::1111/128 dev wg0 +n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111 +ip1 -6 route add default dev wg0 table 51820 +ip1 -6 rule add not fwmark 51820 table 51820 +ip1 -6 rule add table main suppress_prefixlength 0 +ip1 -4 route add default dev wg0 table 51820 +ip1 -4 rule add not fwmark 51820 table 51820 +ip1 -4 rule add table main suppress_prefixlength 0 +# suppress_prefixlength only got added in 3.12, and we want to support 3.10+. +if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then + # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. + n1 ping -W 1 -c 100 -f 192.168.99.7 + n1 ping -W 1 -c 100 -f abab::1111 +fi + +n0 iptables -t nat -F +ip0 link del vethrc +ip0 link del vethrs +ip1 link del wg0 +ip2 link del wg0 + +# Test that saddr routing is sticky but not too sticky, changing to this topology: +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐ +# │ $ns1 namespace │ │ $ns2 namespace │ +# │ │ │ │ +# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │ +# │ │ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 │ │ +# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├─────┴──────────┐ ├─────┴──────────┐ │ +# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │ +# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │ +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │ +# └────────────────────────────────────────┘ └────────────────────────────────────────┘ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad' +n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries' + +# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip2 addr add fd00:aa::2/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add 10.0.0.10/24 dev veth1 +ip1 addr del 10.0.0.1/24 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add fd00:aa::10/96 dev veth1 +ip1 addr del fd00:aa::1/96 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 + +# Now we show that we can successfully do reply to sender routing +ip1 link set veth1 down +ip2 link set veth2 down +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add 10.0.0.2/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip1 addr add fd00:aa::2/96 dev veth1 +ip2 addr add 10.0.0.3/24 dev veth2 +ip2 addr add fd00:aa::3/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]] +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]] + +# What happens if the inbound destination address belongs to a different interface as the default route? +ip1 link add dummy0 type dummy +ip1 addr add 10.50.0.1/24 dev dummy0 +ip1 link set dummy0 up +ip2 route add 10.50.0.0/24 dev veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]] + +ip1 link del dummy0 +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 route flush dev veth1 +ip2 route flush dev veth2 + +# Now we see what happens if another interface route takes precedence over an ongoing one +ip1 link add veth3 type veth peer name veth4 +ip1 link set veth4 netns $netns2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip1 addr add 10.0.0.3/24 dev veth3 +ip1 link set veth1 up +ip2 link set veth2 up +ip1 link set veth3 up +ip2 link set veth4 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +waitiface $netns1 veth3 +waitiface $netns2 veth4 +ip1 route flush dev veth1 +ip1 route flush dev veth3 +ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1 +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter' +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] + +ip1 link del veth1 +ip1 link del veth3 +ip1 link del wg0 +ip2 link del wg0 + +# We test that Netlink/IPC is working properly by doing things that usually cause split responses +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" ) +for a in {1..255}; do + for b in {0..255}; do + config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 +for ip in $(n0 wg show wg0 allowed-ips); do + ((++i)) +done +((i == 255*256*2+1)) +ip0 link del wg0 +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" ) +for a in {1..40}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) + for b in {1..52}; do + config+=( "AllowedIPs=$a.$b.0.0/16" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 +while read -r line; do + j=0 + for ip in $line; do + ((++j)) + done + ((j == 53)) + ((++i)) +done < <(n0 wg show wg0 allowed-ips) +((i == 40)) +ip0 link del wg0 +ip0 link add wg0 type wireguard +config=( ) +for i in {1..29}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) +done +config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" ) +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +n0 wg showconf wg0 > /dev/null +ip0 link del wg0 + +allowedips=( ) +for i in {1..197}; do + allowedips+=( abcd::$i ) +done +saved_ifs="$IFS" +IFS=, +allowedips="${allowedips[*]}" +IFS="$saved_ifs" +ip0 link add wg0 type wireguard +n0 wg set wg0 peer "$pub1" +n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips" +{ + read -r pub allowedips + [[ $pub == "$pub1" && $allowedips == "(none)" ]] + read -r pub allowedips + [[ $pub == "$pub2" ]] + i=0 + for _ in $allowedips; do + ((++i)) + done + ((i == 197)) +} < <(n0 wg show wg0 allowed-ips) +ip0 link del wg0 + +! n0 wg show doesnotexist || false + +ip0 link add wg0 type wireguard +n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") +[[ $(n0 wg show wg0 private-key) == "$key1" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]] +n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null +[[ $(n0 wg show wg0 private-key) == "(none)" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]] +n0 wg set wg0 peer "$pub2" +n0 wg set wg0 private-key <(echo "$key2") +[[ $(n0 wg show wg0 public-key) == "$pub2" ]] +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 peer "$pub2" +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 private-key <(echo "$key1") +n0 wg set wg0 peer "$pub2" +[[ $(n0 wg show wg0 peers) == "$pub2" ]] +n0 wg set wg0 private-key <(echo "/${key1:1}") +[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]] +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16 +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0 +ip0 link del wg0 + +declare -A objects +while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do + [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue + objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" +done < /dev/kmsg +alldeleted=1 +for object in "${!objects[@]}"; do + if [[ ${objects["$object"]} != *createddestroyed ]]; then + echo "Error: $object: merely ${objects["$object"]}" >&3 + alldeleted=0 + fi +done +[[ $alldeleted -eq 1 ]] +pretty "" "Objects that were created were also destroyed." diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore new file mode 100644 index 000000000000..415b542a9d59 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/.gitignore @@ -0,0 +1,2 @@ +build/ +distfiles/ diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile new file mode 100644 index 000000000000..6d51bf78eeff --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -0,0 +1,385 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + +PWD := $(shell pwd) + +CHOST := $(shell gcc -dumpmachine) +ifneq (,$(ARCH)) +CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc)))))) +ifeq (,$(CBUILD)) +$(error The toolchain for $(ARCH) is not installed) +endif +else +CBUILD := $(CHOST) +ARCH := $(firstword $(subst -, ,$(CBUILD))) +endif + +# Set these from the environment to override +KERNEL_PATH ?= $(PWD)/../../../../.. +BUILD_PATH ?= $(PWD)/build/$(ARCH) +DISTFILES_PATH ?= $(PWD)/distfiles +NR_CPUS ?= 4 + +MIRROR := https://download.wireguard.com/qemu-test/distfiles/ + +default: qemu + +# variable name, tarball project name, version, tarball extension, default URI base +define tar_download = +$(1)_VERSION := $(3) +$(1)_NAME := $(2)-$$($(1)_VERSION) +$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME)$(4) +$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME) +$(call file_download,$$($(1)_NAME)$(4),$(5),$(6)) +endef + +define file_download = +$(DISTFILES_PATH)/$(1): + mkdir -p $(DISTFILES_PATH) + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' + if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi +endef + +$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61)) +$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) +$(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f)) +$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2)) +$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5)) +$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21)) +$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a)) +$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071)) + +KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) +WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) + +export CFLAGS ?= -O3 -pipe +export LDFLAGS ?= +export CPPFLAGS := -I$(BUILD_PATH)/include + +ifeq ($(CHOST),$(CBUILD)) +CROSS_COMPILE_FLAG := --host=$(CHOST) +NOPIE_GCC := gcc -fno-PIE +CFLAGS += -march=native +STRIP := strip +else +$(info Cross compilation: building for $(CBUILD) using $(CHOST)) +CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) +export CROSS_COMPILE=$(CBUILD)- +NOPIE_GCC := $(CBUILD)-gcc -fno-PIE +STRIP := $(CBUILD)-strip +endif +ifeq ($(ARCH),aarch64) +QEMU_ARCH := aarch64 +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),aarch64_be) +QEMU_ARCH := aarch64 +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),arm) +QEMU_ARCH := arm +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux +endif +else ifeq ($(ARCH),armeb) +QEMU_ARCH := arm +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian. +LDFLAGS += -Wl,--be8 +endif +else ifeq ($(ARCH),x86_64) +QEMU_ARCH := x86_64 +KERNEL_ARCH := x86_64 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu Skylake-Server -machine q35 +CFLAGS += -march=skylake-avx512 +endif +else ifeq ($(ARCH),i686) +QEMU_ARCH := i386 +KERNEL_ARCH := x86 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage +ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu coreduo -machine q35 +CFLAGS += -march=prescott +endif +else ifeq ($(ARCH),mips64) +QEMU_ARCH := mips64 +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EB +endif +else ifeq ($(ARCH),mips64el) +QEMU_ARCH := mips64el +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EL +endif +else ifeq ($(ARCH),mips) +QEMU_ARCH := mips +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 +CFLAGS += -march=mips32r2 -EB +endif +else ifeq ($(ARCH),mipsel) +QEMU_ARCH := mipsel +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 +CFLAGS += -march=mips32r2 -EL +endif +else ifeq ($(ARCH),powerpc64le) +QEMU_ARCH := ppc64 +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host,accel=kvm -machine pseries +else +QEMU_MACHINE := -machine pseries +endif +CFLAGS += -mcpu=powerpc64le -mlong-double-64 +else ifeq ($(ARCH),powerpc) +QEMU_ARCH := ppc +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 +else +QEMU_MACHINE := -machine ppce500 +endif +CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt +else ifeq ($(ARCH),m68k) +QEMU_ARCH := m68k +KERNEL_ARCH := m68k +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(CHOST),$(CBUILD)) +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 +else +QEMU_MACHINE := -machine q800 +endif +else +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) +endif + +REAL_CC := $(CBUILD)-gcc +MUSL_CC := $(BUILD_PATH)/musl-gcc +export CC := $(MUSL_CC) +USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed + +build: $(KERNEL_BZIMAGE) +qemu: $(KERNEL_BZIMAGE) + rm -f $(BUILD_PATH)/result + timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ + -nodefaults \ + -nographic \ + -smp $(NR_CPUS) \ + $(QEMU_MACHINE) \ + -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ + -serial stdio \ + -serial file:$(BUILD_PATH)/result \ + -no-reboot \ + -monitor none \ + -kernel $< + grep -Fq success $(BUILD_PATH)/result + +$(BUILD_PATH)/init-cpio-spec.txt: + mkdir -p $(BUILD_PATH) + echo "file /init $(BUILD_PATH)/init 755 0 0" > $@ + echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@ + echo "dir /dev 755 0 0" >> $@ + echo "nod /dev/console 644 0 0 c 5 1" >> $@ + echo "dir /bin 755 0 0" >> $@ + echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@ + echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@ + echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@ + echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@ + echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@ + echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@ + echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@ + echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@ + echo "slink /bin/iptables xtables-multi 777 0 0" >> $@ + echo "slink /bin/ping6 ping 777 0 0" >> $@ + echo "dir /lib 755 0 0" >> $@ + echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ + echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@ + +$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config + mkdir -p $(KERNEL_BUILD_PATH) + cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config + printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config + cat arch/$(ARCH).config >> $(KERNEL_BUILD_PATH)/minimal.config + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) allnoconfig + cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config + $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) + +$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" + +$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install + touch $@ + +$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD) + $(MAKE) -C $(MUSL_PATH) + $(STRIP) -s $@ + +$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so + $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers + touch $@ + +$(MUSL_CC): $(MUSL_PATH)/lib/libc.so + sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs + printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc + chmod +x $(BUILD_PATH)/musl-gcc + +$(IPERF_PATH)/.installed: $(IPERF_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i '1s/^/#include <stdint.h>/' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h + sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile* + touch $@ + +$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared + $(MAKE) -C $(IPERF_PATH) + $(STRIP) -s $@ + +$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) + cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared + $(MAKE) -C $(LIBMNL_PATH) + sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc + +$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg + $(STRIP) -s $@ + +$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) + mkdir -p $(BUILD_PATH) + $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< + $(STRIP) -s $@ + +$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping + $(STRIP) -s $@ + +$(BASH_PATH)/.installed: $(BASH_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) + cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble + $(MAKE) -C $(BASH_PATH) + $(STRIP) -s $@ + +$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk + printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile + touch $@ + +$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip + $(STRIP) -s $(IPROUTE2_PATH)/ip/ip + +$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss + $(STRIP) -s $(IPROUTE2_PATH)/misc/ss + +$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure + touch $@ + +$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include + $(MAKE) -C $(IPTABLES_PATH) + $(STRIP) -s $@ + +$(NMAP_PATH)/.installed: $(NMAP_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS) + cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux + $(MAKE) -C $(NMAP_PATH) build-ncat + $(STRIP) -s $@ + +clean: + rm -rf $(BUILD_PATH) + +distclean: clean + rm -rf $(DISTFILES_PATH) + +menuconfig: $(KERNEL_BUILD_PATH)/.config + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig + +.PHONY: qemu build clean distclean menuconfig +.DELETE_ON_ERROR: diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config new file mode 100644 index 000000000000..3d063bb247bb --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config @@ -0,0 +1,5 @@ +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config new file mode 100644 index 000000000000..dbdc7e406a7b --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config @@ -0,0 +1,6 @@ +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config new file mode 100644 index 000000000000..148f49905418 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config @@ -0,0 +1,9 @@ +CONFIG_MMU=y +CONFIG_ARCH_MULTI_V7=y +CONFIG_ARCH_VIRT=y +CONFIG_THUMB2_KERNEL=n +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config new file mode 100644 index 000000000000..bd76b07d00a2 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config @@ -0,0 +1,10 @@ +CONFIG_MMU=y +CONFIG_ARCH_MULTI_V7=y +CONFIG_ARCH_VIRT=y +CONFIG_THUMB2_KERNEL=n +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config new file mode 100644 index 000000000000..a85025d7206e --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config @@ -0,0 +1,5 @@ +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config new file mode 100644 index 000000000000..5381ea10896c --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config @@ -0,0 +1,9 @@ +CONFIG_MMU=y +CONFIG_M68040=y +CONFIG_MAC=y +CONFIG_SERIAL_PMACZILOG=y +CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config new file mode 100644 index 000000000000..df71d6b95546 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config @@ -0,0 +1,11 @@ +CONFIG_CPU_MIPS32_R2=y +CONFIG_MIPS_MALTA=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config new file mode 100644 index 000000000000..90c783f725c4 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config @@ -0,0 +1,14 @@ +CONFIG_64BIT=y +CONFIG_CPU_MIPS64_R2=y +CONFIG_MIPS32_N32=y +CONFIG_CPU_HAS_MSA=y +CONFIG_MIPS_MALTA=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config new file mode 100644 index 000000000000..435b0b43e00c --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config @@ -0,0 +1,15 @@ +CONFIG_64BIT=y +CONFIG_CPU_MIPS64_R2=y +CONFIG_MIPS32_N32=y +CONFIG_CPU_HAS_MSA=y +CONFIG_MIPS_MALTA=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config new file mode 100644 index 000000000000..62bb50c4a85f --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config @@ -0,0 +1,12 @@ +CONFIG_CPU_MIPS32_R2=y +CONFIG_MIPS_MALTA=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config new file mode 100644 index 000000000000..57957093b71b --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config @@ -0,0 +1,10 @@ +CONFIG_PPC_QEMU_E500=y +CONFIG_FSL_SOC_BOOKE=y +CONFIG_PPC_85xx=y +CONFIG_PHYS_64BIT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_MATH_EMULATION=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config new file mode 100644 index 000000000000..990c510a9cfa --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config @@ -0,0 +1,12 @@ +CONFIG_PPC64=y +CONFIG_PPC_PSERIES=y +CONFIG_ALTIVEC=y +CONFIG_VSX=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_HVC_CONSOLE=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config new file mode 100644 index 000000000000..00a1ef4869d5 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config @@ -0,0 +1,5 @@ +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config new file mode 100644 index 000000000000..b9c72706fe4d --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -0,0 +1,67 @@ +CONFIG_LOCALVERSION="-debug" +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_POINTER=y +CONFIG_STACK_VALIDATION=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_PAGE_EXTENSION=y +CONFIG_PAGE_POISONING=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_FREE=y +CONFIG_DEBUG_OBJECTS_TIMERS=y +CONFIG_DEBUG_OBJECTS_WORK=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y +CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 +CONFIG_SLUB_DEBUG_ON=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_HAVE_DEBUG_STACKOVERFLOW=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_HAVE_ARCH_KMEMCHECK=y +CONFIG_HAVE_ARCH_KASAN=y +CONFIG_KASAN=y +CONFIG_KASAN_INLINE=y +CONFIG_UBSAN=y +CONFIG_UBSAN_SANITIZE_ALL=y +CONFIG_UBSAN_NO_ALIGNMENT=y +CONFIG_UBSAN_NULL=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_WQ_WATCHDOG=y +CONFIG_SCHED_DEBUG=y +CONFIG_SCHED_INFO=y +CONFIG_SCHEDSTATS=y +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_DEBUG_TIMEKEEPING=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_PREEMPT=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_TRACE_IRQFLAGS=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_PI_LIST=y +CONFIG_PROVE_RCU=y +CONFIG_SPARSE_RCU_POINTER=y +CONFIG_RCU_CPU_STALL_TIMEOUT=21 +CONFIG_RCU_TRACE=y +CONFIG_RCU_EQS_DEBUG=y +CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DOUBLEFAULT=y +CONFIG_X86_DEBUG_FPU=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y +CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c new file mode 100644 index 000000000000..51e5ddedee88 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/init.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#define _GNU_SOURCE +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <fcntl.h> +#include <sys/wait.h> +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/io.h> +#include <sys/ioctl.h> +#include <sys/reboot.h> +#include <sys/utsname.h> +#include <sys/sendfile.h> +#include <linux/random.h> +#include <linux/version.h> + +__attribute__((noreturn)) static void poweroff(void) +{ + fflush(stdout); + fflush(stderr); + reboot(RB_AUTOBOOT); + sleep(30); + fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n"); + exit(1); +} + +static void panic(const char *what) +{ + fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno)); + poweroff(); +} + +#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m") + +static void print_banner(void) +{ + struct utsname utsname; + int len; + + if (uname(&utsname) < 0) + panic("uname"); + + len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine); + printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, ""); +} + +static void seed_rng(void) +{ + int fd; + struct { + int entropy_count; + int buffer_size; + unsigned char buffer[256]; + } entropy = { + .entropy_count = sizeof(entropy.buffer) * 8, + .buffer_size = sizeof(entropy.buffer), + .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!" + }; + + if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9))) + panic("mknod(/dev/urandom)"); + fd = open("/dev/urandom", O_WRONLY); + if (fd < 0) + panic("open(urandom)"); + for (int i = 0; i < 256; ++i) { + if (ioctl(fd, RNDADDENTROPY, &entropy) < 0) + panic("ioctl(urandom)"); + } + close(fd); +} + +static void mount_filesystems(void) +{ + pretty_message("[+] Mounting filesystems..."); + mkdir("/dev", 0755); + mkdir("/proc", 0755); + mkdir("/sys", 0755); + mkdir("/tmp", 0755); + mkdir("/run", 0755); + mkdir("/var", 0755); + if (mount("none", "/dev", "devtmpfs", 0, NULL)) + panic("devtmpfs mount"); + if (mount("none", "/proc", "proc", 0, NULL)) + panic("procfs mount"); + if (mount("none", "/sys", "sysfs", 0, NULL)) + panic("sysfs mount"); + if (mount("none", "/tmp", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/run", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL)) + ; /* Not a problem if it fails.*/ + if (symlink("/run", "/var/run")) + panic("run symlink"); + if (symlink("/proc/self/fd", "/dev/fd")) + panic("fd symlink"); +} + +static void enable_logging(void) +{ + int fd; + pretty_message("[+] Enabling logging..."); + fd = open("/proc/sys/kernel/printk", O_WRONLY); + if (fd >= 0) { + if (write(fd, "9\n", 2) != 2) + panic("write(printk)"); + close(fd); + } + fd = open("/proc/sys/debug/exception-trace", O_WRONLY); + if (fd >= 0) { + if (write(fd, "1\n", 2) != 2) + panic("write(exception-trace)"); + close(fd); + } + fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY); + if (fd >= 0) { + if (write(fd, "1\n", 2) != 2) + panic("write(panic_on_warn)"); + close(fd); + } +} + +static void kmod_selftests(void) +{ + FILE *file; + char line[2048], *start, *pass; + bool success = true; + pretty_message("[+] Module self-tests:"); + file = fopen("/proc/kmsg", "r"); + if (!file) + panic("fopen(kmsg)"); + if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0) + panic("fcntl(kmsg, nonblock)"); + while (fgets(line, sizeof(line), file)) { + start = strstr(line, "wireguard: "); + if (!start) + continue; + start += 11; + *strchrnul(start, '\n') = '\0'; + if (strstr(start, "www.wireguard.com")) + break; + pass = strstr(start, ": pass"); + if (!pass || pass[6] != '\0') { + success = false; + printf(" \x1b[31m* %s\x1b[0m\n", start); + } else + printf(" \x1b[32m* %s\x1b[0m\n", start); + } + fclose(file); + if (!success) { + puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m"); + poweroff(); + } +} + +static void launch_tests(void) +{ + char cmdline[4096], *success_dev; + int status, fd; + pid_t pid; + + pretty_message("[+] Launching tests..."); + pid = fork(); + if (pid == -1) + panic("fork"); + else if (pid == 0) { + execl("/init.sh", "init", NULL); + panic("exec"); + } + if (waitpid(pid, &status, 0) < 0) + panic("waitpid"); + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + pretty_message("[+] Tests successful! :-)"); + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) + panic("open(/proc/cmdline)"); + if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0) + panic("read(/proc/cmdline)"); + cmdline[sizeof(cmdline) - 1] = '\0'; + for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) { + if (strncmp(success_dev, "wg.success=", 11)) + continue; + memcpy(success_dev + 11 - 5, "/dev/", 5); + success_dev += 11 - 5; + break; + } + if (!success_dev || !strlen(success_dev)) + panic("Unable to find success device"); + + fd = open(success_dev, O_WRONLY); + if (fd < 0) + panic("open(success_dev)"); + if (write(fd, "success\n", 8) != 8) + panic("write(success_dev)"); + close(fd); + } else { + const char *why = "unknown cause"; + int what = -1; + + if (WIFEXITED(status)) { + why = "exit code"; + what = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + why = "signal"; + what = WTERMSIG(status); + } + printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what); + } +} + +static void ensure_console(void) +{ + for (unsigned int i = 0; i < 1000; ++i) { + int fd = open("/dev/console", O_RDWR); + if (fd < 0) { + usleep(50000); + continue; + } + dup2(fd, 0); + dup2(fd, 1); + dup2(fd, 2); + close(fd); + if (write(1, "\0\0\0\0\n", 5) == 5) + return; + } + panic("Unable to open console device"); +} + +static void clear_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Starting memory leak detection..."); + write(fd, "clear\n", 5); + close(fd); +} + +static void check_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Scanning for memory leaks..."); + sleep(2); /* Wait for any grace periods. */ + write(fd, "scan\n", 5); + close(fd); + + fd = open("/sys/kernel/debug/kmemleak", O_RDONLY); + if (fd < 0) + return; + if (sendfile(1, fd, NULL, 0x7ffff000) > 0) + panic("Memory leaks encountered"); + close(fd); +} + +int main(int argc, char *argv[]) +{ + seed_rng(); + ensure_console(); + print_banner(); + mount_filesystems(); + kmod_selftests(); + enable_logging(); + clear_leaks(); + launch_tests(); + check_leaks(); + poweroff(); + return 1; +} diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config new file mode 100644 index 000000000000..9cca30206014 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -0,0 +1,86 @@ +CONFIG_LOCALVERSION="" +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NET_CORE=y +CONFIG_NET_IPIP=y +CONFIG_DUMMY=y +CONFIG_VETH=y +CONFIG_MULTIUSER=y +CONFIG_NAMESPACES=y +CONFIG_NET_NS=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IPV6=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_NAT=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_NAT=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_NAT_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_TTY=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_SCRIPT=y +CONFIG_VDSO=y +CONFIG_VIRTUALIZATION=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_KVM_GUEST=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_PRINTK=y +CONFIG_KALLSYMS=y +CONFIG_BUG=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y +CONFIG_EMBEDDED=n +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_SHMEM=y +CONFIG_SLUB=y +CONFIG_SPARSEMEM_VMEMMAP=y +CONFIG_SMP=y +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +CONFIG_NUMA=y +CONFIG_PREEMPT=y +CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_HZ_PERIODIC=n +CONFIG_HIGH_RES_TIMERS=y +CONFIG_ARCH_RANDOM=y +CONFIG_FILE_LOCKING=y +CONFIG_POSIX_TIMERS=y +CONFIG_DEVTMPFS=y +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_PRINTK_TIME=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_KERNEL_GZIP=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_WQ_WATCHDOG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y +CONFIG_PANIC_TIMEOUT=-1 +CONFIG_STACKTRACE=y +CONFIG_EARLY_PRINTK=y +CONFIG_GDB_SCRIPTS=y +CONFIG_WIREGUARD=y +CONFIG_WIREGUARD_DEBUG=y diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore index dc5f11faf530..7f7a2ccc30c4 100644 --- a/tools/testing/vsock/.gitignore +++ b/tools/testing/vsock/.gitignore @@ -1,2 +1,3 @@ *.d +vsock_test vsock_diag_test diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index 5be687b1e16c..f8293c6910c9 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -1,10 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only all: test -test: vsock_diag_test -vsock_diag_test: vsock_diag_test.o timeout.o control.o +test: vsock_test vsock_diag_test +vsock_test: vsock_test.o timeout.o control.o util.o +vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o -CFLAGS += -g -O2 -Werror -Wall -I. -I../../include/uapi -I../../include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE +CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean clean: - ${RM} *.o *.d vsock_diag_test + ${RM} *.o *.d vsock_test vsock_diag_test -include *.d diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 2cc6d7302db6..4d5045e7d2c3 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -5,12 +5,13 @@ Hyper-V. The following tests are available: + * vsock_test - core AF_VSOCK socket functionality * vsock_diag_test - vsock_diag.ko module for listing open sockets The following prerequisite steps are not automated and must be performed prior to running tests: -1. Build the kernel and these tests. +1. Build the kernel, make headers_install, and build these tests. 2. Install the kernel and tests on the host. 3. Install the kernel and tests inside the guest. 4. Boot the guest and ensure that the AF_VSOCK transport is enabled. diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c index 45f328c6ff23..4874872fc5a3 100644 --- a/tools/testing/vsock/control.c +++ b/tools/testing/vsock/control.c @@ -205,11 +205,22 @@ void control_expectln(const char *str) char *line; line = control_readln(); - if (strcmp(str, line) != 0) { + + control_cmpln(line, str, true); + + free(line); +} + +bool control_cmpln(char *line, const char *str, bool fail) +{ + if (strcmp(str, line) == 0) + return true; + + if (fail) { fprintf(stderr, "expected \"%s\" on control socket, got \"%s\"\n", str, line); exit(EXIT_FAILURE); } - free(line); + return false; } diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h index 54a07efd267c..51814b4f9ac1 100644 --- a/tools/testing/vsock/control.h +++ b/tools/testing/vsock/control.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef CONTROL_H #define CONTROL_H @@ -9,5 +10,6 @@ void control_cleanup(void); void control_writeln(const char *str); char *control_readln(void); void control_expectln(const char *str); +bool control_cmpln(char *line, const char *str, bool fail); #endif /* CONTROL_H */ diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h index 77db9ce9860a..ecb7c840e65a 100644 --- a/tools/testing/vsock/timeout.h +++ b/tools/testing/vsock/timeout.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef TIMEOUT_H #define TIMEOUT_H diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c new file mode 100644 index 000000000000..93cbd6f603f9 --- /dev/null +++ b/tools/testing/vsock/util.c @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vsock test utilities + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Stefan Hajnoczi <stefanha@redhat.com> + */ + +#include <errno.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <assert.h> +#include <sys/epoll.h> + +#include "timeout.h" +#include "control.h" +#include "util.h" + +/* Install signal handlers */ +void init_signals(void) +{ + struct sigaction act = { + .sa_handler = sigalrm, + }; + + sigaction(SIGALRM, &act, NULL); + signal(SIGPIPE, SIG_IGN); +} + +/* Parse a CID in string representation */ +unsigned int parse_cid(const char *str) +{ + char *endptr = NULL; + unsigned long n; + + errno = 0; + n = strtoul(str, &endptr, 10); + if (errno || *endptr != '\0') { + fprintf(stderr, "malformed CID \"%s\"\n", str); + exit(EXIT_FAILURE); + } + return n; +} + +/* Wait for the remote to close the connection */ +void vsock_wait_remote_close(int fd) +{ + struct epoll_event ev; + int epollfd, nfds; + + epollfd = epoll_create1(0); + if (epollfd == -1) { + perror("epoll_create1"); + exit(EXIT_FAILURE); + } + + ev.events = EPOLLRDHUP | EPOLLHUP; + ev.data.fd = fd; + if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) { + perror("epoll_ctl"); + exit(EXIT_FAILURE); + } + + nfds = epoll_wait(epollfd, &ev, 1, TIMEOUT * 1000); + if (nfds == -1) { + perror("epoll_wait"); + exit(EXIT_FAILURE); + } + + if (nfds == 0) { + fprintf(stderr, "epoll_wait timed out\n"); + exit(EXIT_FAILURE); + } + + assert(nfds == 1); + assert(ev.events & (EPOLLRDHUP | EPOLLHUP)); + assert(ev.data.fd == fd); + + close(epollfd); +} + +/* Connect to <cid, port> and return the file descriptor. */ +int vsock_stream_connect(unsigned int cid, unsigned int port) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = cid, + }, + }; + int ret; + int fd; + + control_expectln("LISTENING"); + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + timeout_begin(TIMEOUT); + do { + ret = connect(fd, &addr.sa, sizeof(addr.svm)); + timeout_check("connect"); + } while (ret < 0 && errno == EINTR); + timeout_end(); + + if (ret < 0) { + int old_errno = errno; + + close(fd); + fd = -1; + errno = old_errno; + } + return fd; +} + +/* Listen on <cid, port> and return the first incoming connection. The remote + * address is stored to clientaddrp. clientaddrp may be NULL. + */ +int vsock_stream_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = cid, + }, + }; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } clientaddr; + socklen_t clientaddr_len = sizeof(clientaddr.svm); + int fd; + int client_fd; + int old_errno; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("bind"); + exit(EXIT_FAILURE); + } + + if (listen(fd, 1) < 0) { + perror("listen"); + exit(EXIT_FAILURE); + } + + control_writeln("LISTENING"); + + timeout_begin(TIMEOUT); + do { + client_fd = accept(fd, &clientaddr.sa, &clientaddr_len); + timeout_check("accept"); + } while (client_fd < 0 && errno == EINTR); + timeout_end(); + + old_errno = errno; + close(fd); + errno = old_errno; + + if (client_fd < 0) + return client_fd; + + if (clientaddr_len != sizeof(clientaddr.svm)) { + fprintf(stderr, "unexpected addrlen from accept(2), %zu\n", + (size_t)clientaddr_len); + exit(EXIT_FAILURE); + } + if (clientaddr.sa.sa_family != AF_VSOCK) { + fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n", + clientaddr.sa.sa_family); + exit(EXIT_FAILURE); + } + + if (clientaddrp) + *clientaddrp = clientaddr.svm; + return client_fd; +} + +/* Transmit one byte and check the return value. + * + * expected_ret: + * <0 Negative errno (for testing errors) + * 0 End-of-file + * 1 Success + */ +void send_byte(int fd, int expected_ret, int flags) +{ + const uint8_t byte = 'A'; + ssize_t nwritten; + + timeout_begin(TIMEOUT); + do { + nwritten = send(fd, &byte, sizeof(byte), flags); + timeout_check("write"); + } while (nwritten < 0 && errno == EINTR); + timeout_end(); + + if (expected_ret < 0) { + if (nwritten != -1) { + fprintf(stderr, "bogus send(2) return value %zd\n", + nwritten); + exit(EXIT_FAILURE); + } + if (errno != -expected_ret) { + perror("write"); + exit(EXIT_FAILURE); + } + return; + } + + if (nwritten < 0) { + perror("write"); + exit(EXIT_FAILURE); + } + if (nwritten == 0) { + if (expected_ret == 0) + return; + + fprintf(stderr, "unexpected EOF while sending byte\n"); + exit(EXIT_FAILURE); + } + if (nwritten != sizeof(byte)) { + fprintf(stderr, "bogus send(2) return value %zd\n", nwritten); + exit(EXIT_FAILURE); + } +} + +/* Receive one byte and check the return value. + * + * expected_ret: + * <0 Negative errno (for testing errors) + * 0 End-of-file + * 1 Success + */ +void recv_byte(int fd, int expected_ret, int flags) +{ + uint8_t byte; + ssize_t nread; + + timeout_begin(TIMEOUT); + do { + nread = recv(fd, &byte, sizeof(byte), flags); + timeout_check("read"); + } while (nread < 0 && errno == EINTR); + timeout_end(); + + if (expected_ret < 0) { + if (nread != -1) { + fprintf(stderr, "bogus recv(2) return value %zd\n", + nread); + exit(EXIT_FAILURE); + } + if (errno != -expected_ret) { + perror("read"); + exit(EXIT_FAILURE); + } + return; + } + + if (nread < 0) { + perror("read"); + exit(EXIT_FAILURE); + } + if (nread == 0) { + if (expected_ret == 0) + return; + + fprintf(stderr, "unexpected EOF while receiving byte\n"); + exit(EXIT_FAILURE); + } + if (nread != sizeof(byte)) { + fprintf(stderr, "bogus recv(2) return value %zd\n", nread); + exit(EXIT_FAILURE); + } + if (byte != 'A') { + fprintf(stderr, "unexpected byte read %c\n", byte); + exit(EXIT_FAILURE); + } +} + +/* Run test cases. The program terminates if a failure occurs. */ +void run_tests(const struct test_case *test_cases, + const struct test_opts *opts) +{ + int i; + + for (i = 0; test_cases[i].name; i++) { + void (*run)(const struct test_opts *opts); + char *line; + + printf("%d - %s...", i, test_cases[i].name); + fflush(stdout); + + /* Full barrier before executing the next test. This + * ensures that client and server are executing the + * same test case. In particular, it means whoever is + * faster will not see the peer still executing the + * last test. This is important because port numbers + * can be used by multiple test cases. + */ + if (test_cases[i].skip) + control_writeln("SKIP"); + else + control_writeln("NEXT"); + + line = control_readln(); + if (control_cmpln(line, "SKIP", false) || test_cases[i].skip) { + + printf("skipped\n"); + + free(line); + continue; + } + + control_cmpln(line, "NEXT", true); + free(line); + + if (opts->mode == TEST_MODE_CLIENT) + run = test_cases[i].run_client; + else + run = test_cases[i].run_server; + + if (run) + run(opts); + + printf("ok\n"); + } +} + +void list_tests(const struct test_case *test_cases) +{ + int i; + + printf("ID\tTest name\n"); + + for (i = 0; test_cases[i].name; i++) + printf("%d\t%s\n", i, test_cases[i].name); + + exit(EXIT_FAILURE); +} + +void skip_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + unsigned long test_id; + char *endptr = NULL; + + errno = 0; + test_id = strtoul(test_id_str, &endptr, 10); + if (errno || *endptr != '\0') { + fprintf(stderr, "malformed test ID \"%s\"\n", test_id_str); + exit(EXIT_FAILURE); + } + + if (test_id >= test_cases_len) { + fprintf(stderr, "test ID (%lu) larger than the max allowed (%lu)\n", + test_id, test_cases_len - 1); + exit(EXIT_FAILURE); + } + + test_cases[test_id].skip = true; +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h new file mode 100644 index 000000000000..e53dd09d26d9 --- /dev/null +++ b/tools/testing/vsock/util.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef UTIL_H +#define UTIL_H + +#include <sys/socket.h> +#include <linux/vm_sockets.h> + +/* Tests can either run as the client or the server */ +enum test_mode { + TEST_MODE_UNSET, + TEST_MODE_CLIENT, + TEST_MODE_SERVER +}; + +/* Test runner options */ +struct test_opts { + enum test_mode mode; + unsigned int peer_cid; +}; + +/* A test case definition. Test functions must print failures to stderr and + * terminate with exit(EXIT_FAILURE). + */ +struct test_case { + const char *name; /* human-readable name */ + + /* Called when test mode is TEST_MODE_CLIENT */ + void (*run_client)(const struct test_opts *opts); + + /* Called when test mode is TEST_MODE_SERVER */ + void (*run_server)(const struct test_opts *opts); + + bool skip; +}; + +void init_signals(void); +unsigned int parse_cid(const char *str); +int vsock_stream_connect(unsigned int cid, unsigned int port); +int vsock_stream_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp); +void vsock_wait_remote_close(int fd); +void send_byte(int fd, int expected_ret, int flags); +void recv_byte(int fd, int expected_ret, int flags); +void run_tests(const struct test_case *test_cases, + const struct test_opts *opts); +void list_tests(const struct test_case *test_cases); +void skip_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str); +#endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c index c481101364a4..cec6f5a738e1 100644 --- a/tools/testing/vsock/vsock_diag_test.c +++ b/tools/testing/vsock/vsock_diag_test.c @@ -9,32 +9,22 @@ #include <getopt.h> #include <stdio.h> -#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <errno.h> #include <unistd.h> -#include <signal.h> -#include <sys/socket.h> #include <sys/stat.h> #include <sys/types.h> #include <linux/list.h> #include <linux/net.h> #include <linux/netlink.h> #include <linux/sock_diag.h> +#include <linux/vm_sockets_diag.h> #include <netinet/tcp.h> -#include "../../../include/uapi/linux/vm_sockets.h" -#include "../../../include/uapi/linux/vm_sockets_diag.h" - #include "timeout.h" #include "control.h" - -enum test_mode { - TEST_MODE_UNSET, - TEST_MODE_CLIENT, - TEST_MODE_SERVER -}; +#include "util.h" /* Per-socket status */ struct vsock_stat { @@ -335,7 +325,7 @@ static void free_sock_stat(struct list_head *sockets) free(st); } -static void test_no_sockets(unsigned int peer_cid) +static void test_no_sockets(const struct test_opts *opts) { LIST_HEAD(sockets); @@ -346,7 +336,7 @@ static void test_no_sockets(unsigned int peer_cid) free_sock_stat(&sockets); } -static void test_listen_socket_server(unsigned int peer_cid) +static void test_listen_socket_server(const struct test_opts *opts) { union { struct sockaddr sa; @@ -384,35 +374,14 @@ static void test_listen_socket_server(unsigned int peer_cid) free_sock_stat(&sockets); } -static void test_connect_client(unsigned int peer_cid) +static void test_connect_client(const struct test_opts *opts) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = 1234, - .svm_cid = peer_cid, - }, - }; int fd; - int ret; LIST_HEAD(sockets); struct vsock_stat *st; - control_expectln("LISTENING"); - - fd = socket(AF_VSOCK, SOCK_STREAM, 0); - - timeout_begin(TIMEOUT); - do { - ret = connect(fd, &addr.sa, sizeof(addr.svm)); - timeout_check("connect"); - } while (ret < 0 && errno == EINTR); - timeout_end(); - - if (ret < 0) { + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); } @@ -430,68 +399,21 @@ static void test_connect_client(unsigned int peer_cid) free_sock_stat(&sockets); } -static void test_connect_server(unsigned int peer_cid) +static void test_connect_server(const struct test_opts *opts) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = 1234, - .svm_cid = VMADDR_CID_ANY, - }, - }; - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } clientaddr; - socklen_t clientaddr_len = sizeof(clientaddr.svm); - LIST_HEAD(sockets); struct vsock_stat *st; - int fd; + LIST_HEAD(sockets); int client_fd; - fd = socket(AF_VSOCK, SOCK_STREAM, 0); - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } - - if (listen(fd, 1) < 0) { - perror("listen"); - exit(EXIT_FAILURE); - } - - control_writeln("LISTENING"); - - timeout_begin(TIMEOUT); - do { - client_fd = accept(fd, &clientaddr.sa, &clientaddr_len); - timeout_check("accept"); - } while (client_fd < 0 && errno == EINTR); - timeout_end(); - + client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); if (client_fd < 0) { perror("accept"); exit(EXIT_FAILURE); } - if (clientaddr.sa.sa_family != AF_VSOCK) { - fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n", - clientaddr.sa.sa_family); - exit(EXIT_FAILURE); - } - if (clientaddr.svm.svm_cid != peer_cid) { - fprintf(stderr, "expected peer CID %u from accept(2), got %u\n", - peer_cid, clientaddr.svm.svm_cid); - exit(EXIT_FAILURE); - } read_vsock_stat(&sockets); - check_num_sockets(&sockets, 2); - find_vsock_stat(&sockets, fd); + check_num_sockets(&sockets, 1); st = find_vsock_stat(&sockets, client_fd); check_socket_state(st, TCP_ESTABLISHED); @@ -499,15 +421,10 @@ static void test_connect_server(unsigned int peer_cid) control_expectln("DONE"); close(client_fd); - close(fd); free_sock_stat(&sockets); } -static struct { - const char *name; - void (*run_client)(unsigned int peer_cid); - void (*run_server)(unsigned int peer_cid); -} test_cases[] = { +static struct test_case test_cases[] = { { .name = "No sockets", .run_server = test_no_sockets, @@ -524,30 +441,6 @@ static struct { {}, }; -static void init_signals(void) -{ - struct sigaction act = { - .sa_handler = sigalrm, - }; - - sigaction(SIGALRM, &act, NULL); - signal(SIGPIPE, SIG_IGN); -} - -static unsigned int parse_cid(const char *str) -{ - char *endptr = NULL; - unsigned long int n; - - errno = 0; - n = strtoul(str, &endptr, 10); - if (errno || *endptr != '\0') { - fprintf(stderr, "malformed CID \"%s\"\n", str); - exit(EXIT_FAILURE); - } - return n; -} - static const char optstring[] = ""; static const struct option longopts[] = { { @@ -571,6 +464,16 @@ static const struct option longopts[] = { .val = 'p', }, { + .name = "list", + .has_arg = no_argument, + .val = 'l', + }, + { + .name = "skip", + .has_arg = required_argument, + .val = 's', + }, + { .name = "help", .has_arg = no_argument, .val = '?', @@ -580,7 +483,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n" + fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n" "\n" " Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n" " Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" @@ -594,7 +497,18 @@ static void usage(void) "listen address and the client requires an address to\n" "connect to.\n" "\n" - "The CID of the other side must be given with --peer-cid=<cid>.\n"); + "The CID of the other side must be given with --peer-cid=<cid>.\n" + "\n" + "Options:\n" + " --help This help message\n" + " --control-host <host> Server IP address to connect to\n" + " --control-port <port> Server port to listen on/connect to\n" + " --mode client|server Server or client mode\n" + " --peer-cid <cid> CID of the other side\n" + " --list List of tests that will be executed\n" + " --skip <test_id> Test ID to skip;\n" + " use multiple --skip options to skip more tests\n" + ); exit(EXIT_FAILURE); } @@ -602,9 +516,10 @@ int main(int argc, char **argv) { const char *control_host = NULL; const char *control_port = NULL; - int mode = TEST_MODE_UNSET; - unsigned int peer_cid = VMADDR_CID_ANY; - int i; + struct test_opts opts = { + .mode = TEST_MODE_UNSET, + .peer_cid = VMADDR_CID_ANY, + }; init_signals(); @@ -620,20 +535,27 @@ int main(int argc, char **argv) break; case 'm': if (strcmp(optarg, "client") == 0) - mode = TEST_MODE_CLIENT; + opts.mode = TEST_MODE_CLIENT; else if (strcmp(optarg, "server") == 0) - mode = TEST_MODE_SERVER; + opts.mode = TEST_MODE_SERVER; else { fprintf(stderr, "--mode must be \"client\" or \"server\"\n"); return EXIT_FAILURE; } break; case 'p': - peer_cid = parse_cid(optarg); + opts.peer_cid = parse_cid(optarg); break; case 'P': control_port = optarg; break; + case 'l': + list_tests(test_cases); + break; + case 's': + skip_test(test_cases, ARRAY_SIZE(test_cases) - 1, + optarg); + break; case '?': default: usage(); @@ -642,35 +564,21 @@ int main(int argc, char **argv) if (!control_port) usage(); - if (mode == TEST_MODE_UNSET) + if (opts.mode == TEST_MODE_UNSET) usage(); - if (peer_cid == VMADDR_CID_ANY) + if (opts.peer_cid == VMADDR_CID_ANY) usage(); if (!control_host) { - if (mode != TEST_MODE_SERVER) + if (opts.mode != TEST_MODE_SERVER) usage(); control_host = "0.0.0.0"; } - control_init(control_host, control_port, mode == TEST_MODE_SERVER); + control_init(control_host, control_port, + opts.mode == TEST_MODE_SERVER); - for (i = 0; test_cases[i].name; i++) { - void (*run)(unsigned int peer_cid); - - printf("%s...", test_cases[i].name); - fflush(stdout); - - if (mode == TEST_MODE_CLIENT) - run = test_cases[i].run_client; - else - run = test_cases[i].run_server; - - if (run) - run(peer_cid); - - printf("ok\n"); - } + run_tests(test_cases, &opts); control_cleanup(); return EXIT_SUCCESS; diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c new file mode 100644 index 000000000000..1d8b93f1af31 --- /dev/null +++ b/tools/testing/vsock/vsock_test.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vsock_test - vsock.ko test suite + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Author: Stefan Hajnoczi <stefanha@redhat.com> + */ + +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <linux/kernel.h> + +#include "timeout.h" +#include "control.h" +#include "util.h" + +static void test_stream_connection_reset(const struct test_opts *opts) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = 1234, + .svm_cid = opts->peer_cid, + }, + }; + int ret; + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + timeout_begin(TIMEOUT); + do { + ret = connect(fd, &addr.sa, sizeof(addr.svm)); + timeout_check("connect"); + } while (ret < 0 && errno == EINTR); + timeout_end(); + + if (ret != -1) { + fprintf(stderr, "expected connect(2) failure, got %d\n", ret); + exit(EXIT_FAILURE); + } + if (errno != ECONNRESET) { + fprintf(stderr, "unexpected connect(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_client_close_client(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + send_byte(fd, 1, 0); + close(fd); +} + +static void test_stream_client_close_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + /* Wait for the remote to close the connection, before check + * -EPIPE error on send. + */ + vsock_wait_remote_close(fd); + + send_byte(fd, -EPIPE, 0); + recv_byte(fd, 1, 0); + recv_byte(fd, 0, 0); + close(fd); +} + +static void test_stream_server_close_client(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Wait for the remote to close the connection, before check + * -EPIPE error on send. + */ + vsock_wait_remote_close(fd); + + send_byte(fd, -EPIPE, 0); + recv_byte(fd, 1, 0); + recv_byte(fd, 0, 0); + close(fd); +} + +static void test_stream_server_close_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + send_byte(fd, 1, 0); + close(fd); +} + +/* With the standard socket sizes, VMCI is able to support about 100 + * concurrent stream connections. + */ +#define MULTICONN_NFDS 100 + +static void test_stream_multiconn_client(const struct test_opts *opts) +{ + int fds[MULTICONN_NFDS]; + int i; + + for (i = 0; i < MULTICONN_NFDS; i++) { + fds[i] = vsock_stream_connect(opts->peer_cid, 1234); + if (fds[i] < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + } + + for (i = 0; i < MULTICONN_NFDS; i++) { + if (i % 2) + recv_byte(fds[i], 1, 0); + else + send_byte(fds[i], 1, 0); + } + + for (i = 0; i < MULTICONN_NFDS; i++) + close(fds[i]); +} + +static void test_stream_multiconn_server(const struct test_opts *opts) +{ + int fds[MULTICONN_NFDS]; + int i; + + for (i = 0; i < MULTICONN_NFDS; i++) { + fds[i] = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fds[i] < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + } + + for (i = 0; i < MULTICONN_NFDS; i++) { + if (i % 2) + send_byte(fds[i], 1, 0); + else + recv_byte(fds[i], 1, 0); + } + + for (i = 0; i < MULTICONN_NFDS; i++) + close(fds[i]); +} + +static void test_stream_msg_peek_client(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + send_byte(fd, 1, 0); + close(fd); +} + +static void test_stream_msg_peek_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + recv_byte(fd, 1, MSG_PEEK); + recv_byte(fd, 1, 0); + close(fd); +} + +static struct test_case test_cases[] = { + { + .name = "SOCK_STREAM connection reset", + .run_client = test_stream_connection_reset, + }, + { + .name = "SOCK_STREAM client close", + .run_client = test_stream_client_close_client, + .run_server = test_stream_client_close_server, + }, + { + .name = "SOCK_STREAM server close", + .run_client = test_stream_server_close_client, + .run_server = test_stream_server_close_server, + }, + { + .name = "SOCK_STREAM multiple connections", + .run_client = test_stream_multiconn_client, + .run_server = test_stream_multiconn_server, + }, + { + .name = "SOCK_STREAM MSG_PEEK", + .run_client = test_stream_msg_peek_client, + .run_server = test_stream_msg_peek_server, + }, + {}, +}; + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "control-host", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "control-port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "mode", + .has_arg = required_argument, + .val = 'm', + }, + { + .name = "peer-cid", + .has_arg = required_argument, + .val = 'p', + }, + { + .name = "list", + .has_arg = no_argument, + .val = 'l', + }, + { + .name = "skip", + .has_arg = required_argument, + .val = 's', + }, + { + .name = "help", + .has_arg = no_argument, + .val = '?', + }, + {}, +}; + +static void usage(void) +{ + fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n" + "\n" + " Server: vsock_test --control-port=1234 --mode=server --peer-cid=3\n" + " Client: vsock_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" + "\n" + "Run vsock.ko tests. Must be launched in both guest\n" + "and host. One side must use --mode=client and\n" + "the other side must use --mode=server.\n" + "\n" + "A TCP control socket connection is used to coordinate tests\n" + "between the client and the server. The server requires a\n" + "listen address and the client requires an address to\n" + "connect to.\n" + "\n" + "The CID of the other side must be given with --peer-cid=<cid>.\n" + "\n" + "Options:\n" + " --help This help message\n" + " --control-host <host> Server IP address to connect to\n" + " --control-port <port> Server port to listen on/connect to\n" + " --mode client|server Server or client mode\n" + " --peer-cid <cid> CID of the other side\n" + " --list List of tests that will be executed\n" + " --skip <test_id> Test ID to skip;\n" + " use multiple --skip options to skip more tests\n" + ); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + const char *control_host = NULL; + const char *control_port = NULL; + struct test_opts opts = { + .mode = TEST_MODE_UNSET, + .peer_cid = VMADDR_CID_ANY, + }; + + init_signals(); + + for (;;) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'H': + control_host = optarg; + break; + case 'm': + if (strcmp(optarg, "client") == 0) + opts.mode = TEST_MODE_CLIENT; + else if (strcmp(optarg, "server") == 0) + opts.mode = TEST_MODE_SERVER; + else { + fprintf(stderr, "--mode must be \"client\" or \"server\"\n"); + return EXIT_FAILURE; + } + break; + case 'p': + opts.peer_cid = parse_cid(optarg); + break; + case 'P': + control_port = optarg; + break; + case 'l': + list_tests(test_cases); + break; + case 's': + skip_test(test_cases, ARRAY_SIZE(test_cases) - 1, + optarg); + break; + case '?': + default: + usage(); + } + } + + if (!control_port) + usage(); + if (opts.mode == TEST_MODE_UNSET) + usage(); + if (opts.peer_cid == VMADDR_CID_ANY) + usage(); + + if (!control_host) { + if (opts.mode != TEST_MODE_SERVER) + usage(); + control_host = "0.0.0.0"; + } + + control_init(control_host, control_port, + opts.mode == TEST_MODE_SERVER); + + run_tests(test_cases, &opts); + + control_cleanup(); + return EXIT_SUCCESS; +} |