summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/imx-dwmac.txt56
-rw-r--r--Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml93
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml3
-rw-r--r--Documentation/networking/af_xdp.rst6
-rw-r--r--Documentation/networking/ip-sysctl.rst2
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp.dtsi6
-rw-r--r--arch/s390/net/bpf_jit_comp.c2
-rw-r--r--drivers/net/bonding/bond_main.c2
-rw-r--r--drivers/net/dsa/mt7530.c2
-rw-r--r--drivers/net/dsa/mt7530.h1
-rw-r--r--drivers/net/dsa/mv88e6xxx/Kconfig2
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c85
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c10
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c24
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c9
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c16
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.c1
-rw-r--r--drivers/net/ethernet/hisilicon/hip04_eth.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c10
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c19
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/Makefile2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c10
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h21
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c48
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c36
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c47
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c29
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c258
-rw-r--r--drivers/net/ethernet/microchip/sparx5/Kconfig1
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c3
-rw-r--r--drivers/net/ethernet/renesas/ravb.h2
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c2
-rw-r--r--drivers/net/ethernet/xscale/ptp_ixp46x.c2
-rw-r--r--drivers/net/usb/hso.c33
-rw-r--r--drivers/net/usb/r8152.c30
-rw-r--r--include/net/tcp.h1
-rw-r--r--include/trace/events/net.h2
-rw-r--r--include/trace/events/qdisc.h28
-rw-r--r--kernel/bpf/verifier.c2
-rw-r--r--net/bpf/test_run.c3
-rw-r--r--net/bridge/br_fdb.c2
-rw-r--r--net/caif/caif_socket.c3
-rw-r--r--net/core/dev.c34
-rw-r--r--net/core/skbuff.c18
-rw-r--r--net/core/skmsg.c16
-rw-r--r--net/decnet/af_decnet.c27
-rw-r--r--net/dsa/slave.c14
-rw-r--r--net/dsa/tag_ksz.c9
-rw-r--r--net/ipv4/tcp_bpf.c2
-rw-r--r--net/ipv4/tcp_fastopen.c28
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/udp.c25
-rw-r--r--net/ipv4/udp_bpf.c2
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/udp.c25
-rw-r--r--net/netrom/nr_timer.c20
-rw-r--r--net/sched/act_skbmod.c12
-rw-r--r--net/sched/cls_api.c2
-rw-r--r--net/sched/cls_tcindex.c5
-rw-r--r--net/sctp/auth.c2
-rw-r--r--net/sctp/output.c4
-rw-r--r--net/sctp/socket.c4
-rw-r--r--tools/bpf/bpftool/common.c5
-rw-r--r--tools/testing/selftests/net/nettest.c55
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh212
77 files changed, 1217 insertions, 269 deletions
diff --git a/Documentation/devicetree/bindings/net/imx-dwmac.txt b/Documentation/devicetree/bindings/net/imx-dwmac.txt
deleted file mode 100644
index 921d522fe8d7..000000000000
--- a/Documentation/devicetree/bindings/net/imx-dwmac.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-IMX8 glue layer controller, NXP imx8 families support Synopsys MAC 5.10a IP.
-
-This file documents platform glue layer for IMX.
-Please see stmmac.txt for the other unchanged properties.
-
-The device node has following properties.
-
-Required properties:
-- compatible: Should be "nxp,imx8mp-dwmac-eqos" to select glue layer
- and "snps,dwmac-5.10a" to select IP version.
-- clocks: Must contain a phandle for each entry in clock-names.
-- clock-names: Should be "stmmaceth" for the host clock.
- Should be "pclk" for the MAC apb clock.
- Should be "ptp_ref" for the MAC timer clock.
- Should be "tx" for the MAC RGMII TX clock:
- Should be "mem" for EQOS MEM clock.
- - "mem" clock is required for imx8dxl platform.
- - "mem" clock is not required for imx8mp platform.
-- interrupt-names: Should contain a list of interrupt names corresponding to
- the interrupts in the interrupts property, if available.
- Should be "macirq" for the main MAC IRQ
- Should be "eth_wake_irq" for the IT which wake up system
-- intf_mode: Should be phandle/offset pair. The phandle to the syscon node which
- encompases the GPR register, and the offset of the GPR register.
- - required for imx8mp platform.
- - is optional for imx8dxl platform.
-
-Optional properties:
-- intf_mode: is optional for imx8dxl platform.
-- snps,rmii_refclk_ext: to select RMII reference clock from external.
-
-Example:
- eqos: ethernet@30bf0000 {
- compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a";
- reg = <0x30bf0000 0x10000>;
- interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "eth_wake_irq", "macirq";
- clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>,
- <&clk IMX8MP_CLK_QOS_ENET_ROOT>,
- <&clk IMX8MP_CLK_ENET_QOS_TIMER>,
- <&clk IMX8MP_CLK_ENET_QOS>;
- clock-names = "stmmaceth", "pclk", "ptp_ref", "tx";
- assigned-clocks = <&clk IMX8MP_CLK_ENET_AXI>,
- <&clk IMX8MP_CLK_ENET_QOS_TIMER>,
- <&clk IMX8MP_CLK_ENET_QOS>;
- assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_266M>,
- <&clk IMX8MP_SYS_PLL2_100M>,
- <&clk IMX8MP_SYS_PLL2_125M>;
- assigned-clock-rates = <0>, <100000000>, <125000000>;
- nvmem-cells = <&eth_mac0>;
- nvmem-cell-names = "mac-address";
- nvmem_macaddr_swap;
- intf_mode = <&gpr 0x4>;
- status = "disabled";
- };
diff --git a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
new file mode 100644
index 000000000000..5629b2e4ccf8
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/nxp,dwmac-imx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8 DWMAC glue layer Device Tree Bindings
+
+maintainers:
+ - Joakim Zhang <qiangqing.zhang@nxp.com>
+
+# We need a select here so we don't match all nodes with 'snps,dwmac'
+select:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - nxp,imx8mp-dwmac-eqos
+ - nxp,imx8dxl-dwmac-eqos
+ required:
+ - compatible
+
+allOf:
+ - $ref: "snps,dwmac.yaml#"
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - nxp,imx8mp-dwmac-eqos
+ - nxp,imx8dxl-dwmac-eqos
+ - const: snps,dwmac-5.10a
+
+ clocks:
+ minItems: 3
+ maxItems: 5
+ items:
+ - description: MAC host clock
+ - description: MAC apb clock
+ - description: MAC timer clock
+ - description: MAC RGMII TX clock
+ - description: EQOS MEM clock
+
+ clock-names:
+ minItems: 3
+ maxItems: 5
+ contains:
+ enum:
+ - stmmaceth
+ - pclk
+ - ptp_ref
+ - tx
+ - mem
+
+ intf_mode:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description:
+ Should be phandle/offset pair. The phandle to the syscon node which
+ encompases the GPR register, and the offset of the GPR register.
+
+ snps,rmii_refclk_ext:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ To select RMII reference clock from external.
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/imx8mp-clock.h>
+
+ eqos: ethernet@30bf0000 {
+ compatible = "nxp,imx8mp-dwmac-eqos","snps,dwmac-5.10a";
+ reg = <0x30bf0000 0x10000>;
+ interrupts = <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_wake_irq";
+ clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>,
+ <&clk IMX8MP_CLK_QOS_ENET_ROOT>,
+ <&clk IMX8MP_CLK_ENET_QOS_TIMER>,
+ <&clk IMX8MP_CLK_ENET_QOS>;
+ clock-names = "stmmaceth", "pclk", "ptp_ref", "tx";
+ phy-mode = "rgmii";
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index d7652596a09b..42689b7d03a2 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -28,6 +28,7 @@ select:
- snps,dwmac-4.00
- snps,dwmac-4.10a
- snps,dwmac-4.20a
+ - snps,dwmac-5.10a
- snps,dwxgmac
- snps,dwxgmac-2.10
@@ -82,6 +83,7 @@ properties:
- snps,dwmac-4.00
- snps,dwmac-4.10a
- snps,dwmac-4.20a
+ - snps,dwmac-5.10a
- snps,dwxgmac
- snps,dwxgmac-2.10
@@ -375,6 +377,7 @@ allOf:
- snps,dwmac-4.00
- snps,dwmac-4.10a
- snps,dwmac-4.20a
+ - snps,dwmac-5.10a
- snps,dwxgmac
- snps,dwxgmac-2.10
- st,spear600-gmac
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 42576880aa4a..60b217b436be 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -243,8 +243,8 @@ Configuration Flags and Socket Options
These are the various configuration flags that can be used to control
and monitor the behavior of AF_XDP sockets.
-XDP_COPY and XDP_ZERO_COPY bind flags
--------------------------------------
+XDP_COPY and XDP_ZEROCOPY bind flags
+------------------------------------
When you bind to a socket, the kernel will first try to use zero-copy
copy. If zero-copy is not supported, it will fall back on using copy
@@ -252,7 +252,7 @@ mode, i.e. copying all packets out to user space. But if you would
like to force a certain mode, you can use the following flags. If you
pass the XDP_COPY flag to the bind call, the kernel will force the
socket into copy mode. If it cannot use copy mode, the bind call will
-fail with an error. Conversely, the XDP_ZERO_COPY flag will force the
+fail with an error. Conversely, the XDP_ZEROCOPY flag will force the
socket into zero-copy mode or fail.
XDP_SHARED_UMEM bind flag
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index b3fa522e4cd9..316c7dfa9693 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -826,7 +826,7 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER
initial value when the blackhole issue goes away.
0 to disable the blackhole detection.
- By default, it is set to 1hr.
+ By default, it is set to 0 (feature is disabled).
tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
The list consists of a primary key and an optional backup key. The
diff --git a/MAINTAINERS b/MAINTAINERS
index 6c8be735cc91..58afeb12d3b3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11758,6 +11758,7 @@ F: drivers/char/hw_random/mtk-rng.c
MEDIATEK SWITCH DRIVER
M: Sean Wang <sean.wang@mediatek.com>
M: Landen Chao <Landen.Chao@mediatek.com>
+M: DENG Qingfang <dqfext@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/dsa/mt7530.*
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index 9f7c7f587d38..ca38d0d6c3c4 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -821,9 +821,9 @@
eqos: ethernet@30bf0000 {
compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a";
reg = <0x30bf0000 0x10000>;
- interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "eth_wake_irq", "macirq";
+ interrupts = <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_wake_irq";
clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>,
<&clk IMX8MP_CLK_QOS_ENET_ROOT>,
<&clk IMX8MP_CLK_ENET_QOS_TIMER>,
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 63cae0476bb4..2ae419f5115a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -112,7 +112,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
{
u32 r1 = reg2hex[b1];
- if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15)
+ if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1])
jit->seen_reg[r1] = 1;
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d22d78303311..31730efa7538 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3450,7 +3450,9 @@ static int bond_master_netdev_event(unsigned long event,
return bond_event_changename(event_bond);
case NETDEV_UNREGISTER:
bond_remove_proc_entry(event_bond);
+#ifdef CONFIG_XFRM_OFFLOAD
xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true);
+#endif /* CONFIG_XFRM_OFFLOAD */
break;
case NETDEV_REGISTER:
bond_create_proc_entry(event_bond);
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 93136f7e69f5..69f21b71614c 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -366,6 +366,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid,
int i;
reg[1] |= vid & CVID_MASK;
+ if (vid > 1)
+ reg[1] |= ATA2_IVL;
reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER;
reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP;
/* STATIC_ENT indicate that entry is static wouldn't
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 334d610a503d..b19b389ff10a 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -79,6 +79,7 @@ enum mt753x_bpdu_port_fw {
#define STATIC_EMP 0
#define STATIC_ENT 3
#define MT7530_ATA2 0x78
+#define ATA2_IVL BIT(15)
/* Register for address table write data */
#define MT7530_ATWD 0x7c
diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
index 05af632b0f59..634a48e6616b 100644
--- a/drivers/net/dsa/mv88e6xxx/Kconfig
+++ b/drivers/net/dsa/mv88e6xxx/Kconfig
@@ -12,7 +12,7 @@ config NET_DSA_MV88E6XXX
config NET_DSA_MV88E6XXX_PTP
bool "PTP support for Marvell 88E6xxx"
default n
- depends on PTP_1588_CLOCK
+ depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK
help
Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
chips that support it.
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index ced8c9cb29c2..e2dc997580a8 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -397,6 +397,12 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
if (dsa_is_cpu_port(ds, port))
v->pvid = true;
list_add(&v->list, &priv->dsa_8021q_vlans);
+
+ v = kmemdup(v, sizeof(*v), GFP_KERNEL);
+ if (!v)
+ return -ENOMEM;
+
+ list_add(&v->list, &priv->bridge_vlans);
}
((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f56245eeef7b..4db162cee911 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1671,11 +1671,16 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) &&
(skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) {
- u16 vlan_proto = tpa_info->metadata >>
- RX_CMP_FLAGS2_METADATA_TPID_SFT;
+ __be16 vlan_proto = htons(tpa_info->metadata >>
+ RX_CMP_FLAGS2_METADATA_TPID_SFT);
u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK;
- __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
+ if (eth_type_vlan(vlan_proto)) {
+ __vlan_hwaccel_put_tag(skb, vlan_proto, vtag);
+ } else {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
}
skb_checksum_none_assert(skb);
@@ -1897,9 +1902,15 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
(skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) {
u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data);
u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK;
- u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT;
+ __be16 vlan_proto = htons(meta_data >>
+ RX_CMP_FLAGS2_METADATA_TPID_SFT);
- __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
+ if (eth_type_vlan(vlan_proto)) {
+ __vlan_hwaccel_put_tag(skb, vlan_proto, vtag);
+ } else {
+ dev_kfree_skb(skb);
+ goto next_rx;
+ }
}
skb_checksum_none_assert(skb);
@@ -7563,8 +7574,12 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
bp->flags &= ~BNXT_FLAG_WOL_CAP;
if (flags & FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED)
bp->flags |= BNXT_FLAG_WOL_CAP;
- if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED)
+ if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) {
__bnxt_hwrm_ptp_qcfg(bp);
+ } else {
+ kfree(bp->ptp_cfg);
+ bp->ptp_cfg = NULL;
+ }
} else {
#ifdef CONFIG_BNXT_SRIOV
struct bnxt_vf_info *vf = &bp->vf;
@@ -10123,7 +10138,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
}
}
- bnxt_ptp_start(bp);
rc = bnxt_init_nic(bp, irq_re_init);
if (rc) {
netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc);
@@ -10197,6 +10211,12 @@ int bnxt_half_open_nic(struct bnxt *bp)
{
int rc = 0;
+ if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) {
+ netdev_err(bp->dev, "A previous firmware reset has not completed, aborting half open\n");
+ rc = -ENODEV;
+ goto half_open_err;
+ }
+
rc = bnxt_alloc_mem(bp, false);
if (rc) {
netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc);
@@ -10256,9 +10276,16 @@ static int bnxt_open(struct net_device *dev)
rc = bnxt_hwrm_if_change(bp, true);
if (rc)
return rc;
+
+ if (bnxt_ptp_init(bp)) {
+ netdev_warn(dev, "PTP initialization failed.\n");
+ kfree(bp->ptp_cfg);
+ bp->ptp_cfg = NULL;
+ }
rc = __bnxt_open_nic(bp, true, true);
if (rc) {
bnxt_hwrm_if_change(bp, false);
+ bnxt_ptp_clear(bp);
} else {
if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) {
if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
@@ -10349,6 +10376,7 @@ static int bnxt_close(struct net_device *dev)
{
struct bnxt *bp = netdev_priv(dev);
+ bnxt_ptp_clear(bp);
bnxt_hwmon_close(bp);
bnxt_close_nic(bp, true, true);
bnxt_hwrm_shutdown_link(bp);
@@ -11335,6 +11363,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp)
bnxt_clear_int_mode(bp);
pci_disable_device(bp->pdev);
}
+ bnxt_ptp_clear(bp);
__bnxt_close_nic(bp, true, false);
bnxt_vf_reps_free(bp);
bnxt_clear_int_mode(bp);
@@ -11959,10 +11988,21 @@ static bool bnxt_fw_reset_timeout(struct bnxt *bp)
(bp->fw_reset_max_dsecs * HZ / 10));
}
+static void bnxt_fw_reset_abort(struct bnxt *bp, int rc)
+{
+ clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+ if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) {
+ bnxt_ulp_start(bp, rc);
+ bnxt_dl_health_status_update(bp, false);
+ }
+ bp->fw_reset_state = 0;
+ dev_close(bp->dev);
+}
+
static void bnxt_fw_reset_task(struct work_struct *work)
{
struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work);
- int rc;
+ int rc = 0;
if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
netdev_err(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n");
@@ -11992,6 +12032,11 @@ static void bnxt_fw_reset_task(struct work_struct *work)
}
bp->fw_reset_timestamp = jiffies;
rtnl_lock();
+ if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) {
+ bnxt_fw_reset_abort(bp, rc);
+ rtnl_unlock();
+ return;
+ }
bnxt_fw_reset_close(bp);
if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) {
bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW_DOWN;
@@ -12039,6 +12084,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
if (val == 0xffff) {
if (bnxt_fw_reset_timeout(bp)) {
netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n");
+ rc = -ETIMEDOUT;
goto fw_reset_abort;
}
bnxt_queue_fw_reset_work(bp, HZ / 1000);
@@ -12048,6 +12094,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
if (pci_enable_device(bp->pdev)) {
netdev_err(bp->dev, "Cannot re-enable PCI device\n");
+ rc = -ENODEV;
goto fw_reset_abort;
}
pci_set_master(bp->pdev);
@@ -12074,9 +12121,10 @@ static void bnxt_fw_reset_task(struct work_struct *work)
}
rc = bnxt_open(bp->dev);
if (rc) {
- netdev_err(bp->dev, "bnxt_open_nic() failed\n");
- clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
- dev_close(bp->dev);
+ netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
+ bnxt_fw_reset_abort(bp, rc);
+ rtnl_unlock();
+ return;
}
bp->fw_reset_state = 0;
@@ -12103,12 +12151,8 @@ fw_reset_abort_status:
netdev_err(bp->dev, "fw_health_status 0x%x\n", sts);
}
fw_reset_abort:
- clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
- if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF)
- bnxt_dl_health_status_update(bp, false);
- bp->fw_reset_state = 0;
rtnl_lock();
- dev_close(bp->dev);
+ bnxt_fw_reset_abort(bp, rc);
rtnl_unlock();
}
@@ -12662,7 +12706,6 @@ static void bnxt_remove_one(struct pci_dev *pdev)
if (BNXT_PF(bp))
devlink_port_type_clear(&bp->dl_port);
- bnxt_ptp_clear(bp);
pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
@@ -13246,11 +13289,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rc);
}
- if (bnxt_ptp_init(bp)) {
- netdev_warn(dev, "PTP initialization failed.\n");
- kfree(bp->ptp_cfg);
- bp->ptp_cfg = NULL;
- }
bnxt_inv_fw_health_reg(bp);
bnxt_dl_register(bp);
@@ -13436,7 +13474,8 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
if (netif_running(netdev))
bnxt_close(netdev);
- pci_disable_device(pdev);
+ if (pci_is_enabled(pdev))
+ pci_disable_device(pdev);
bnxt_free_ctx_mem(bp);
kfree(bp->ctx);
bp->ctx = NULL;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 8e90224c43a2..8a68df4d9e59 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -433,6 +433,7 @@ static int bnxt_hwrm_queue_dscp2pri_cfg(struct bnxt *bp, struct dcb_app *app,
static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc)
{
int total_ets_bw = 0;
+ bool zero = false;
u8 max_tc = 0;
int i;
@@ -453,13 +454,20 @@ static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc)
break;
case IEEE_8021QAZ_TSA_ETS:
total_ets_bw += ets->tc_tx_bw[i];
+ zero = zero || !ets->tc_tx_bw[i];
break;
default:
return -ENOTSUPP;
}
}
- if (total_ets_bw > 100)
+ if (total_ets_bw > 100) {
+ netdev_warn(bp->dev, "rejecting ETS config exceeding available bandwidth\n");
return -EINVAL;
+ }
+ if (zero && total_ets_bw == 100) {
+ netdev_warn(bp->dev, "rejecting ETS config starving a TC\n");
+ return -EINVAL;
+ }
if (max_tc >= bp->max_tc)
*tc = bp->max_tc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index f698b6bd4ff8..9089e7f3fbd4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -385,22 +385,6 @@ int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts)
return 0;
}
-void bnxt_ptp_start(struct bnxt *bp)
-{
- struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
-
- if (!ptp)
- return;
-
- if (bp->flags & BNXT_FLAG_CHIP_P5) {
- spin_lock_bh(&ptp->ptp_lock);
- ptp->current_time = bnxt_refclk_read(bp, NULL);
- WRITE_ONCE(ptp->old_time, ptp->current_time);
- spin_unlock_bh(&ptp->ptp_lock);
- ptp_schedule_worker(ptp->ptp_clock, 0);
- }
-}
-
static const struct ptp_clock_info bnxt_ptp_caps = {
.owner = THIS_MODULE,
.name = "bnxt clock",
@@ -450,7 +434,13 @@ int bnxt_ptp_init(struct bnxt *bp)
bnxt_unmap_ptp_regs(bp);
return err;
}
-
+ if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ spin_lock_bh(&ptp->ptp_lock);
+ ptp->current_time = bnxt_refclk_read(bp, NULL);
+ WRITE_ONCE(ptp->old_time, ptp->current_time);
+ spin_unlock_bh(&ptp->ptp_lock);
+ ptp_schedule_worker(ptp->ptp_clock, 0);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index 6b6245750e20..4135ea3ec788 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -75,7 +75,6 @@ int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts);
-void bnxt_ptp_start(struct bnxt *bp);
int bnxt_ptp_init(struct bnxt *bp);
void bnxt_ptp_clear(struct bnxt *bp);
#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index a918e374f3c5..187ff643ad2a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -479,16 +479,17 @@ struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev)
if (!edev)
return ERR_PTR(-ENOMEM);
edev->en_ops = &bnxt_en_ops_tbl;
- if (bp->flags & BNXT_FLAG_ROCEV1_CAP)
- edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP;
- if (bp->flags & BNXT_FLAG_ROCEV2_CAP)
- edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP;
edev->net = dev;
edev->pdev = bp->pdev;
edev->l2_db_size = bp->db_size;
edev->l2_db_size_nc = bp->db_size;
bp->edev = edev;
}
+ edev->flags &= ~BNXT_EN_FLAG_ROCE_CAP;
+ if (bp->flags & BNXT_FLAG_ROCEV1_CAP)
+ edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP;
+ if (bp->flags & BNXT_FLAG_ROCEV2_CAP)
+ edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP;
return bp->edev;
}
EXPORT_SYMBOL(bnxt_ulp_probe);
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
index 4cddd628d41b..9ed3d1ab2ca5 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
@@ -420,7 +420,7 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct)
* bits 32:47 indicate the PVF num.
*/
for (q_no = 0; q_no < ern; q_no++) {
- reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS;
+ reg_val = (u64)oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS;
/* for VF assigned queues. */
if (q_no < oct->sriov_info.pf_srn) {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index f3d12d0714fb..68b78642c045 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2770,32 +2770,32 @@ static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
if (err)
return err;
- err = dpaa2_switch_seed_bp(ethsw);
- if (err)
- goto err_free_dpbp;
-
err = dpaa2_switch_alloc_rings(ethsw);
if (err)
- goto err_drain_dpbp;
+ goto err_free_dpbp;
err = dpaa2_switch_setup_dpio(ethsw);
if (err)
goto err_destroy_rings;
+ err = dpaa2_switch_seed_bp(ethsw);
+ if (err)
+ goto err_deregister_dpio;
+
err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
if (err) {
dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err);
- goto err_deregister_dpio;
+ goto err_drain_dpbp;
}
return 0;
+err_drain_dpbp:
+ dpaa2_switch_drain_bp(ethsw);
err_deregister_dpio:
dpaa2_switch_free_dpio(ethsw);
err_destroy_rings:
dpaa2_switch_destroy_rings(ethsw);
-err_drain_dpbp:
- dpaa2_switch_drain_bp(ethsw);
err_free_dpbp:
dpaa2_switch_free_dpbp(ethsw);
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 46ecb42f2ef8..d9fc5c456bf3 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -524,6 +524,7 @@ static void setup_memac(struct mac_device *mac_dev)
| SUPPORTED_Autoneg \
| SUPPORTED_Pause \
| SUPPORTED_Asym_Pause \
+ | SUPPORTED_FIBRE \
| SUPPORTED_MII)
static DEFINE_MUTEX(eth_lock);
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index 12f6c2442a7a..e53512f6878a 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -131,7 +131,7 @@
/* buf unit size is cache_line_size, which is 64, so the shift is 6 */
#define PPE_BUF_SIZE_SHIFT 6
#define PPE_TX_BUF_HOLD BIT(31)
-#define CACHE_LINE_MASK 0x3F
+#define SOC_CACHE_LINE_MASK 0x3F
#else
#define PPE_CFG_QOS_VMID_GRP_SHIFT 8
#define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11
@@ -531,8 +531,8 @@ hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
#if defined(CONFIG_HI13X1_GMAC)
desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV
| TX_RELEASE_TO_PPE | priv->port << TX_POOL_SHIFT);
- desc->data_offset = (__force u32)cpu_to_be32(phys & CACHE_LINE_MASK);
- desc->send_addr = (__force u32)cpu_to_be32(phys & ~CACHE_LINE_MASK);
+ desc->data_offset = (__force u32)cpu_to_be32(phys & SOC_CACHE_LINE_MASK);
+ desc->send_addr = (__force u32)cpu_to_be32(phys & ~SOC_CACHE_LINE_MASK);
#else
desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV);
desc->send_addr = (__force u32)cpu_to_be32(phys);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 0a6cda309b24..aa86a81c8f4a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -98,6 +98,7 @@ struct hclgevf_mbx_resp_status {
u32 origin_mbx_msg;
bool received_resp;
int resp_status;
+ u16 match_id;
u8 additional_info[HCLGE_MBX_MAX_RESP_DATA_SIZE];
};
@@ -143,7 +144,8 @@ struct hclge_mbx_vf_to_pf_cmd {
u8 mbx_need_resp;
u8 rsv1[1];
u8 msg_len;
- u8 rsv2[3];
+ u8 rsv2;
+ u16 match_id;
struct hclge_vf_to_pf_msg msg;
};
@@ -153,7 +155,8 @@ struct hclge_mbx_pf_to_vf_cmd {
u8 dest_vfid;
u8 rsv[3];
u8 msg_len;
- u8 rsv1[3];
+ u8 rsv1;
+ u16 match_id;
struct hclge_pf_to_vf_msg msg;
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index dd3354a57c62..ebeaf12e409b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -9552,13 +9552,17 @@ static int hclge_set_vport_vlan_filter(struct hclge_vport *vport, bool enable)
if (ret)
return ret;
- if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps))
+ if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) {
ret = hclge_set_port_vlan_filter_bypass(hdev, vport->vport_id,
!enable);
- else if (!vport->vport_id)
+ } else if (!vport->vport_id) {
+ if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps))
+ enable = false;
+
ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT,
HCLGE_FILTER_FE_INGRESS,
enable, 0);
+ }
return ret;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index e10a2c36b706..c0a478ae9583 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -47,6 +47,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport,
resp_pf_to_vf->dest_vfid = vf_to_pf_req->mbx_src_vfid;
resp_pf_to_vf->msg_len = vf_to_pf_req->msg_len;
+ resp_pf_to_vf->match_id = vf_to_pf_req->match_id;
resp_pf_to_vf->msg.code = HCLGE_MBX_PF_VF_RESP;
resp_pf_to_vf->msg.vf_mbx_msg_code = vf_to_pf_req->msg.code;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 52eaf82b7cd7..8784d61e833f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2641,6 +2641,16 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev)
static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev)
{
+ struct hnae3_handle *nic = &hdev->nic;
+ int ret;
+
+ ret = hclgevf_en_hw_strip_rxvtag(nic, true);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed to enable rx vlan offload, ret = %d\n", ret);
+ return ret;
+ }
+
return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0,
false);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 9b17735b9f4c..772b2f8acd2e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -13,6 +13,7 @@ static int hclgevf_resp_to_errno(u16 resp_code)
return resp_code ? -resp_code : 0;
}
+#define HCLGEVF_MBX_MATCH_ID_START 1
static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev)
{
/* this function should be called with mbx_resp.mbx_mutex held
@@ -21,6 +22,10 @@ static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev)
hdev->mbx_resp.received_resp = false;
hdev->mbx_resp.origin_mbx_msg = 0;
hdev->mbx_resp.resp_status = 0;
+ hdev->mbx_resp.match_id++;
+ /* Update match_id and ensure the value of match_id is not zero */
+ if (hdev->mbx_resp.match_id == 0)
+ hdev->mbx_resp.match_id = HCLGEVF_MBX_MATCH_ID_START;
memset(hdev->mbx_resp.additional_info, 0, HCLGE_MBX_MAX_RESP_DATA_SIZE);
}
@@ -115,6 +120,7 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev,
if (need_resp) {
mutex_lock(&hdev->mbx_resp.mbx_mutex);
hclgevf_reset_mbx_resp_status(hdev);
+ req->match_id = hdev->mbx_resp.match_id;
status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
if (status) {
dev_err(&hdev->pdev->dev,
@@ -211,6 +217,19 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
resp->additional_info[i] = *temp;
temp++;
}
+
+ /* If match_id is not zero, it means PF support
+ * match_id. If the match_id is right, VF get the
+ * right response, otherwise ignore the response.
+ * Driver will clear hdev->mbx_resp when send
+ * next message which need response.
+ */
+ if (req->match_id) {
+ if (req->match_id == resp->match_id)
+ resp->received_resp = true;
+ } else {
+ resp->received_resp = true;
+ }
break;
case HCLGE_MBX_LINK_STAT_CHANGE:
case HCLGE_MBX_ASSERTING_RESET:
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ed77191d19f4..a775c69e4fd7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1731,7 +1731,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_send_failed++;
tx_dropped++;
ret = NETDEV_TX_OK;
- ibmvnic_tx_scrq_flush(adapter, tx_scrq);
goto out;
}
@@ -1753,6 +1752,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
dev_kfree_skb_any(skb);
tx_send_failed++;
tx_dropped++;
+ ibmvnic_tx_scrq_flush(adapter, tx_scrq);
ret = NETDEV_TX_OK;
goto out;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 913253f8ecb4..14aea40da50f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1825,7 +1825,8 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
struct sk_buff *skb)
{
if (ring_uses_build_skb(rx_ring)) {
- unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
+ unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1;
+ unsigned long offset = (unsigned long)(skb->data) & mask;
dma_sync_single_range_for_cpu(rx_ring->dev,
IXGBE_CB(skb)->dma,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
index 1a3455620b38..cc8ac36cf687 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_OCTEONTX2_AF) += rvu_af.o
rvu_mbox-y := mbox.o rvu_trace.o
rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \
rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \
- rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o
+ rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 10cddf1ac7b9..017163fb3cd5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1314,7 +1314,7 @@ int rvu_mbox_handler_detach_resources(struct rvu *rvu,
return rvu_detach_rsrcs(rvu, detach, detach->hdr.pcifunc);
}
-static int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc)
+int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc)
{
struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
int blkaddr = BLKADDR_NIX0, vf;
@@ -2859,6 +2859,12 @@ static int rvu_enable_sriov(struct rvu *rvu)
if (!vfs)
return 0;
+ /* LBK channel number 63 is used for switching packets between
+ * CGX mapped VFs. Hence limit LBK pairs till 62 only.
+ */
+ if (vfs > 62)
+ vfs = 62;
+
/* Save VFs number for reference in VF interrupts handlers.
* Since interrupts might start arriving during SRIOV enablement
* ordinary API cannot be used to get number of enabled VFs.
@@ -3001,6 +3007,8 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* Initialize debugfs */
rvu_dbg_init(rvu);
+ mutex_init(&rvu->rswitch.switch_lock);
+
return 0;
err_dl:
rvu_unregister_dl(rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 10e58a5d5861..91503fb2762c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -415,6 +415,16 @@ struct npc_kpu_profile_adapter {
size_t kpus;
};
+#define RVU_SWITCH_LBK_CHAN 63
+
+struct rvu_switch {
+ struct mutex switch_lock; /* Serialize flow installation */
+ u32 used_entries;
+ u16 *entry2pcifunc;
+ u16 mode;
+ u16 start_entry;
+};
+
struct rvu {
void __iomem *afreg_base;
void __iomem *pfreg_base;
@@ -445,6 +455,7 @@ struct rvu {
/* CGX */
#define PF_CGXMAP_BASE 1 /* PF 0 is reserved for RVU PF */
+ u16 cgx_mapped_vfs; /* maximum CGX mapped VFs */
u8 cgx_mapped_pfs;
u8 cgx_cnt_max; /* CGX port count max */
u8 *pf2cgxlmac_map; /* pf to cgx_lmac map */
@@ -477,6 +488,9 @@ struct rvu {
struct rvu_debugfs rvu_dbg;
#endif
struct rvu_devlink *rvu_dl;
+
+ /* RVU switch implementation over NPC with DMAC rules */
+ struct rvu_switch rswitch;
};
static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val)
@@ -691,6 +705,7 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw,
struct nix_cn10k_aq_enq_req *aq_req,
struct nix_cn10k_aq_enq_rsp *aq_rsp,
u16 pcifunc, u8 ctype, u32 qidx);
+int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc);
/* NPC APIs */
int rvu_npc_init(struct rvu *rvu);
@@ -768,4 +783,10 @@ void rvu_dbg_exit(struct rvu *rvu);
static inline void rvu_dbg_init(struct rvu *rvu) {}
static inline void rvu_dbg_exit(struct rvu *rvu) {}
#endif
+
+/* RVU Switch */
+void rvu_switch_enable(struct rvu *rvu);
+void rvu_switch_disable(struct rvu *rvu);
+void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc);
+
#endif /* RVU_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 6cc8fbb7190c..fe99ac4a4dd8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -126,6 +126,7 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
unsigned long lmac_bmap;
int size, free_pkind;
int cgx, lmac, iter;
+ int numvfs, hwvfs;
if (!cgx_cnt_max)
return 0;
@@ -166,6 +167,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
pkind->pfchan_map[free_pkind] = ((pf) & 0x3F) << 16;
rvu_map_cgx_nix_block(rvu, pf, cgx, lmac);
rvu->cgx_mapped_pfs++;
+ rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvfs);
+ rvu->cgx_mapped_vfs += numvfs;
pf++;
}
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index 370d4ca1e5ed..9b2dfbf90e51 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -2113,9 +2113,6 @@ static void rvu_print_npc_mcam_info(struct seq_file *s,
int entry_acnt, entry_ecnt;
int cntr_acnt, cntr_ecnt;
- /* Skip PF0 */
- if (!pcifunc)
- return;
rvu_npc_get_mcam_entry_alloc_info(rvu, pcifunc, blkaddr,
&entry_acnt, &entry_ecnt);
rvu_npc_get_mcam_counter_alloc_info(rvu, pcifunc, blkaddr,
@@ -2298,7 +2295,7 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s,
static void rvu_dbg_npc_mcam_show_action(struct seq_file *s,
struct rvu_npc_mcam_rule *rule)
{
- if (rule->intf == NIX_INTF_TX) {
+ if (is_npc_intf_tx(rule->intf)) {
switch (rule->tx_action.op) {
case NIX_TX_ACTIONOP_DROP:
seq_puts(s, "\taction: Drop\n");
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
index 10a98bcb7c54..2688186066d9 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
@@ -1364,6 +1364,44 @@ static void rvu_health_reporters_destroy(struct rvu *rvu)
rvu_nix_health_reporters_destroy(rvu_dl);
}
+static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+ struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+ struct rvu *rvu = rvu_dl->rvu;
+ struct rvu_switch *rswitch;
+
+ rswitch = &rvu->rswitch;
+ *mode = rswitch->mode;
+
+ return 0;
+}
+
+static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
+{
+ struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+ struct rvu *rvu = rvu_dl->rvu;
+ struct rvu_switch *rswitch;
+
+ rswitch = &rvu->rswitch;
+ switch (mode) {
+ case DEVLINK_ESWITCH_MODE_LEGACY:
+ case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+ if (rswitch->mode == mode)
+ return 0;
+ rswitch->mode = mode;
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ rvu_switch_enable(rvu);
+ else
+ rvu_switch_disable(rvu);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
struct netlink_ext_ack *extack)
{
@@ -1372,6 +1410,8 @@ static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req
static const struct devlink_ops rvu_devlink_ops = {
.info_get = rvu_devlink_info_get,
+ .eswitch_mode_get = rvu_devlink_eswitch_mode_get,
+ .eswitch_mode_set = rvu_devlink_eswitch_mode_set,
};
int rvu_register_dl(struct rvu *rvu)
@@ -1380,14 +1420,9 @@ int rvu_register_dl(struct rvu *rvu)
struct devlink *dl;
int err;
- rvu_dl = kzalloc(sizeof(*rvu_dl), GFP_KERNEL);
- if (!rvu_dl)
- return -ENOMEM;
-
dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink));
if (!dl) {
dev_warn(rvu->dev, "devlink_alloc failed\n");
- kfree(rvu_dl);
return -ENOMEM;
}
@@ -1395,10 +1430,10 @@ int rvu_register_dl(struct rvu *rvu)
if (err) {
dev_err(rvu->dev, "devlink register failed with error %d\n", err);
devlink_free(dl);
- kfree(rvu_dl);
return err;
}
+ rvu_dl = devlink_priv(dl);
rvu_dl->dl = dl;
rvu_dl->rvu = rvu;
rvu->rvu_dl = rvu_dl;
@@ -1417,5 +1452,4 @@ void rvu_unregister_dl(struct rvu *rvu)
rvu_health_reporters_destroy(rvu);
devlink_unregister(dl);
devlink_free(dl);
- kfree(rvu_dl);
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index aeae37704428..0933699a0d2d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -1952,6 +1952,35 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw,
pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE);
}
+static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr,
+ u16 pcifunc, struct nix_txsch *txsch)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ int lbk_link_start, lbk_links;
+ u8 pf = rvu_get_pf(pcifunc);
+ int schq;
+
+ if (!is_pf_cgxmapped(rvu, pf))
+ return;
+
+ lbk_link_start = hw->cgx_links;
+
+ for (schq = 0; schq < txsch->schq.max; schq++) {
+ if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
+ continue;
+ /* Enable all LBK links with channel 63 by default so that
+ * packets can be sent to LBK with a NPC TX MCAM rule
+ */
+ lbk_links = hw->lbk_links;
+ while (lbk_links--)
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_TL3_TL2X_LINKX_CFG(schq,
+ lbk_link_start +
+ lbk_links),
+ BIT_ULL(12) | RVU_SWITCH_LBK_CHAN);
+ }
+}
+
int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
struct nix_txschq_config *req,
struct msg_rsp *rsp)
@@ -2040,6 +2069,9 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
rvu_write64(rvu, blkaddr, reg, regval);
}
+ rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc,
+ &nix_hw->txsch[NIX_TXSCH_LVL_TL2]);
+
return 0;
}
@@ -3180,6 +3212,8 @@ int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu,
if (test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) && from_vf)
ether_addr_copy(pfvf->default_mac, req->mac_addr);
+ rvu_switch_update_rules(rvu, pcifunc);
+
return 0;
}
@@ -3849,6 +3883,8 @@ int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req,
pfvf = rvu_get_pfvf(rvu, pcifunc);
set_bit(NIXLF_INITIALIZED, &pfvf->flags);
+ rvu_switch_update_rules(rvu, pcifunc);
+
return rvu_cgx_start_stop_io(rvu, pcifunc, true);
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 3612e0a2cab3..1097291aaa45 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -442,7 +442,8 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
owner = mcam->entry2pfvf_map[index];
target_func = (entry->action >> 4) & 0xffff;
/* do nothing when target is LBK/PF or owner is not PF */
- if (is_afvf(target_func) || (owner & RVU_PFVF_FUNC_MASK) ||
+ if (is_pffunc_af(owner) || is_afvf(target_func) ||
+ (owner & RVU_PFVF_FUNC_MASK) ||
!(target_func & RVU_PFVF_FUNC_MASK))
return;
@@ -468,6 +469,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
{
int bank = npc_get_bank(mcam, index);
int kw = 0, actbank, actindex;
+ u8 tx_intf_mask = ~intf & 0x3;
+ u8 tx_intf = intf;
u64 cam0, cam1;
actbank = bank; /* Save bank id, to set action later on */
@@ -488,12 +491,21 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
*/
for (; bank < (actbank + mcam->banks_per_entry); bank++, kw = kw + 2) {
/* Interface should be set in all banks */
+ if (is_npc_intf_tx(intf)) {
+ /* Last bit must be set and rest don't care
+ * for TX interfaces
+ */
+ tx_intf_mask = 0x1;
+ tx_intf = intf & tx_intf_mask;
+ tx_intf_mask = ~tx_intf & tx_intf_mask;
+ }
+
rvu_write64(rvu, blkaddr,
NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 1),
- intf);
+ tx_intf);
rvu_write64(rvu, blkaddr,
NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 0),
- ~intf & 0x3);
+ tx_intf_mask);
/* Set the match key */
npc_get_keyword(entry, kw, &cam0, &cam1);
@@ -650,6 +662,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
eth_broadcast_addr((u8 *)&req.mask.dmac);
req.features = BIT_ULL(NPC_DMAC);
req.channel = chan;
+ req.chan_mask = 0xFFFU;
req.intf = pfvf->nix_rx_intf;
req.op = action.op;
req.hdr.pcifunc = 0; /* AF is requester */
@@ -799,6 +812,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
eth_broadcast_addr((u8 *)&req.mask.dmac);
req.features = BIT_ULL(NPC_DMAC);
req.channel = chan;
+ req.chan_mask = 0xFFFU;
req.intf = pfvf->nix_rx_intf;
req.entry = index;
req.hdr.pcifunc = 0; /* AF is requester */
@@ -1745,6 +1759,8 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr)
int nixlf_count = rvu_get_nixlf_count(rvu);
struct npc_mcam *mcam = &rvu->hw->mcam;
int rsvd, err;
+ u16 index;
+ int cntr;
u64 cfg;
/* Actual number of MCAM entries vary by entry size */
@@ -1845,6 +1861,14 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr)
if (!mcam->entry2target_pffunc)
goto free_mem;
+ for (index = 0; index < mcam->bmap_entries; index++) {
+ mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP;
+ mcam->entry2cntr_map[index] = NPC_MCAM_INVALID_MAP;
+ }
+
+ for (cntr = 0; cntr < mcam->counters.max; cntr++)
+ mcam->cntr2pfvf_map[cntr] = NPC_MCAM_INVALID_MAP;
+
mutex_init(&mcam->lock);
return 0;
@@ -2562,7 +2586,7 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu,
}
/* Alloc request from PFFUNC with no NIXLF attached should be denied */
- if (!is_nixlf_attached(rvu, pcifunc))
+ if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc))
return NPC_MCAM_ALLOC_DENIED;
return npc_mcam_alloc_entries(mcam, pcifunc, req, rsp);
@@ -2582,7 +2606,7 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu,
return NPC_MCAM_INVALID_REQ;
/* Free request from PFFUNC with no NIXLF attached, ignore */
- if (!is_nixlf_attached(rvu, pcifunc))
+ if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc))
return NPC_MCAM_INVALID_REQ;
mutex_lock(&mcam->lock);
@@ -2594,7 +2618,7 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu,
if (rc)
goto exit;
- mcam->entry2pfvf_map[req->entry] = 0;
+ mcam->entry2pfvf_map[req->entry] = NPC_MCAM_INVALID_MAP;
mcam->entry2target_pffunc[req->entry] = 0x0;
npc_mcam_clear_bit(mcam, req->entry);
npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, false);
@@ -2679,13 +2703,14 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
else
nix_intf = pfvf->nix_rx_intf;
- if (npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) {
+ if (!is_pffunc_af(pcifunc) &&
+ npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) {
rc = NPC_MCAM_INVALID_REQ;
goto exit;
}
- if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf,
- pcifunc)) {
+ if (!is_pffunc_af(pcifunc) &&
+ npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) {
rc = NPC_MCAM_INVALID_REQ;
goto exit;
}
@@ -2836,7 +2861,7 @@ int rvu_mbox_handler_npc_mcam_alloc_counter(struct rvu *rvu,
return NPC_MCAM_INVALID_REQ;
/* If the request is from a PFFUNC with no NIXLF attached, ignore */
- if (!is_nixlf_attached(rvu, pcifunc))
+ if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc))
return NPC_MCAM_INVALID_REQ;
/* Since list of allocated counter IDs needs to be sent to requester,
@@ -3081,7 +3106,7 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu,
if (rc) {
/* Free allocated MCAM entry */
mutex_lock(&mcam->lock);
- mcam->entry2pfvf_map[entry] = 0;
+ mcam->entry2pfvf_map[entry] = NPC_MCAM_INVALID_MAP;
npc_mcam_clear_bit(mcam, entry);
mutex_unlock(&mcam->lock);
return rc;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 68633145a8b8..5c01cf4a9c5b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -910,14 +910,17 @@ static void rvu_mcam_add_counter_to_rule(struct rvu *rvu, u16 pcifunc,
static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf,
struct mcam_entry *entry,
- struct npc_install_flow_req *req, u16 target)
+ struct npc_install_flow_req *req,
+ u16 target, bool pf_set_vfs_mac)
{
+ struct rvu_switch *rswitch = &rvu->rswitch;
struct nix_rx_action action;
- u64 chan_mask;
- chan_mask = req->chan_mask ? req->chan_mask : ~0ULL;
- npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, chan_mask, 0,
- NIX_INTF_RX);
+ if (rswitch->mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && pf_set_vfs_mac)
+ req->chan_mask = 0x0; /* Do not care channel */
+
+ npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, req->chan_mask,
+ 0, NIX_INTF_RX);
*(u64 *)&action = 0x00;
action.pf_func = target;
@@ -949,9 +952,16 @@ static void npc_update_tx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf,
struct npc_install_flow_req *req, u16 target)
{
struct nix_tx_action action;
+ u64 mask = ~0ULL;
+
+ /* If AF is installing then do not care about
+ * PF_FUNC in Send Descriptor
+ */
+ if (is_pffunc_af(req->hdr.pcifunc))
+ mask = 0;
npc_update_entry(rvu, NPC_PF_FUNC, entry, (__force u16)htons(target),
- 0, ~0ULL, 0, NIX_INTF_TX);
+ 0, mask, 0, NIX_INTF_TX);
*(u64 *)&action = 0x00;
action.op = req->op;
@@ -1002,7 +1012,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target,
req->intf);
if (is_npc_intf_rx(req->intf))
- npc_update_rx_entry(rvu, pfvf, entry, req, target);
+ npc_update_rx_entry(rvu, pfvf, entry, req, target, pf_set_vfs_mac);
else
npc_update_tx_entry(rvu, pfvf, entry, req, target);
@@ -1164,7 +1174,9 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
if (err)
return err;
- if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel))
+ /* Skip channel validation if AF is installing */
+ if (!is_pffunc_af(req->hdr.pcifunc) &&
+ npc_mcam_verify_channel(rvu, target, req->intf, req->channel))
return -EINVAL;
pfvf = rvu_get_pfvf(rvu, target);
@@ -1180,6 +1192,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
eth_broadcast_addr((u8 *)&req->mask.dmac);
}
+ /* Proceed if NIXLF is attached or not for TX rules */
err = nix_get_nixlf(rvu, target, &nixlf, NULL);
if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac)
return -EINVAL;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
new file mode 100644
index 000000000000..2e5379710aa5
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2021 Marvell.
+ */
+
+#include <linux/bitfield.h>
+#include "rvu.h"
+
+static int rvu_switch_install_rx_rule(struct rvu *rvu, u16 pcifunc,
+ u16 chan_mask)
+{
+ struct npc_install_flow_req req = { 0 };
+ struct npc_install_flow_rsp rsp = { 0 };
+ struct rvu_pfvf *pfvf;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ /* If the pcifunc is not initialized then nothing to do.
+ * This same function will be called again via rvu_switch_update_rules
+ * after pcifunc is initialized.
+ */
+ if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags))
+ return 0;
+
+ ether_addr_copy(req.packet.dmac, pfvf->mac_addr);
+ eth_broadcast_addr((u8 *)&req.mask.dmac);
+ req.hdr.pcifunc = 0; /* AF is requester */
+ req.vf = pcifunc;
+ req.features = BIT_ULL(NPC_DMAC);
+ req.channel = pfvf->rx_chan_base;
+ req.chan_mask = chan_mask;
+ req.intf = pfvf->nix_rx_intf;
+ req.op = NIX_RX_ACTION_DEFAULT;
+ req.default_rule = 1;
+
+ return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
+}
+
+static int rvu_switch_install_tx_rule(struct rvu *rvu, u16 pcifunc, u16 entry)
+{
+ struct npc_install_flow_req req = { 0 };
+ struct npc_install_flow_rsp rsp = { 0 };
+ struct rvu_pfvf *pfvf;
+ u8 lbkid;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ /* If the pcifunc is not initialized then nothing to do.
+ * This same function will be called again via rvu_switch_update_rules
+ * after pcifunc is initialized.
+ */
+ if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags))
+ return 0;
+
+ lbkid = pfvf->nix_blkaddr == BLKADDR_NIX0 ? 0 : 1;
+ ether_addr_copy(req.packet.dmac, pfvf->mac_addr);
+ eth_broadcast_addr((u8 *)&req.mask.dmac);
+ req.hdr.pcifunc = 0; /* AF is requester */
+ req.vf = pcifunc;
+ req.entry = entry;
+ req.features = BIT_ULL(NPC_DMAC);
+ req.intf = pfvf->nix_tx_intf;
+ req.op = NIX_TX_ACTIONOP_UCAST_CHAN;
+ req.index = (lbkid << 8) | RVU_SWITCH_LBK_CHAN;
+ req.set_cntr = 1;
+
+ return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
+}
+
+static int rvu_switch_install_rules(struct rvu *rvu)
+{
+ struct rvu_switch *rswitch = &rvu->rswitch;
+ u16 start = rswitch->start_entry;
+ struct rvu_hwinfo *hw = rvu->hw;
+ int pf, vf, numvfs, hwvf;
+ u16 pcifunc, entry = 0;
+ int err;
+
+ for (pf = 1; pf < hw->total_pfs; pf++) {
+ if (!is_pf_cgxmapped(rvu, pf))
+ continue;
+
+ pcifunc = pf << 10;
+ /* rvu_get_nix_blkaddr sets up the corresponding NIX block
+ * address and NIX RX and TX interfaces for a pcifunc.
+ * Generally it is called during attach call of a pcifunc but it
+ * is called here since we are pre-installing rules before
+ * nixlfs are attached
+ */
+ rvu_get_nix_blkaddr(rvu, pcifunc);
+
+ /* MCAM RX rule for a PF/VF already exists as default unicast
+ * rules installed by AF. Hence change the channel in those
+ * rules to ignore channel so that packets with the required
+ * DMAC received from LBK(by other PF/VFs in system) or from
+ * external world (from wire) are accepted.
+ */
+ err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0);
+ if (err) {
+ dev_err(rvu->dev, "RX rule for PF%d failed(%d)\n",
+ pf, err);
+ return err;
+ }
+
+ err = rvu_switch_install_tx_rule(rvu, pcifunc, start + entry);
+ if (err) {
+ dev_err(rvu->dev, "TX rule for PF%d failed(%d)\n",
+ pf, err);
+ return err;
+ }
+
+ rswitch->entry2pcifunc[entry++] = pcifunc;
+
+ rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf);
+ for (vf = 0; vf < numvfs; vf++, hwvf++) {
+ pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
+ rvu_get_nix_blkaddr(rvu, pcifunc);
+
+ err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0);
+ if (err) {
+ dev_err(rvu->dev,
+ "RX rule for PF%dVF%d failed(%d)\n",
+ pf, vf, err);
+ return err;
+ }
+
+ err = rvu_switch_install_tx_rule(rvu, pcifunc,
+ start + entry);
+ if (err) {
+ dev_err(rvu->dev,
+ "TX rule for PF%dVF%d failed(%d)\n",
+ pf, vf, err);
+ return err;
+ }
+
+ rswitch->entry2pcifunc[entry++] = pcifunc;
+ }
+ }
+
+ return 0;
+}
+
+void rvu_switch_enable(struct rvu *rvu)
+{
+ struct npc_mcam_alloc_entry_req alloc_req = { 0 };
+ struct npc_mcam_alloc_entry_rsp alloc_rsp = { 0 };
+ struct npc_delete_flow_req uninstall_req = { 0 };
+ struct npc_mcam_free_entry_req free_req = { 0 };
+ struct rvu_switch *rswitch = &rvu->rswitch;
+ struct msg_rsp rsp;
+ int ret;
+
+ alloc_req.contig = true;
+ alloc_req.count = rvu->cgx_mapped_pfs + rvu->cgx_mapped_vfs;
+ ret = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &alloc_req,
+ &alloc_rsp);
+ if (ret) {
+ dev_err(rvu->dev,
+ "Unable to allocate MCAM entries\n");
+ goto exit;
+ }
+
+ if (alloc_rsp.count != alloc_req.count) {
+ dev_err(rvu->dev,
+ "Unable to allocate %d MCAM entries, got %d\n",
+ alloc_req.count, alloc_rsp.count);
+ goto free_entries;
+ }
+
+ rswitch->entry2pcifunc = kcalloc(alloc_req.count, sizeof(u16),
+ GFP_KERNEL);
+ if (!rswitch->entry2pcifunc)
+ goto free_entries;
+
+ rswitch->used_entries = alloc_rsp.count;
+ rswitch->start_entry = alloc_rsp.entry;
+
+ ret = rvu_switch_install_rules(rvu);
+ if (ret)
+ goto uninstall_rules;
+
+ return;
+
+uninstall_rules:
+ uninstall_req.start = rswitch->start_entry;
+ uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1;
+ rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp);
+ kfree(rswitch->entry2pcifunc);
+free_entries:
+ free_req.all = 1;
+ rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp);
+exit:
+ return;
+}
+
+void rvu_switch_disable(struct rvu *rvu)
+{
+ struct npc_delete_flow_req uninstall_req = { 0 };
+ struct npc_mcam_free_entry_req free_req = { 0 };
+ struct rvu_switch *rswitch = &rvu->rswitch;
+ struct rvu_hwinfo *hw = rvu->hw;
+ int pf, vf, numvfs, hwvf;
+ struct msg_rsp rsp;
+ u16 pcifunc;
+ int err;
+
+ if (!rswitch->used_entries)
+ return;
+
+ for (pf = 1; pf < hw->total_pfs; pf++) {
+ if (!is_pf_cgxmapped(rvu, pf))
+ continue;
+
+ pcifunc = pf << 10;
+ err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF);
+ if (err)
+ dev_err(rvu->dev,
+ "Reverting RX rule for PF%d failed(%d)\n",
+ pf, err);
+
+ for (vf = 0; vf < numvfs; vf++, hwvf++) {
+ pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
+ err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF);
+ if (err)
+ dev_err(rvu->dev,
+ "Reverting RX rule for PF%dVF%d failed(%d)\n",
+ pf, vf, err);
+ }
+ }
+
+ uninstall_req.start = rswitch->start_entry;
+ uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1;
+ free_req.all = 1;
+ rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp);
+ rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp);
+ rswitch->used_entries = 0;
+ kfree(rswitch->entry2pcifunc);
+}
+
+void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc)
+{
+ struct rvu_switch *rswitch = &rvu->rswitch;
+ u32 max = rswitch->used_entries;
+ u16 entry;
+
+ if (!rswitch->used_entries)
+ return;
+
+ for (entry = 0; entry < max; entry++) {
+ if (rswitch->entry2pcifunc[entry] == pcifunc)
+ break;
+ }
+
+ if (entry >= max)
+ return;
+
+ rvu_switch_install_tx_rule(rvu, pcifunc, rswitch->start_entry + entry);
+ rvu_switch_install_rx_rule(rvu, pcifunc, 0x0);
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig
index ac403d43c74c..7bdbb2d09a14 100644
--- a/drivers/net/ethernet/microchip/sparx5/Kconfig
+++ b/drivers/net/ethernet/microchip/sparx5/Kconfig
@@ -3,6 +3,7 @@ config SPARX5_SWITCH
depends on NET_SWITCHDEV
depends on HAS_IOMEM
depends on OF
+ depends on ARCH_SPARX5 || COMPILE_TEST
select PHYLINK
select PHY_SPARX5_SERDES
select RESET_CONTROLLER
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index f744557c33a3..c7af5bc3b8af 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5084,7 +5084,8 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
new_bus->priv = tp;
new_bus->parent = &pdev->dev;
new_bus->irq[0] = PHY_MAC_INTERRUPT;
- snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x", pci_dev_id(pdev));
+ snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x",
+ pci_domain_nr(pdev->bus), pci_dev_id(pdev));
new_bus->read = r8169_mdio_read_reg;
new_bus->write = r8169_mdio_write_reg;
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 86a1eb0634e8..80e62ca2e3d3 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -864,7 +864,7 @@ enum GECMR_BIT {
/* The Ethernet AVB descriptor definitions. */
struct ravb_desc {
- __le16 ds; /* Descriptor size */
+ __le16 ds; /* Descriptor size */
u8 cc; /* Content control MSBs (reserved) */
u8 die_dt; /* Descriptor interrupt enable and type */
__le32 dptr; /* Descriptor pointer */
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 69c50f81e1cb..805397088850 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -920,7 +920,7 @@ static int ravb_poll(struct napi_struct *napi, int budget)
if (ravb_rx(ndev, &quota, q))
goto out;
- /* Processing RX Descriptor Ring */
+ /* Processing TX Descriptor Ring */
spin_lock_irqsave(&priv->lock, flags);
/* Clear TX interrupt */
ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c
index 99d4d9439d05..a6fb88fd42f7 100644
--- a/drivers/net/ethernet/xscale/ptp_ixp46x.c
+++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c
@@ -14,6 +14,8 @@
#include <linux/kernel.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/soc/ixp4xx/cpu.h>
+#include <linux/module.h>
+#include <mach/ixp4xx-regs.h>
#include "ixp46x_ts.h"
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 63006838bdcc..dec96e8ab567 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2495,7 +2495,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
hso_net_init);
if (!net) {
dev_err(&interface->dev, "Unable to create ethernet device\n");
- goto exit;
+ goto err_hso_dev;
}
hso_net = netdev_priv(net);
@@ -2508,13 +2508,13 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
USB_DIR_IN);
if (!hso_net->in_endp) {
dev_err(&interface->dev, "Can't find BULK IN endpoint\n");
- goto exit;
+ goto err_net;
}
hso_net->out_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK,
USB_DIR_OUT);
if (!hso_net->out_endp) {
dev_err(&interface->dev, "Can't find BULK OUT endpoint\n");
- goto exit;
+ goto err_net;
}
SET_NETDEV_DEV(net, &interface->dev);
SET_NETDEV_DEVTYPE(net, &hso_type);
@@ -2523,18 +2523,18 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) {
hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL);
if (!hso_net->mux_bulk_rx_urb_pool[i])
- goto exit;
+ goto err_mux_bulk_rx;
hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE,
GFP_KERNEL);
if (!hso_net->mux_bulk_rx_buf_pool[i])
- goto exit;
+ goto err_mux_bulk_rx;
}
hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL);
if (!hso_net->mux_bulk_tx_urb)
- goto exit;
+ goto err_mux_bulk_rx;
hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL);
if (!hso_net->mux_bulk_tx_buf)
- goto exit;
+ goto err_free_tx_urb;
add_net_device(hso_dev);
@@ -2542,7 +2542,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
result = register_netdev(net);
if (result) {
dev_err(&interface->dev, "Failed to register device\n");
- goto exit;
+ goto err_free_tx_buf;
}
hso_log_port(hso_dev);
@@ -2550,8 +2550,21 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface,
hso_create_rfkill(hso_dev, interface);
return hso_dev;
-exit:
- hso_free_net_device(hso_dev, true);
+
+err_free_tx_buf:
+ remove_net_device(hso_dev);
+ kfree(hso_net->mux_bulk_tx_buf);
+err_free_tx_urb:
+ usb_free_urb(hso_net->mux_bulk_tx_urb);
+err_mux_bulk_rx:
+ for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) {
+ usb_free_urb(hso_net->mux_bulk_rx_urb_pool[i]);
+ kfree(hso_net->mux_bulk_rx_buf_pool[i]);
+ }
+err_net:
+ free_netdev(net);
+err_hso_dev:
+ kfree(hso_dev);
return NULL;
}
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 1692d3b1b6e1..e09b107b5c99 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1552,7 +1552,8 @@ static int
rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex,
u32 advertising);
-static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
+static int __rtl8152_set_mac_address(struct net_device *netdev, void *p,
+ bool in_resume)
{
struct r8152 *tp = netdev_priv(netdev);
struct sockaddr *addr = p;
@@ -1561,9 +1562,11 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
if (!is_valid_ether_addr(addr->sa_data))
goto out1;
- ret = usb_autopm_get_interface(tp->intf);
- if (ret < 0)
- goto out1;
+ if (!in_resume) {
+ ret = usb_autopm_get_interface(tp->intf);
+ if (ret < 0)
+ goto out1;
+ }
mutex_lock(&tp->control);
@@ -1575,11 +1578,17 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
mutex_unlock(&tp->control);
- usb_autopm_put_interface(tp->intf);
+ if (!in_resume)
+ usb_autopm_put_interface(tp->intf);
out1:
return ret;
}
+static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
+{
+ return __rtl8152_set_mac_address(netdev, p, false);
+}
+
/* Devices containing proper chips can support a persistent
* host system provided MAC address.
* Examples of this are Dell TB15 and Dell WD15 docks
@@ -1698,7 +1707,7 @@ static int determine_ethernet_addr(struct r8152 *tp, struct sockaddr *sa)
return ret;
}
-static int set_ethernet_addr(struct r8152 *tp)
+static int set_ethernet_addr(struct r8152 *tp, bool in_resume)
{
struct net_device *dev = tp->netdev;
struct sockaddr sa;
@@ -1711,7 +1720,7 @@ static int set_ethernet_addr(struct r8152 *tp)
if (tp->version == RTL_VER_01)
ether_addr_copy(dev->dev_addr, sa.sa_data);
else
- ret = rtl8152_set_mac_address(dev, &sa);
+ ret = __rtl8152_set_mac_address(dev, &sa, in_resume);
return ret;
}
@@ -6763,9 +6772,10 @@ static int rtl8152_close(struct net_device *netdev)
tp->rtl_ops.down(tp);
mutex_unlock(&tp->control);
+ }
+ if (!res)
usb_autopm_put_interface(tp->intf);
- }
free_all_mem(tp);
@@ -8443,7 +8453,7 @@ static int rtl8152_reset_resume(struct usb_interface *intf)
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
tp->rtl_ops.init(tp);
queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0);
- set_ethernet_addr(tp);
+ set_ethernet_addr(tp, true);
return rtl8152_resume(intf);
}
@@ -9644,7 +9654,7 @@ static int rtl8152_probe(struct usb_interface *intf,
tp->rtl_fw.retry = true;
#endif
queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0);
- set_ethernet_addr(tp);
+ set_ethernet_addr(tp, false);
usb_set_intfdata(intf, tp);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 17df9b047ee4..784d5c3ef1c5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1709,7 +1709,6 @@ struct tcp_fastopen_context {
struct rcu_head rcu;
};
-extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
void tcp_fastopen_active_disable(struct sock *sk);
bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 2399073c3afc..78c448c6ab4c 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -136,7 +136,7 @@ DECLARE_EVENT_CLASS(net_dev_template,
__assign_str(name, skb->dev->name);
),
- TP_printk("dev=%s skbaddr=%p len=%u",
+ TP_printk("dev=%s skbaddr=%px len=%u",
__get_str(name), __entry->skbaddr, __entry->len)
)
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index 330d32d84485..c3006c6b4a87 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -41,11 +41,37 @@ TRACE_EVENT(qdisc_dequeue,
__entry->txq_state = txq->state;
),
- TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p",
+ TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%px",
__entry->ifindex, __entry->handle, __entry->parent,
__entry->txq_state, __entry->packets, __entry->skbaddr )
);
+TRACE_EVENT(qdisc_enqueue,
+
+ TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb),
+
+ TP_ARGS(qdisc, txq, skb),
+
+ TP_STRUCT__entry(
+ __field(struct Qdisc *, qdisc)
+ __field(void *, skbaddr)
+ __field(int, ifindex)
+ __field(u32, handle)
+ __field(u32, parent)
+ ),
+
+ TP_fast_assign(
+ __entry->qdisc = qdisc;
+ __entry->skbaddr = skb;
+ __entry->ifindex = txq->dev ? txq->dev->ifindex : 0;
+ __entry->handle = qdisc->handle;
+ __entry->parent = qdisc->parent;
+ ),
+
+ TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px",
+ __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr)
+);
+
TRACE_EVENT(qdisc_reset,
TP_PROTO(struct Qdisc *q),
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 42a4063de7cd..9de3c9c3267c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3677,6 +3677,8 @@ continue_func:
if (tail_call_reachable)
for (j = 0; j < frame; j++)
subprog[ret_prog[j]].tail_call_reachable = true;
+ if (subprog[0].tail_call_reachable)
+ env->prog->aux->tail_call_reachable = true;
/* end of for() loop means the last insn of the 'subprog'
* was reached. Doesn't matter whether it was JA or EXIT
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index aa47af349ba8..1cc75c811e24 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -701,6 +701,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
void *data;
int ret;
+ if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
+ prog->expected_attach_type == BPF_XDP_CPUMAP)
+ return -EINVAL;
if (kattr->test.ctx_in || kattr->test.ctx_out)
return -EINVAL;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 2b862cffc03a..a16191dcaed1 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -780,7 +780,7 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
struct net_device *dst_dev;
dst_dev = dst ? dst->dev : br->dev;
- if (dst_dev != br_dev && dst_dev != dev)
+ if (dst_dev && dst_dev != dev)
continue;
err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 647554c9813b..e12fd3cad619 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -539,7 +539,8 @@ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg,
goto err;
ret = -EINVAL;
- if (unlikely(msg->msg_iter.iov->iov_base == NULL))
+ if (unlikely(msg->msg_iter.nr_segs == 0) ||
+ unlikely(msg->msg_iter.iov->iov_base == NULL))
goto err;
noblock = msg->msg_flags & MSG_DONTWAIT;
diff --git a/net/core/dev.c b/net/core/dev.c
index 64b21f0a2048..8f1a47ad6781 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
+#include <trace/events/qdisc.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
@@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
}
}
+static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q,
+ struct sk_buff **to_free,
+ struct netdev_queue *txq)
+{
+ int rc;
+
+ rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK;
+ if (rc == NET_XMIT_SUCCESS)
+ trace_qdisc_enqueue(q, txq, skb);
+ return rc;
+}
+
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
struct netdev_queue *txq)
@@ -3862,8 +3875,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
* of q->seqlock to protect from racing with requeuing.
*/
if (unlikely(!nolock_qdisc_is_empty(q))) {
- rc = q->enqueue(skb, q, &to_free) &
- NET_XMIT_MASK;
+ rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
__qdisc_run(q);
qdisc_run_end(q);
@@ -3879,7 +3891,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
return NET_XMIT_SUCCESS;
}
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
qdisc_run(q);
no_lock_out:
@@ -3923,7 +3935,7 @@ no_lock_out:
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -9700,14 +9712,17 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
struct net_device *dev;
int err, fd;
+ rtnl_lock();
dev = dev_get_by_index(net, attr->link_create.target_ifindex);
- if (!dev)
+ if (!dev) {
+ rtnl_unlock();
return -EINVAL;
+ }
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
- goto out_put_dev;
+ goto unlock;
}
bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
@@ -9717,14 +9732,14 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
err = bpf_link_prime(&link->link, &link_primer);
if (err) {
kfree(link);
- goto out_put_dev;
+ goto unlock;
}
- rtnl_lock();
err = dev_xdp_attach_link(dev, NULL, link);
rtnl_unlock();
if (err) {
+ link->dev = NULL;
bpf_link_cleanup(&link_primer);
goto out_put_dev;
}
@@ -9734,6 +9749,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
dev_put(dev);
return fd;
+unlock:
+ rtnl_unlock();
+
out_put_dev:
dev_put(dev);
return err;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f63de967ac25..fc7942c0dddc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -663,7 +663,7 @@ static void skb_release_data(struct sk_buff *skb)
if (skb->cloned &&
atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
&shinfo->dataref))
- return;
+ goto exit;
skb_zcopy_clear(skb, true);
@@ -674,6 +674,17 @@ static void skb_release_data(struct sk_buff *skb)
kfree_skb_list(shinfo->frag_list);
skb_free_head(skb);
+exit:
+ /* When we clone an SKB we copy the reycling bit. The pp_recycle
+ * bit is only set on the head though, so in order to avoid races
+ * while trying to recycle fragments on __skb_frag_unref() we need
+ * to make one SKB responsible for triggering the recycle path.
+ * So disable the recycling bit if an SKB is cloned and we have
+ * additional references to to the fragmented part of the SKB.
+ * Eventually the last SKB will have the recycling bit set and it's
+ * dataref set to 0, which will trigger the recycling
+ */
+ skb->pp_recycle = 0;
}
/*
@@ -3011,8 +3022,11 @@ skb_zerocopy_headlen(const struct sk_buff *from)
if (!from->head_frag ||
skb_headlen(from) < L1_CACHE_BYTES ||
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) {
hlen = skb_headlen(from);
+ if (!hlen)
+ hlen = from->len;
+ }
if (skb_has_frag_list(from))
hlen = from->len;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9b6160a191f8..15d71288e741 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -508,10 +508,8 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
if (skb_linearize(skb))
return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
- if (unlikely(num_sge < 0)) {
- kfree(msg);
+ if (unlikely(num_sge < 0))
return num_sge;
- }
copied = skb->len;
msg->sg.start = 0;
@@ -530,6 +528,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
{
struct sock *sk = psock->sk;
struct sk_msg *msg;
+ int err;
/* If we are receiving on the same sock skb->sk is already assigned,
* skip memory accounting and owner transition seeing it already set
@@ -548,7 +547,10 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ if (err < 0)
+ kfree(msg);
+ return err;
}
/* Puts an skb on the ingress queue of the socket already assigned to the
@@ -559,12 +561,16 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
{
struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
struct sock *sk = psock->sk;
+ int err;
if (unlikely(!msg))
return -EAGAIN;
sk_msg_init(msg);
skb_set_owner_r(skb, sk);
- return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ if (err < 0)
+ kfree(msg);
+ return err;
}
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 5dbd45dc35ad..dc92a67baea3 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -816,7 +816,7 @@ static int dn_auto_bind(struct socket *sock)
static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
{
struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
if (scp->state != DN_CR)
@@ -826,11 +826,11 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk));
dn_send_conn_conf(sk, allocation);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
for(;;) {
release_sock(sk);
if (scp->state == DN_CC)
- *timeo = schedule_timeout(*timeo);
+ *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
lock_sock(sk);
err = 0;
if (scp->state == DN_RUN)
@@ -844,9 +844,8 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
err = -EAGAIN;
if (!*timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
if (err == 0) {
sk->sk_socket->state = SS_CONNECTED;
} else if (scp->state != DN_CC) {
@@ -858,7 +857,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
static int dn_wait_run(struct sock *sk, long *timeo)
{
struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err = 0;
if (scp->state == DN_RUN)
@@ -867,11 +866,11 @@ static int dn_wait_run(struct sock *sk, long *timeo)
if (!*timeo)
return -EALREADY;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
for(;;) {
release_sock(sk);
if (scp->state == DN_CI || scp->state == DN_CC)
- *timeo = schedule_timeout(*timeo);
+ *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
lock_sock(sk);
err = 0;
if (scp->state == DN_RUN)
@@ -885,9 +884,8 @@ static int dn_wait_run(struct sock *sk, long *timeo)
err = -ETIMEDOUT;
if (!*timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
out:
if (err == 0) {
sk->sk_socket->state = SS_CONNECTED;
@@ -1032,16 +1030,16 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
{
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sk_buff *skb = NULL;
int err = 0;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
for(;;) {
release_sock(sk);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb == NULL) {
- *timeo = schedule_timeout(*timeo);
+ *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
skb = skb_dequeue(&sk->sk_receive_queue);
}
lock_sock(sk);
@@ -1056,9 +1054,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
err = -EAGAIN;
if (!*timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
return skb == NULL ? ERR_PTR(err) : skb;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ffbba1e71551..532085da8d8f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1808,6 +1808,7 @@ void dsa_slave_setup_tagger(struct net_device *slave)
struct dsa_slave_priv *p = netdev_priv(slave);
const struct dsa_port *cpu_dp = dp->cpu_dp;
struct net_device *master = cpu_dp->master;
+ const struct dsa_switch *ds = dp->ds;
slave->needed_headroom = cpu_dp->tag_ops->needed_headroom;
slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom;
@@ -1819,6 +1820,14 @@ void dsa_slave_setup_tagger(struct net_device *slave)
slave->needed_tailroom += master->needed_tailroom;
p->xmit = cpu_dp->tag_ops->xmit;
+
+ slave->features = master->vlan_features | NETIF_F_HW_TC;
+ if (ds->ops->port_vlan_add && ds->ops->port_vlan_del)
+ slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ slave->hw_features |= NETIF_F_HW_TC;
+ slave->features |= NETIF_F_LLTX;
+ if (slave->needed_tailroom)
+ slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST);
}
static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
@@ -1881,11 +1890,6 @@ int dsa_slave_create(struct dsa_port *port)
if (slave_dev == NULL)
return -ENOMEM;
- slave_dev->features = master->vlan_features | NETIF_F_HW_TC;
- if (ds->ops->port_vlan_add && ds->ops->port_vlan_del)
- slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
- slave_dev->hw_features |= NETIF_F_HW_TC;
- slave_dev->features |= NETIF_F_LLTX;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
if (!is_zero_ether_addr(port->mac))
ether_addr_copy(slave_dev->dev_addr, port->mac);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 53565f48934c..a201ccf2435d 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -53,6 +53,9 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
u8 *tag;
u8 *addr;
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
/* Tag encoding */
tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
@@ -114,6 +117,9 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
u8 *addr;
u16 val;
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
/* Tag encoding */
tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
@@ -164,6 +170,9 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
u8 *addr;
u8 *tag;
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
/* Tag encoding */
tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index f26916a62f25..d3e9386b493e 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -503,7 +503,7 @@ static int __init tcp_bpf_v4_build_proto(void)
tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot);
return 0;
}
-core_initcall(tcp_bpf_v4_build_proto);
+late_initcall(tcp_bpf_v4_build_proto);
static int tcp_bpf_assert_proto_ops(struct proto *ops)
{
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 47c32604d38f..25fa4c01a17f 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -507,8 +507,18 @@ void tcp_fastopen_active_disable(struct sock *sk)
{
struct net *net = sock_net(sk);
+ if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
+ return;
+
+ /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
+ WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies);
+
+ /* Paired with smp_rmb() in tcp_fastopen_active_should_disable().
+ * We want net->ipv4.tfo_active_disable_stamp to be updated first.
+ */
+ smp_mb__before_atomic();
atomic_inc(&net->ipv4.tfo_active_disable_times);
- net->ipv4.tfo_active_disable_stamp = jiffies;
+
NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}
@@ -519,17 +529,27 @@ void tcp_fastopen_active_disable(struct sock *sk)
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
- int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
unsigned long timeout;
+ int tfo_da_times;
int multiplier;
+ if (!tfo_bh_timeout)
+ return false;
+
+ tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
if (!tfo_da_times)
return false;
+ /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */
+ smp_rmb();
+
/* Limit timeout to max: 2^6 * initial timeout */
multiplier = 1 << min(tfo_da_times - 1, 6);
- timeout = multiplier * tfo_bh_timeout * HZ;
- if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
+
+ /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */
+ timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) +
+ multiplier * tfo_bh_timeout * HZ;
+ if (time_before(jiffies, timeout))
return true;
/* Mark check bit so we can check for successful active TFO
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b9dc2d6197be..a692626c19e4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2965,7 +2965,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
- net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
+ net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
/* Reno is always built in */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 62cd4cd52e84..1a742b710e54 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -645,10 +645,12 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
const struct iphdr *iph,
struct udphdr *uh,
struct udp_table *udptable,
+ struct sock *sk,
struct sk_buff *skb, u32 info)
{
+ int (*lookup)(struct sock *sk, struct sk_buff *skb);
int network_offset, transport_offset;
- struct sock *sk;
+ struct udp_sock *up;
network_offset = skb_network_offset(skb);
transport_offset = skb_transport_offset(skb);
@@ -659,18 +661,28 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
/* Transport header needs to point to the UDP header */
skb_set_transport_header(skb, iph->ihl << 2);
+ if (sk) {
+ up = udp_sk(sk);
+
+ lookup = READ_ONCE(up->encap_err_lookup);
+ if (lookup && lookup(sk, skb))
+ sk = NULL;
+
+ goto out;
+ }
+
sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
iph->saddr, uh->dest, skb->dev->ifindex, 0,
udptable, NULL);
if (sk) {
- int (*lookup)(struct sock *sk, struct sk_buff *skb);
- struct udp_sock *up = udp_sk(sk);
+ up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (!lookup || lookup(sk, skb))
sk = NULL;
}
+out:
if (!sk)
sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
@@ -707,15 +719,16 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
iph->saddr, uh->source, skb->dev->ifindex,
inet_sdif(skb), udptable, NULL);
+
if (!sk || udp_sk(sk)->encap_type) {
/* No socket for error: try tunnels before discarding */
- sk = ERR_PTR(-ENOENT);
if (static_branch_unlikely(&udp_encap_needed_key)) {
- sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
+ sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
info);
if (!sk)
return 0;
- }
+ } else
+ sk = ERR_PTR(-ENOENT);
if (IS_ERR(sk)) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 45b8782aec0c..9f5a5cdc38e6 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -134,7 +134,7 @@ static int __init udp_bpf_v4_build_proto(void)
udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot);
return 0;
}
-core_initcall(udp_bpf_v4_build_proto);
+late_initcall(udp_bpf_v4_build_proto);
int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 01bea76e3891..e1b9f7ac8bad 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -74,7 +74,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
if (likely(nskb)) {
if (skb->sk)
- skb_set_owner_w(skb, skb->sk);
+ skb_set_owner_w(nskb, skb->sk);
consume_skb(skb);
} else {
kfree_skb(skb);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7b756a7dc036..b6ddf23d3833 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3769,7 +3769,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
err = PTR_ERR(rt->fib6_metrics);
/* Do not leave garbage there. */
rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
- goto out;
+ goto out_free;
}
if (cfg->fc_flags & RTF_ADDRCONF)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0cc7ba531b34..c5e15e94bb00 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -502,12 +502,14 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
const struct ipv6hdr *hdr, int offset,
struct udphdr *uh,
struct udp_table *udptable,
+ struct sock *sk,
struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, __be32 info)
{
+ int (*lookup)(struct sock *sk, struct sk_buff *skb);
int network_offset, transport_offset;
- struct sock *sk;
+ struct udp_sock *up;
network_offset = skb_network_offset(skb);
transport_offset = skb_transport_offset(skb);
@@ -518,18 +520,28 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
/* Transport header needs to point to the UDP header */
skb_set_transport_header(skb, offset);
+ if (sk) {
+ up = udp_sk(sk);
+
+ lookup = READ_ONCE(up->encap_err_lookup);
+ if (lookup && lookup(sk, skb))
+ sk = NULL;
+
+ goto out;
+ }
+
sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
&hdr->saddr, uh->dest,
inet6_iif(skb), 0, udptable, skb);
if (sk) {
- int (*lookup)(struct sock *sk, struct sk_buff *skb);
- struct udp_sock *up = udp_sk(sk);
+ up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (!lookup || lookup(sk, skb))
sk = NULL;
}
+out:
if (!sk) {
sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
offset, info));
@@ -558,16 +570,17 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
+
if (!sk || udp_sk(sk)->encap_type) {
/* No socket for error: try tunnels before discarding */
- sk = ERR_PTR(-ENOENT);
if (static_branch_unlikely(&udpv6_encap_needed_key)) {
sk = __udp6_lib_err_encap(net, hdr, offset, uh,
- udptable, skb,
+ udptable, sk, skb,
opt, type, code, info);
if (!sk)
return 0;
- }
+ } else
+ sk = ERR_PTR(-ENOENT);
if (IS_ERR(sk)) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 9115f8a7dd45..a8da88db7893 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -121,11 +121,9 @@ static void nr_heartbeat_expiry(struct timer_list *t)
is accepted() it isn't 'dead' so doesn't get removed. */
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
- sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
- sock_put(sk);
- return;
+ goto out;
}
break;
@@ -146,6 +144,8 @@ static void nr_heartbeat_expiry(struct timer_list *t)
nr_start_heartbeat(sk);
bh_unlock_sock(sk);
+out:
+ sock_put(sk);
}
static void nr_t2timer_expiry(struct timer_list *t)
@@ -159,6 +159,7 @@ static void nr_t2timer_expiry(struct timer_list *t)
nr_enquiry_response(sk);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_t4timer_expiry(struct timer_list *t)
@@ -169,6 +170,7 @@ static void nr_t4timer_expiry(struct timer_list *t)
bh_lock_sock(sk);
nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_idletimer_expiry(struct timer_list *t)
@@ -197,6 +199,7 @@ static void nr_idletimer_expiry(struct timer_list *t)
sock_set_flag(sk, SOCK_DEAD);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_t1timer_expiry(struct timer_list *t)
@@ -209,8 +212,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
case NR_STATE_1:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_write_internal(sk, NR_CONNREQ);
@@ -220,8 +222,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
case NR_STATE_2:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_write_internal(sk, NR_DISCREQ);
@@ -231,8 +232,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
case NR_STATE_3:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_requeue_frames(sk);
@@ -241,5 +241,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
}
nr_start_t1timer(sk);
+out:
bh_unlock_sock(sk);
+ sock_put(sk);
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 81a1c67335be..8d17a543cc9f 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -6,6 +6,7 @@
*/
#include <linux/module.h>
+#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
@@ -33,6 +34,13 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&d->tcf_tm);
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+ action = READ_ONCE(d->tcf_action);
+ if (unlikely(action == TC_ACT_SHOT))
+ goto drop;
+
+ if (!skb->dev || skb->dev->type != ARPHRD_ETHER)
+ return action;
+
/* XXX: if you are going to edit more fields beyond ethernet header
* (example when you add IP header replacement or vlan swap)
* then MAX_EDIT_LEN needs to change appropriately
@@ -41,10 +49,6 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
if (unlikely(err)) /* best policy is to drop on the floor */
goto drop;
- action = READ_ONCE(d->tcf_action);
- if (unlikely(action == TC_ACT_SHOT))
- goto drop;
-
p = rcu_dereference_bh(d->skbmod_p);
flags = p->flags;
if (flags & SKBMOD_F_DMAC)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d73b5c5514a9..e3e79e9bd706 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2904,7 +2904,7 @@ replay:
break;
case RTM_GETCHAIN:
err = tc_chain_notify(chain, skb, n->nlmsg_seq,
- n->nlmsg_seq, n->nlmsg_type, true);
+ n->nlmsg_flags, n->nlmsg_type, true);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
break;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 5b274534264c..e9a8a2c86bbd 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -278,6 +278,8 @@ static int tcindex_filter_result_init(struct tcindex_filter_result *r,
TCA_TCINDEX_POLICE);
}
+static void tcindex_free_perfect_hash(struct tcindex_data *cp);
+
static void tcindex_partial_destroy_work(struct work_struct *work)
{
struct tcindex_data *p = container_of(to_rcu_work(work),
@@ -285,7 +287,8 @@ static void tcindex_partial_destroy_work(struct work_struct *work)
rwork);
rtnl_lock();
- kfree(p->perfect);
+ if (p->perfect)
+ tcindex_free_perfect_hash(p);
kfree(p);
rtnl_unlock();
}
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 6f8319b828b0..fe74c5f95630 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -860,6 +860,8 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
if (replace) {
list_del_init(&shkey->key_list);
sctp_auth_shkey_release(shkey);
+ if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
+ sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
}
list_add(&cur_key->key_list, sh_keys);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9032ce60d50e..4dfb5ea82b05 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -104,8 +104,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
} else if (!sctp_transport_pl_enabled(tp) &&
- !sctp_transport_pmtu_check(tp)) {
- if (asoc->param_flags & SPP_PMTUD_ENABLE)
+ asoc->param_flags & SPP_PMTUD_ENABLE) {
+ if (!sctp_transport_pmtu_check(tp))
sctp_assoc_sync_pmtu(asoc);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e64e01f61b11..6b937bfd4751 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4577,6 +4577,10 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
}
if (optlen > 0) {
+ /* Trim it to the biggest size sctp sockopt may need if necessary */
+ optlen = min_t(unsigned int, optlen,
+ PAGE_ALIGN(USHRT_MAX +
+ sizeof(__u16) * sizeof(struct sctp_reset_streams)));
kopt = memdup_sockptr(optval, optlen);
if (IS_ERR(kopt))
return PTR_ERR(kopt);
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 1828bba19020..dc6daa193557 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -222,6 +222,11 @@ int mount_bpffs_for_pin(const char *name)
int err = 0;
file = malloc(strlen(name) + 1);
+ if (!file) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+
strcpy(file, name);
dir = dirname(file);
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index 6365c7fd1262..bd6288302094 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -11,9 +11,11 @@
#include <sys/socket.h>
#include <sys/wait.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
+#include <netinet/ip.h>
#include <netdb.h>
#include <fcntl.h>
#include <libgen.h>
@@ -27,6 +29,10 @@
#include <time.h>
#include <errno.h>
+#include <linux/xfrm.h>
+#include <linux/ipsec.h>
+#include <linux/pfkeyv2.h>
+
#ifndef IPV6_UNICAST_IF
#define IPV6_UNICAST_IF 76
#endif
@@ -114,6 +120,9 @@ struct sock_args {
struct in_addr in;
struct in6_addr in6;
} expected_raddr;
+
+ /* ESP in UDP encap test */
+ int use_xfrm;
};
static int server_mode;
@@ -1346,6 +1355,41 @@ static int bind_socket(int sd, struct sock_args *args)
return 0;
}
+static int config_xfrm_policy(int sd, struct sock_args *args)
+{
+ struct xfrm_userpolicy_info policy = {};
+ int type = UDP_ENCAP_ESPINUDP;
+ int xfrm_af = IP_XFRM_POLICY;
+ int level = SOL_IP;
+
+ if (args->type != SOCK_DGRAM) {
+ log_error("Invalid socket type. Only DGRAM could be used for XFRM\n");
+ return 1;
+ }
+
+ policy.action = XFRM_POLICY_ALLOW;
+ policy.sel.family = args->version;
+ if (args->version == AF_INET6) {
+ xfrm_af = IPV6_XFRM_POLICY;
+ level = SOL_IPV6;
+ }
+
+ policy.dir = XFRM_POLICY_OUT;
+ if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+ return 1;
+
+ policy.dir = XFRM_POLICY_IN;
+ if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+ return 1;
+
+ if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) {
+ log_err_errno("Failed to set xfrm encap");
+ return 1;
+ }
+
+ return 0;
+}
+
static int lsock_init(struct sock_args *args)
{
long flags;
@@ -1389,6 +1433,11 @@ static int lsock_init(struct sock_args *args)
if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0)
log_err_errno("Failed to set close-on-exec flag");
+ if (args->use_xfrm && config_xfrm_policy(sd, args)) {
+ log_err_errno("Failed to set xfrm policy");
+ goto err;
+ }
+
out:
return sd;
@@ -1772,7 +1821,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
return client_status;
}
-#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq"
+#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq"
static void print_usage(char *prog)
{
@@ -1795,6 +1844,7 @@ static void print_usage(char *prog)
" -D|R datagram (D) / raw (R) socket (default stream)\n"
" -l addr local address to bind to in server mode\n"
" -c addr local address to bind to in client mode\n"
+ " -x configure XFRM policy on socket\n"
"\n"
" -d dev bind socket to given device name\n"
" -I dev bind socket to given device name - server mode\n"
@@ -1966,6 +2016,9 @@ int main(int argc, char *argv[])
case 'q':
quiet = 1;
break;
+ case 'x':
+ args.use_xfrm = 1;
+ break;
default:
print_usage(argv[0]);
return 1;
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 64cd2e23c568..543ad7513a8e 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -118,6 +118,16 @@
# below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
# changes alone won't affect PMTU
#
+# - pmtu_vti4_udp_exception
+# Same as pmtu_vti4_exception, but using ESP-in-UDP
+#
+# - pmtu_vti4_udp_routed_exception
+# Set up vti tunnel on top of veth connected through routing namespace and
+# add xfrm states and policies with ESP-in-UDP encapsulation. Check that
+# route exception is not created if link layer MTU is not exceeded, then
+# lower MTU on second part of routed environment and check that exception
+# is created with the expected PMTU.
+#
# - pmtu_vti6_exception
# Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
# namespaces with matching endpoints. Check that route exception is
@@ -125,6 +135,13 @@
# decrease and increase MTU of tunnel, checking that route exception PMTU
# changes accordingly
#
+# - pmtu_vti6_udp_exception
+# Same as pmtu_vti6_exception, but using ESP-in-UDP
+#
+# - pmtu_vti6_udp_routed_exception
+# Same as pmtu_vti6_udp_routed_exception but with routing between vti
+# endpoints
+#
# - pmtu_vti4_default_mtu
# Set up vti4 tunnel on top of veth, in two namespaces with matching
# endpoints. Check that MTU assigned to vti interface is the MTU of the
@@ -224,6 +241,10 @@ tests="
pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1
pmtu_vti6_exception vti6: PMTU exceptions 0
pmtu_vti4_exception vti4: PMTU exceptions 0
+ pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0
+ pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0
+ pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0
+ pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0
pmtu_vti4_default_mtu vti4: default MTU assignment 0
pmtu_vti6_default_mtu vti6: default MTU assignment 0
pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0
@@ -246,7 +267,6 @@ ns_b="ip netns exec ${NS_B}"
ns_c="ip netns exec ${NS_C}"
ns_r1="ip netns exec ${NS_R1}"
ns_r2="ip netns exec ${NS_R2}"
-
# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
@@ -279,7 +299,6 @@ routes="
A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2
B default ${prefix6}:${b_r1}::2
"
-
USE_NH="no"
# ns family nh id destination gateway
nexthops="
@@ -326,6 +345,7 @@ dummy6_mask="64"
err_buf=
tcpdump_pids=
+nettest_pids=
err() {
err_buf="${err_buf}${1}
@@ -548,6 +568,14 @@ setup_vti6() {
setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
}
+setup_vti4routed() {
+ setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
+}
+
+setup_vti6routed() {
+ setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
+}
+
setup_vxlan_or_geneve() {
type="${1}"
a_addr="${2}"
@@ -619,18 +647,36 @@ setup_xfrm() {
proto=${1}
veth_a_addr="${2}"
veth_b_addr="${3}"
+ encap=${4}
- run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
- run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1
+ run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
- run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
- run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
+ run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}
+setup_nettest_xfrm() {
+ which nettest >/dev/null
+ if [ $? -ne 0 ]; then
+ echo "'nettest' command not found; skipping tests"
+ return 1
+ fi
+
+ [ ${1} -eq 6 ] && proto="-6" || proto=""
+ port=${2}
+
+ run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+ nettest_pids="${nettest_pids} $!"
+
+ run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+ nettest_pids="${nettest_pids} $!"
+}
+
setup_xfrm4() {
setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
}
@@ -639,6 +685,26 @@ setup_xfrm6() {
setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}
+setup_xfrm4udp() {
+ setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0"
+ setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udp() {
+ setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0"
+ setup_nettest_xfrm 6 4500
+}
+
+setup_xfrm4udprouted() {
+ setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0"
+ setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udprouted() {
+ setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0"
+ setup_nettest_xfrm 6 4500
+}
+
setup_routing_old() {
for i in ${routes}; do
[ "${ns}" = "" ] && ns="${i}" && continue
@@ -823,6 +889,11 @@ cleanup() {
done
tcpdump_pids=
+ for pid in ${nettest_pids}; do
+ kill ${pid}
+ done
+ nettest_pids=
+
for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
ip netns del ${n} 2> /dev/null
done
@@ -1432,6 +1503,135 @@ test_pmtu_vti6_exception() {
return ${fail}
}
+test_pmtu_vti4_udp_exception() {
+ setup namespaces veth vti4 xfrm4udp || return $ksft_skip
+ trace "${ns_a}" veth_a "${ns_b}" veth_b \
+ "${ns_a}" vti4_a "${ns_b}" vti4_b
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # UDP SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_a ${veth_mtu}
+ mtu "${ns_b}" veth_b ${veth_mtu}
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+ # Now exceed link layer MTU by one byte, check that exception is created
+ # with the right PMTU value
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
+}
+
+test_pmtu_vti6_udp_exception() {
+ setup namespaces veth vti6 xfrm6udp || return $ksft_skip
+ trace "${ns_a}" veth_a "${ns_b}" veth_b \
+ "${ns_a}" vti6_a "${ns_b}" vti6_b
+ fail=0
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_a 4000
+ mtu "${ns_b}" veth_b 4000
+ mtu "${ns_a}" vti6_a 5000
+ mtu "${ns_b}" vti6_b 5000
+ run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
+
+ # Check that exception was created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
+
+ # Decrease tunnel MTU, check for PMTU decrease in route exception
+ mtu "${ns_a}" vti6_a 3000
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
+
+ # Increase tunnel MTU, check for PMTU increase in route exception
+ mtu "${ns_a}" vti6_a 9000
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
+
+ return ${fail}
+}
+
+test_pmtu_vti4_udp_routed_exception() {
+ setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip
+ trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
+ "${ns_a}" vti4_a "${ns_b}" vti4_b
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # UDP SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_A-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+ mtu "${ns_b}" veth_B-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+ # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+ # with the right PMTU value
+ mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+}
+
+test_pmtu_vti6_udp_routed_exception() {
+ setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip
+ trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
+ "${ns_a}" vti6_a "${ns_b}" vti6_b
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 40))
+
+ # UDP SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 48))
+
+ mtu "${ns_a}" veth_A-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+ mtu "${ns_b}" veth_B-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+ # mtu "${ns_a}" vti6_a ${vti_mtu}
+ # mtu "${ns_b}" vti6_b ${vti_mtu}
+
+ run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr}
+
+ # Check that exception was not created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+ # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+ # with the right PMTU value
+ mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+ run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+
+}
+
test_pmtu_vti4_default_mtu() {
setup namespaces veth vti4 || return $ksft_skip