summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/dsa/dsa.txt10
-rw-r--r--Documentation/networking/switchdev.txt10
-rw-r--r--MAINTAINERS17
-rw-r--r--drivers/net/bonding/bond_main.c35
-rw-r--r--drivers/net/ethernet/atheros/atlx/atl2.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h2
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic.h14
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic_main.c149
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c128
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.c2
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c27
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c5
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c19
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c15
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c17
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c4
-rw-r--r--drivers/net/phy/marvell10g.c6
-rw-r--r--drivers/net/phy/mdio_bus.c1
-rw-r--r--drivers/net/phy/realtek.c7
-rw-r--r--drivers/net/team/team.c4
-rw-r--r--drivers/net/usb/r8152.c2
-rw-r--r--drivers/net/vrf.c3
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c2
-rw-r--r--include/linux/phy.h8
-rw-r--r--include/linux/virtio_net.h14
-rw-r--r--include/net/phonet/pep.h5
-rw-r--r--include/net/xfrm.h12
-rw-r--r--kernel/bpf/lpm_trie.c1
-rw-r--r--net/bpf/test_run.c45
-rw-r--r--net/bridge/br_multicast.c9
-rw-r--r--net/compat.c6
-rw-r--r--net/dsa/port.c7
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/ip_gre.c33
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/fou6.c2
-rw-r--r--net/ipv6/ip6_gre.c39
-rw-r--r--net/ipv6/route.c32
-rw-r--r--net/ipv6/udp.c12
-rw-r--r--net/ipv6/xfrm6_tunnel.c2
-rw-r--r--net/key/af_key.c42
-rw-r--r--net/mac80211/main.c4
-rw-r--r--net/mac80211/rx.c7
-rw-r--r--net/phonet/pep.c32
-rw-r--r--net/sctp/transport.c3
-rw-r--r--net/smc/smc.h6
-rw-r--r--net/tipc/socket.c11
-rw-r--r--net/unix/af_unix.c57
-rw-r--r--net/unix/diag.c3
-rw-r--r--net/x25/af_x25.c13
-rw-r--r--net/xdp/xsk.c16
-rw-r--r--net/xfrm/xfrm_interface.c4
-rw-r--r--net/xfrm/xfrm_policy.c4
-rw-r--r--net/xfrm/xfrm_state.c30
-rw-r--r--net/xfrm/xfrm_user.c2
-rw-r--r--security/lsm_audit.c10
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c10
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh1
64 files changed, 568 insertions, 420 deletions
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt
index 25170ad7d25b..101f2b2c69ad 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.txt
@@ -533,16 +533,12 @@ Bridge VLAN filtering
function that the driver has to call for each VLAN the given port is a member
of. A switchdev object is used to carry the VID and bridge flags.
-- port_fdb_prepare: bridge layer function invoked when the bridge prepares the
- installation of a Forwarding Database entry. If the operation is not
- supported, this function should return -EOPNOTSUPP to inform the bridge code
- to fallback to a software implementation. No hardware setup must be done in
- this function. See port_fdb_add for this and details.
-
- port_fdb_add: bridge layer function invoked when the bridge wants to install a
Forwarding Database entry, the switch hardware should be programmed with the
specified address in the specified VLAN Id in the forwarding database
- associated with this VLAN ID
+ associated with this VLAN ID. If the operation is not supported, this
+ function should return -EOPNOTSUPP to inform the bridge code to fallback to
+ a software implementation.
Note: VLAN ID 0 corresponds to the port private database, which, in the context
of DSA, would be its port-based VLAN, used by the associated bridge device.
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index 82236a17b5e6..97b7ca8b9b86 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -92,11 +92,11 @@ device.
Switch ID
^^^^^^^^^
-The switchdev driver must implement the switchdev op switchdev_port_attr_get
-for SWITCHDEV_ATTR_ID_PORT_PARENT_ID for each port netdev, returning the same
-physical ID for each port of a switch. The ID must be unique between switches
-on the same system. The ID does not need to be unique between switches on
-different systems.
+The switchdev driver must implement the net_device operation
+ndo_get_port_parent_id for each port netdev, returning the same physical ID for
+each port of a switch. The ID must be unique between switches on the same
+system. The ID does not need to be unique between switches on different
+systems.
The switch ID is used to locate ports on a switch and to know if aggregated
ports belong to the same switch.
diff --git a/MAINTAINERS b/MAINTAINERS
index e6e17d8c5aae..dce5c099f43c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2851,7 +2851,7 @@ R: Martin KaFai Lau <kafai@fb.com>
R: Song Liu <songliubraving@fb.com>
R: Yonghong Song <yhs@fb.com>
L: netdev@vger.kernel.org
-L: linux-kernel@vger.kernel.org
+L: bpf@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
Q: https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
@@ -2881,6 +2881,7 @@ N: bpf
BPF JIT for ARM
M: Shubham Bansal <illusionist.neo@gmail.com>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Maintained
F: arch/arm/net/
@@ -2889,18 +2890,21 @@ M: Daniel Borkmann <daniel@iogearbox.net>
M: Alexei Starovoitov <ast@kernel.org>
M: Zi Shen Lim <zlim.lnx@gmail.com>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Supported
F: arch/arm64/net/
BPF JIT for MIPS (32-BIT AND 64-BIT)
M: Paul Burton <paul.burton@mips.com>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Maintained
F: arch/mips/net/
BPF JIT for NFP NICs
M: Jakub Kicinski <jakub.kicinski@netronome.com>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Supported
F: drivers/net/ethernet/netronome/nfp/bpf/
@@ -2908,6 +2912,7 @@ BPF JIT for POWERPC (32-BIT AND 64-BIT)
M: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
M: Sandipan Das <sandipan@linux.ibm.com>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Maintained
F: arch/powerpc/net/
@@ -2915,6 +2920,7 @@ BPF JIT for S390
M: Martin Schwidefsky <schwidefsky@de.ibm.com>
M: Heiko Carstens <heiko.carstens@de.ibm.com>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Maintained
F: arch/s390/net/
X: arch/s390/net/pnet.c
@@ -2922,12 +2928,14 @@ X: arch/s390/net/pnet.c
BPF JIT for SPARC (32-BIT AND 64-BIT)
M: David S. Miller <davem@davemloft.net>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Maintained
F: arch/sparc/net/
BPF JIT for X86 32-BIT
M: Wang YanQing <udknight@gmail.com>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Maintained
F: arch/x86/net/bpf_jit_comp32.c
@@ -2935,6 +2943,7 @@ BPF JIT for X86 64-BIT
M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Supported
F: arch/x86/net/
X: arch/x86/net/bpf_jit_comp32.c
@@ -3389,9 +3398,8 @@ F: Documentation/media/v4l-drivers/cafe_ccic*
F: drivers/media/platform/marvell-ccic/
CAIF NETWORK LAYER
-M: Dmitry Tarnyagin <dmitry.tarnyagin@lockless.no>
L: netdev@vger.kernel.org
-S: Supported
+S: Orphan
F: Documentation/networking/caif/
F: drivers/net/caif/
F: include/uapi/linux/caif/
@@ -8486,6 +8494,7 @@ L7 BPF FRAMEWORK
M: John Fastabend <john.fastabend@gmail.com>
M: Daniel Borkmann <daniel@iogearbox.net>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Maintained
F: include/linux/skmsg.h
F: net/core/skmsg.c
@@ -16713,6 +16722,7 @@ M: Jesper Dangaard Brouer <hawk@kernel.org>
M: John Fastabend <john.fastabend@gmail.com>
L: netdev@vger.kernel.org
L: xdp-newbies@vger.kernel.org
+L: bpf@vger.kernel.org
S: Supported
F: net/core/xdp.c
F: include/net/xdp.h
@@ -16726,6 +16736,7 @@ XDP SOCKETS (AF_XDP)
M: Björn Töpel <bjorn.topel@intel.com>
M: Magnus Karlsson <magnus.karlsson@intel.com>
L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
S: Maintained
F: kernel/bpf/xskmap.c
F: net/xdp/
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 485462d3087f..537c90c8eb0a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1183,29 +1183,22 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
}
}
- /* Link-local multicast packets should be passed to the
- * stack on the link they arrive as well as pass them to the
- * bond-master device. These packets are mostly usable when
- * stack receives it with the link on which they arrive
- * (e.g. LLDP) they also must be available on master. Some of
- * the use cases include (but are not limited to): LLDP agents
- * that must be able to operate both on enslaved interfaces as
- * well as on bonds themselves; linux bridges that must be able
- * to process/pass BPDUs from attached bonds when any kind of
- * STP version is enabled on the network.
+ /*
+ * For packets determined by bond_should_deliver_exact_match() call to
+ * be suppressed we want to make an exception for link-local packets.
+ * This is necessary for e.g. LLDP daemons to be able to monitor
+ * inactive slave links without being forced to bind to them
+ * explicitly.
+ *
+ * At the same time, packets that are passed to the bonding master
+ * (including link-local ones) can have their originating interface
+ * determined via PACKET_ORIGDEV socket option.
*/
- if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) {
- struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
- if (nskb) {
- nskb->dev = bond->dev;
- nskb->queue_mapping = 0;
- netif_rx(nskb);
- }
- return RX_HANDLER_PASS;
- }
- if (bond_should_deliver_exact_match(skb, slave, bond))
+ if (bond_should_deliver_exact_match(skb, slave, bond)) {
+ if (is_link_local_ether_addr(eth_hdr(skb)->h_dest))
+ return RX_HANDLER_PASS;
return RX_HANDLER_EXACT;
+ }
skb->dev = bond->dev;
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index bb41becb6609..31ff1e0d1baa 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1335,13 +1335,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct net_device *netdev;
struct atl2_adapter *adapter;
- static int cards_found;
+ static int cards_found = 0;
unsigned long mmio_start;
int mmio_len;
int err;
- cards_found = 0;
-
err = pci_enable_device(pdev);
if (err)
return err;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 8bc7e495b027..d95730c6e0f2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3903,7 +3903,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
if (len)
break;
/* on first few passes, just barely sleep */
- if (i < DFLT_HWRM_CMD_TIMEOUT)
+ if (i < HWRM_SHORT_TIMEOUT_COUNTER)
usleep_range(HWRM_SHORT_MIN_TIMEOUT,
HWRM_SHORT_MAX_TIMEOUT);
else
@@ -3926,7 +3926,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
dma_rmb();
if (*valid)
break;
- udelay(1);
+ usleep_range(1, 5);
}
if (j >= HWRM_VALID_BIT_DELAY_USEC) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index a451796deefe..2fb653e0048d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -582,7 +582,7 @@ struct nqe_cn {
(HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT + \
((n) - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT))
-#define HWRM_VALID_BIT_DELAY_USEC 20
+#define HWRM_VALID_BIT_DELAY_USEC 150
#define BNXT_HWRM_CHNL_CHIMP 0
#define BNXT_HWRM_CHNL_KONG 1
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index f4d81765221e..62636c1ed141 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -271,7 +271,7 @@ struct xcast_addr_list {
};
struct nicvf_work {
- struct delayed_work work;
+ struct work_struct work;
u8 mode;
struct xcast_addr_list *mc;
};
@@ -327,7 +327,11 @@ struct nicvf {
struct nicvf_work rx_mode_work;
/* spinlock to protect workqueue arguments from concurrent access */
spinlock_t rx_mode_wq_lock;
-
+ /* workqueue for handling kernel ndo_set_rx_mode() calls */
+ struct workqueue_struct *nicvf_rx_mode_wq;
+ /* mutex to protect VF's mailbox contents from concurrent access */
+ struct mutex rx_mode_mtx;
+ struct delayed_work link_change_work;
/* PTP timestamp */
struct cavium_ptp *ptp_clock;
/* Inbound timestamping is on */
@@ -575,10 +579,8 @@ struct set_ptp {
struct xcast {
u8 msg;
- union {
- u8 mode;
- u64 mac;
- } data;
+ u8 mode;
+ u64 mac:48;
};
/* 128 bit shared memory between PF and each VF */
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 6c8dcb65ff03..c90252829ed3 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -57,14 +57,8 @@ struct nicpf {
#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF)
#define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF)
u8 *vf_lmac_map;
- struct delayed_work dwork;
- struct workqueue_struct *check_link;
- u8 *link;
- u8 *duplex;
- u32 *speed;
u16 cpi_base[MAX_NUM_VFS_SUPPORTED];
u16 rssi_base[MAX_NUM_VFS_SUPPORTED];
- bool mbx_lock[MAX_NUM_VFS_SUPPORTED];
/* MSI-X */
u8 num_vec;
@@ -929,6 +923,35 @@ static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp)
nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val);
}
+/* Get BGX LMAC link status and update corresponding VF
+ * if there is a change, valid only if internal L2 switch
+ * is not present otherwise VF link is always treated as up
+ */
+static void nic_link_status_get(struct nicpf *nic, u8 vf)
+{
+ union nic_mbx mbx = {};
+ struct bgx_link_status link;
+ u8 bgx, lmac;
+
+ mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+
+ /* Get BGX, LMAC indices for the VF */
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+ /* Get interface link status */
+ bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
+
+ /* Send a mbox message to VF with current link status */
+ mbx.link_status.link_up = link.link_up;
+ mbx.link_status.duplex = link.duplex;
+ mbx.link_status.speed = link.speed;
+ mbx.link_status.mac_type = link.mac_type;
+
+ /* reply with link status */
+ nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
/* Interrupt handler to handle mailbox messages from VFs */
static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
{
@@ -941,8 +964,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
int i;
int ret = 0;
- nic->mbx_lock[vf] = true;
-
mbx_addr = nic_get_mbx_addr(vf);
mbx_data = (u64 *)&mbx;
@@ -957,12 +978,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
switch (mbx.msg.msg) {
case NIC_MBOX_MSG_READY:
nic_mbx_send_ready(nic, vf);
- if (vf < nic->num_vf_en) {
- nic->link[vf] = 0;
- nic->duplex[vf] = 0;
- nic->speed[vf] = 0;
- }
- goto unlock;
+ return;
case NIC_MBOX_MSG_QS_CFG:
reg_addr = NIC_PF_QSET_0_127_CFG |
(mbx.qs.num << NIC_QS_ID_SHIFT);
@@ -1031,7 +1047,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
break;
case NIC_MBOX_MSG_RSS_SIZE:
nic_send_rss_size(nic, vf);
- goto unlock;
+ return;
case NIC_MBOX_MSG_RSS_CFG:
case NIC_MBOX_MSG_RSS_CFG_CONT:
nic_config_rss(nic, &mbx.rss_cfg);
@@ -1039,7 +1055,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
case NIC_MBOX_MSG_CFG_DONE:
/* Last message of VF config msg sequence */
nic_enable_vf(nic, vf, true);
- goto unlock;
+ break;
case NIC_MBOX_MSG_SHUTDOWN:
/* First msg in VF teardown sequence */
if (vf >= nic->num_vf_en)
@@ -1049,19 +1065,19 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
break;
case NIC_MBOX_MSG_ALLOC_SQS:
nic_alloc_sqs(nic, &mbx.sqs_alloc);
- goto unlock;
+ return;
case NIC_MBOX_MSG_NICVF_PTR:
nic->nicvf[vf] = mbx.nicvf.nicvf;
break;
case NIC_MBOX_MSG_PNICVF_PTR:
nic_send_pnicvf(nic, vf);
- goto unlock;
+ return;
case NIC_MBOX_MSG_SNICVF_PTR:
nic_send_snicvf(nic, &mbx.nicvf);
- goto unlock;
+ return;
case NIC_MBOX_MSG_BGX_STATS:
nic_get_bgx_stats(nic, &mbx.bgx_stats);
- goto unlock;
+ return;
case NIC_MBOX_MSG_LOOPBACK:
ret = nic_config_loopback(nic, &mbx.lbk);
break;
@@ -1070,7 +1086,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
break;
case NIC_MBOX_MSG_PFC:
nic_pause_frame(nic, vf, &mbx.pfc);
- goto unlock;
+ return;
case NIC_MBOX_MSG_PTP_CFG:
nic_config_timestamp(nic, vf, &mbx.ptp);
break;
@@ -1094,7 +1110,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
- mbx.xcast.data.mac,
+ mbx.xcast.mac,
vf < NIC_VF_PER_MBX_REG ? vf :
vf - NIC_VF_PER_MBX_REG);
break;
@@ -1106,8 +1122,15 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
}
bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
- bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode);
+ bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.mode);
break;
+ case NIC_MBOX_MSG_BGX_LINK_CHANGE:
+ if (vf >= nic->num_vf_en) {
+ ret = -1; /* NACK */
+ break;
+ }
+ nic_link_status_get(nic, vf);
+ return;
default:
dev_err(&nic->pdev->dev,
"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1121,8 +1144,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
mbx.msg.msg, vf);
nic_mbx_send_nack(nic, vf);
}
-unlock:
- nic->mbx_lock[vf] = false;
}
static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
@@ -1270,52 +1291,6 @@ static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
return 0;
}
-/* Poll for BGX LMAC link status and update corresponding VF
- * if there is a change, valid only if internal L2 switch
- * is not present otherwise VF link is always treated as up
- */
-static void nic_poll_for_link(struct work_struct *work)
-{
- union nic_mbx mbx = {};
- struct nicpf *nic;
- struct bgx_link_status link;
- u8 vf, bgx, lmac;
-
- nic = container_of(work, struct nicpf, dwork.work);
-
- mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
-
- for (vf = 0; vf < nic->num_vf_en; vf++) {
- /* Poll only if VF is UP */
- if (!nic->vf_enabled[vf])
- continue;
-
- /* Get BGX, LMAC indices for the VF */
- bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
- lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
- /* Get interface link status */
- bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
-
- /* Inform VF only if link status changed */
- if (nic->link[vf] == link.link_up)
- continue;
-
- if (!nic->mbx_lock[vf]) {
- nic->link[vf] = link.link_up;
- nic->duplex[vf] = link.duplex;
- nic->speed[vf] = link.speed;
-
- /* Send a mbox message to VF with current link status */
- mbx.link_status.link_up = link.link_up;
- mbx.link_status.duplex = link.duplex;
- mbx.link_status.speed = link.speed;
- mbx.link_status.mac_type = link.mac_type;
- nic_send_msg_to_vf(nic, vf, &mbx);
- }
- }
- queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2);
-}
-
static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
@@ -1384,18 +1359,6 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!nic->vf_lmac_map)
goto err_release_regions;
- nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
- if (!nic->link)
- goto err_release_regions;
-
- nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
- if (!nic->duplex)
- goto err_release_regions;
-
- nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL);
- if (!nic->speed)
- goto err_release_regions;
-
/* Initialize hardware */
nic_init_hw(nic);
@@ -1411,22 +1374,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_unregister_interrupts;
- /* Register a physical link status poll fn() */
- nic->check_link = alloc_workqueue("check_link_status",
- WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
- if (!nic->check_link) {
- err = -ENOMEM;
- goto err_disable_sriov;
- }
-
- INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link);
- queue_delayed_work(nic->check_link, &nic->dwork, 0);
-
return 0;
-err_disable_sriov:
- if (nic->flags & NIC_SRIOV_ENABLED)
- pci_disable_sriov(pdev);
err_unregister_interrupts:
nic_unregister_interrupts(nic);
err_release_regions:
@@ -1447,12 +1396,6 @@ static void nic_remove(struct pci_dev *pdev)
if (nic->flags & NIC_SRIOV_ENABLED)
pci_disable_sriov(pdev);
- if (nic->check_link) {
- /* Destroy work Queue */
- cancel_delayed_work_sync(&nic->dwork);
- destroy_workqueue(nic->check_link);
- }
-
nic_unregister_interrupts(nic);
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 88f8a8fa93cd..503cfadff4ac 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -68,9 +68,6 @@ module_param(cpi_alg, int, 0444);
MODULE_PARM_DESC(cpi_alg,
"PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
-/* workqueue for handling kernel ndo_set_rx_mode() calls */
-static struct workqueue_struct *nicvf_rx_mode_wq;
-
static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
if (nic->sqs_mode)
@@ -127,6 +124,9 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
{
int timeout = NIC_MBOX_MSG_TIMEOUT;
int sleep = 10;
+ int ret = 0;
+
+ mutex_lock(&nic->rx_mode_mtx);
nic->pf_acked = false;
nic->pf_nacked = false;
@@ -139,7 +139,8 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
netdev_err(nic->netdev,
"PF NACK to mbox msg 0x%02x from VF%d\n",
(mbx->msg.msg & 0xFF), nic->vf_id);
- return -EINVAL;
+ ret = -EINVAL;
+ break;
}
msleep(sleep);
if (nic->pf_acked)
@@ -149,10 +150,12 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
netdev_err(nic->netdev,
"PF didn't ACK to mbox msg 0x%02x from VF%d\n",
(mbx->msg.msg & 0xFF), nic->vf_id);
- return -EBUSY;
+ ret = -EBUSY;
+ break;
}
}
- return 0;
+ mutex_unlock(&nic->rx_mode_mtx);
+ return ret;
}
/* Checks if VF is able to communicate with PF
@@ -172,6 +175,17 @@ static int nicvf_check_pf_ready(struct nicvf *nic)
return 1;
}
+static void nicvf_send_cfg_done(struct nicvf *nic)
+{
+ union nic_mbx mbx = {};
+
+ mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
+ if (nicvf_send_msg_to_pf(nic, &mbx)) {
+ netdev_err(nic->netdev,
+ "PF didn't respond to CFG DONE msg\n");
+ }
+}
+
static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
{
if (bgx->rx)
@@ -228,21 +242,24 @@ static void nicvf_handle_mbx_intr(struct nicvf *nic)
break;
case NIC_MBOX_MSG_BGX_LINK_CHANGE:
nic->pf_acked = true;
- nic->link_up = mbx.link_status.link_up;
- nic->duplex = mbx.link_status.duplex;
- nic->speed = mbx.link_status.speed;
- nic->mac_type = mbx.link_status.mac_type;
- if (nic->link_up) {
- netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
- nic->speed,
- nic->duplex == DUPLEX_FULL ?
- "Full" : "Half");
- netif_carrier_on(nic->netdev);
- netif_tx_start_all_queues(nic->netdev);
- } else {
- netdev_info(nic->netdev, "Link is Down\n");
- netif_carrier_off(nic->netdev);
- netif_tx_stop_all_queues(nic->netdev);
+ if (nic->link_up != mbx.link_status.link_up) {
+ nic->link_up = mbx.link_status.link_up;
+ nic->duplex = mbx.link_status.duplex;
+ nic->speed = mbx.link_status.speed;
+ nic->mac_type = mbx.link_status.mac_type;
+ if (nic->link_up) {
+ netdev_info(nic->netdev,
+ "Link is Up %d Mbps %s duplex\n",
+ nic->speed,
+ nic->duplex == DUPLEX_FULL ?
+ "Full" : "Half");
+ netif_carrier_on(nic->netdev);
+ netif_tx_start_all_queues(nic->netdev);
+ } else {
+ netdev_info(nic->netdev, "Link is Down\n");
+ netif_carrier_off(nic->netdev);
+ netif_tx_stop_all_queues(nic->netdev);
+ }
}
break;
case NIC_MBOX_MSG_ALLOC_SQS:
@@ -1311,6 +1328,11 @@ int nicvf_stop(struct net_device *netdev)
struct nicvf_cq_poll *cq_poll = NULL;
union nic_mbx mbx = {};
+ cancel_delayed_work_sync(&nic->link_change_work);
+
+ /* wait till all queued set_rx_mode tasks complete */
+ drain_workqueue(nic->nicvf_rx_mode_wq);
+
mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
nicvf_send_msg_to_pf(nic, &mbx);
@@ -1410,13 +1432,27 @@ static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
return nicvf_send_msg_to_pf(nic, &mbx);
}
+static void nicvf_link_status_check_task(struct work_struct *work_arg)
+{
+ struct nicvf *nic = container_of(work_arg,
+ struct nicvf,
+ link_change_work.work);
+ union nic_mbx mbx = {};
+ mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+ nicvf_send_msg_to_pf(nic, &mbx);
+ queue_delayed_work(nic->nicvf_rx_mode_wq,
+ &nic->link_change_work, 2 * HZ);
+}
+
int nicvf_open(struct net_device *netdev)
{
int cpu, err, qidx;
struct nicvf *nic = netdev_priv(netdev);
struct queue_set *qs = nic->qs;
struct nicvf_cq_poll *cq_poll = NULL;
- union nic_mbx mbx = {};
+
+ /* wait till all queued set_rx_mode tasks complete, if any */
+ drain_workqueue(nic->nicvf_rx_mode_wq);
netif_carrier_off(netdev);
@@ -1512,8 +1548,12 @@ int nicvf_open(struct net_device *netdev)
nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
/* Send VF config done msg to PF */
- mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
- nicvf_write_to_mbx(nic, &mbx);
+ nicvf_send_cfg_done(nic);
+
+ INIT_DELAYED_WORK(&nic->link_change_work,
+ nicvf_link_status_check_task);
+ queue_delayed_work(nic->nicvf_rx_mode_wq,
+ &nic->link_change_work, 0);
return 0;
cleanup:
@@ -1941,15 +1981,17 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
/* flush DMAC filters and reset RX mode */
mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
- nicvf_send_msg_to_pf(nic, &mbx);
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
if (mode & BGX_XCAST_MCAST_FILTER) {
/* once enabling filtering, we need to signal to PF to add
* its own LMAC to the filter to accept packets for it.
*/
mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
- mbx.xcast.data.mac = 0;
- nicvf_send_msg_to_pf(nic, &mbx);
+ mbx.xcast.mac = 0;
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
}
/* check if we have any specific MACs to be added to PF DMAC filter */
@@ -1957,23 +1999,25 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
/* now go through kernel list of MACs and add them one by one */
for (idx = 0; idx < mc_addrs->count; idx++) {
mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
- mbx.xcast.data.mac = mc_addrs->mc[idx];
- nicvf_send_msg_to_pf(nic, &mbx);
+ mbx.xcast.mac = mc_addrs->mc[idx];
+ if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+ goto free_mc;
}
- kfree(mc_addrs);
}
/* and finally set rx mode for PF accordingly */
mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
- mbx.xcast.data.mode = mode;
+ mbx.xcast.mode = mode;
nicvf_send_msg_to_pf(nic, &mbx);
+free_mc:
+ kfree(mc_addrs);
}
static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
{
struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
- work.work);
+ work);
struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
u8 mode;
struct xcast_addr_list *mc;
@@ -2030,7 +2074,7 @@ static void nicvf_set_rx_mode(struct net_device *netdev)
kfree(nic->rx_mode_work.mc);
nic->rx_mode_work.mc = mc_list;
nic->rx_mode_work.mode = mode;
- queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 0);
+ queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work);
spin_unlock(&nic->rx_mode_wq_lock);
}
@@ -2187,8 +2231,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&nic->reset_task, nicvf_reset_task);
- INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
+ nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d",
+ WQ_MEM_RECLAIM,
+ nic->vf_id);
+ INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
spin_lock_init(&nic->rx_mode_wq_lock);
+ mutex_init(&nic->rx_mode_mtx);
err = register_netdev(netdev);
if (err) {
@@ -2228,13 +2276,15 @@ static void nicvf_remove(struct pci_dev *pdev)
nic = netdev_priv(netdev);
pnetdev = nic->pnicvf->netdev;
- cancel_delayed_work_sync(&nic->rx_mode_work.work);
-
/* Check if this Qset is assigned to different VF.
* If yes, clean primary and all secondary Qsets.
*/
if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
unregister_netdev(pnetdev);
+ if (nic->nicvf_rx_mode_wq) {
+ destroy_workqueue(nic->nicvf_rx_mode_wq);
+ nic->nicvf_rx_mode_wq = NULL;
+ }
nicvf_unregister_interrupts(nic);
pci_set_drvdata(pdev, NULL);
if (nic->drv_stats)
@@ -2261,17 +2311,11 @@ static struct pci_driver nicvf_driver = {
static int __init nicvf_init_module(void)
{
pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
- nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_generic",
- WQ_MEM_RECLAIM);
return pci_register_driver(&nicvf_driver);
}
static void __exit nicvf_cleanup_module(void)
{
- if (nicvf_rx_mode_wq) {
- destroy_workqueue(nicvf_rx_mode_wq);
- nicvf_rx_mode_wq = NULL;
- }
pci_unregister_driver(&nicvf_driver);
}
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index e337da6ba2a4..673c57b8023f 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1217,7 +1217,7 @@ static void bgx_init_hw(struct bgx *bgx)
/* Disable MAC steering (NCSI traffic) */
for (i = 0; i < RX_TRAFFIC_STEER_RULE_COUNT; i++)
- bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00);
+ bgx_reg_write(bgx, 0, BGX_CMR_RX_STEERING + (i * 8), 0x00);
}
static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac)
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index cbdd20b9ee6f..5cbc54e9eb19 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -60,7 +60,7 @@
#define RX_DMACX_CAM_EN BIT_ULL(48)
#define RX_DMACX_CAM_LMACID(x) (((u64)x) << 49)
#define RX_DMAC_COUNT 32
-#define BGX_CMR_RX_STREERING 0x300
+#define BGX_CMR_RX_STEERING 0x300
#define RX_TRAFFIC_STEER_RULE_COUNT 8
#define BGX_CMR_CHAN_MSK_AND 0x450
#define BGX_CMR_BIST_STATUS 0x460
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f52e2c46e6a7..e4ff531db14a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3289,8 +3289,11 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) :
!i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
if (!ok) {
+ /* Log this in case the user has forgotten to give the kernel
+ * any buffers, even later in the application.
+ */
dev_info(&vsi->back->pdev->dev,
- "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+ "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n",
ring->xsk_umem ? "UMEM enabled " : "",
ring->queue_index, pf_q);
}
@@ -6725,8 +6728,13 @@ void i40e_down(struct i40e_vsi *vsi)
for (i = 0; i < vsi->num_queue_pairs; i++) {
i40e_clean_tx_ring(vsi->tx_rings[i]);
- if (i40e_enabled_xdp_vsi(vsi))
+ if (i40e_enabled_xdp_vsi(vsi)) {
+ /* Make sure that in-progress ndo_xdp_xmit
+ * calls are completed.
+ */
+ synchronize_rcu();
i40e_clean_tx_ring(vsi->xdp_rings[i]);
+ }
i40e_clean_rx_ring(vsi->rx_rings[i]);
}
@@ -11895,6 +11903,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
if (old_prog)
bpf_prog_put(old_prog);
+ /* Kick start the NAPI context if there is an AF_XDP socket open
+ * on that queue id. This so that receiving will start.
+ */
+ if (need_reset && prog)
+ for (i = 0; i < vsi->num_queue_pairs; i++)
+ if (vsi->xdp_rings[i]->xsk_umem)
+ (void)i40e_xsk_async_xmit(vsi->netdev, i);
+
return 0;
}
@@ -11955,8 +11971,13 @@ static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
{
i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
- if (i40e_enabled_xdp_vsi(vsi))
+ if (i40e_enabled_xdp_vsi(vsi)) {
+ /* Make sure that in-progress ndo_xdp_xmit calls are
+ * completed.
+ */
+ synchronize_rcu();
i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
+ }
i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index a7e14e98889f..6c97667d20ef 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3709,6 +3709,7 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
struct i40e_ring *xdp_ring;
int drops = 0;
int i;
@@ -3716,7 +3717,8 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN;
- if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
+ if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs ||
+ test_bit(__I40E_CONFIG_BUSY, pf->state))
return -ENXIO;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 870cf654e436..3827f16e6923 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -183,6 +183,11 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
err = i40e_queue_pair_enable(vsi, qid);
if (err)
return err;
+
+ /* Kick start the NAPI context so that receiving will start */
+ err = i40e_xsk_async_xmit(vsi->netdev, qid);
+ if (err)
+ return err;
}
return 0;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index daff8183534b..cb35d8202572 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3953,8 +3953,11 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
else
mrqc = IXGBE_MRQC_VMDQRSS64EN;
- /* Enable L3/L4 for Tx Switched packets */
- mrqc |= IXGBE_MRQC_L3L4TXSWEN;
+ /* Enable L3/L4 for Tx Switched packets only for X550,
+ * older devices do not support this feature
+ */
+ if (hw->mac.type >= ixgbe_mac_X550)
+ mrqc |= IXGBE_MRQC_L3L4TXSWEN;
} else {
if (tcs > 4)
mrqc = IXGBE_MRQC_RTRSS8TCEN;
@@ -10225,6 +10228,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct bpf_prog *old_prog;
+ bool need_reset;
if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
return -EINVAL;
@@ -10247,9 +10251,10 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
return -ENOMEM;
old_prog = xchg(&adapter->xdp_prog, prog);
+ need_reset = (!!prog != !!old_prog);
/* If transitioning XDP modes reconfigure rings */
- if (!!prog != !!old_prog) {
+ if (need_reset) {
int err = ixgbe_setup_tc(dev, adapter->hw_tcs);
if (err) {
@@ -10265,6 +10270,14 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
if (old_prog)
bpf_prog_put(old_prog);
+ /* Kick start the NAPI context if there is an AF_XDP socket open
+ * on that queue id. This so that receiving will start.
+ */
+ if (need_reset && prog)
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ if (adapter->xdp_ring[i]->xsk_umem)
+ (void)ixgbe_xsk_async_xmit(adapter->netdev, i);
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 65c3e2c979d4..36a8879536a4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -144,11 +144,19 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
ixgbe_txrx_ring_disable(adapter, qid);
err = ixgbe_add_xsk_umem(adapter, umem, qid);
+ if (err)
+ return err;
- if (if_running)
+ if (if_running) {
ixgbe_txrx_ring_enable(adapter, qid);
- return err;
+ /* Kick start the NAPI context so that receiving will start */
+ err = ixgbe_xsk_async_xmit(adapter->netdev, qid);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
@@ -634,7 +642,8 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
dma_addr_t dma;
while (budget-- > 0) {
- if (unlikely(!ixgbe_desc_unused(xdp_ring))) {
+ if (unlikely(!ixgbe_desc_unused(xdp_ring)) ||
+ !netif_carrier_ok(xdp_ring->netdev)) {
work_done = false;
break;
}
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 9d4568eb2297..8433fb9c3eee 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2146,7 +2146,7 @@ err_drop_frame:
if (unlikely(!skb))
goto err_drop_frame_ret_pool;
- dma_sync_single_range_for_cpu(dev->dev.parent,
+ dma_sync_single_range_for_cpu(&pp->bm_priv->pdev->dev,
rx_desc->buf_phys_addr,
MVNETA_MH_SIZE + NET_SKB_PAD,
rx_bytes,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index e23ca90289f7..0a868c829b90 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1291,15 +1291,10 @@ wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
static int
wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
- enum alu_op alu_op, bool skip)
+ enum alu_op alu_op)
{
const struct bpf_insn *insn = &meta->insn;
- if (skip) {
- meta->skip = true;
- return 0;
- }
-
wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
@@ -2309,7 +2304,7 @@ static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
}
static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2319,7 +2314,7 @@ static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
}
static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2329,7 +2324,7 @@ static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
}
static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2339,7 +2334,7 @@ static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
}
static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2349,7 +2344,7 @@ static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
+ return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
}
static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 7cdac77d0c68..07e41c42bcf5 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -499,6 +499,8 @@ static int ipvlan_nl_changelink(struct net_device *dev,
if (!data)
return 0;
+ if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
if (data[IFLA_IPVLAN_MODE]) {
u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
@@ -601,6 +603,8 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
struct ipvl_dev *tmp = netdev_priv(phy_dev);
phy_dev = tmp->phy_dev;
+ if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
} else if (!netif_is_ipvlan_port(phy_dev)) {
/* Exit early if the underlying link is invalid or busy */
if (phy_dev->type != ARPHRD_ETHER ||
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 82ab6ed3b74e..6bac602094bd 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -26,6 +26,8 @@
#include <linux/marvell_phy.h>
#include <linux/phy.h>
+#define MDIO_AN_10GBT_CTRL_ADV_NBT_MASK 0x01e0
+
enum {
MV_PCS_BASE_T = 0x0000,
MV_PCS_BASE_R = 0x1000,
@@ -386,8 +388,10 @@ static int mv3310_config_aneg(struct phy_device *phydev)
else
reg = 0;
+ /* Make sure we clear unsupported 2.5G/5G advertising */
ret = mv3310_modify(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
- MDIO_AN_10GBT_CTRL_ADV10G, reg);
+ MDIO_AN_10GBT_CTRL_ADV10G |
+ MDIO_AN_10GBT_CTRL_ADV_NBT_MASK, reg);
if (ret < 0)
return ret;
if (ret > 0)
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 66b9cfe692fc..7368616286ae 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -379,7 +379,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
err = device_register(&bus->dev);
if (err) {
pr_err("mii_bus %s failed to register\n", bus->id);
- put_device(&bus->dev);
return -EINVAL;
}
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index c6010fb1aa0f..cb4a23041a94 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -282,6 +282,13 @@ static struct phy_driver realtek_drvs[] = {
.name = "RTL8366RB Gigabit Ethernet",
.features = PHY_GBIT_FEATURES,
.config_init = &rtl8366rb_config_init,
+ /* These interrupts are handled by the irq controller
+ * embedded inside the RTL8366RB, they get unmasked when the
+ * irq is requested and ACKed by reading the status register,
+ * which is done by the irqchip code.
+ */
+ .ack_interrupt = genphy_no_ack_interrupt,
+ .config_intr = genphy_no_config_intr,
.suspend = genphy_suspend,
.resume = genphy_resume,
},
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 958f1cf67282..6ce3f666d142 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1256,7 +1256,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
list_add_tail_rcu(&port->list, &team->port_list);
team_port_enable(team, port);
__team_compute_features(team);
- __team_port_change_port_added(port, !!netif_carrier_ok(port_dev));
+ __team_port_change_port_added(port, !!netif_oper_up(port_dev));
__team_options_change_check(team);
netdev_info(dev, "Port device %s added\n", portname);
@@ -2915,7 +2915,7 @@ static int team_device_event(struct notifier_block *unused,
switch (event) {
case NETDEV_UP:
- if (netif_carrier_ok(dev))
+ if (netif_oper_up(dev))
team_port_change_check(port, true);
break;
case NETDEV_DOWN:
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index ada6baf8847a..86c8c64fbb0f 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1179,7 +1179,7 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
} else {
/* test for RTL8153-BND and RTL8153-BD */
ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1);
- if ((ocp_data & BND_MASK) == 0 && (ocp_data & BD_MASK)) {
+ if ((ocp_data & BND_MASK) == 0 && (ocp_data & BD_MASK) == 0) {
netif_dbg(tp, probe, tp->netdev,
"Invalid variant for MAC pass through\n");
return -ENODEV;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 95909e262ba4..7c1430ed0244 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1273,6 +1273,9 @@ static void vrf_setup(struct net_device *dev)
/* default to no qdisc; user can add if desired */
dev->priv_flags |= IFF_NO_QUEUE;
+
+ dev->min_mtu = 0;
+ dev->max_mtu = 0;
}
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 320edcac4699..6359053bd0c7 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3554,7 +3554,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
goto out_err;
}
- genlmsg_reply(skb, info);
+ res = genlmsg_reply(skb, info);
break;
}
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 127fcc9c3778..333b56d8f746 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -992,6 +992,14 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev)
{
return 0;
}
+static inline int genphy_no_ack_interrupt(struct phy_device *phydev)
+{
+ return 0;
+}
+static inline int genphy_no_config_intr(struct phy_device *phydev)
+{
+ return 0;
+}
int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad,
u16 regnum);
int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 71f2394abbf7..e0348cb0a1dd 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -61,10 +61,20 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
/* gso packets without NEEDS_CSUM do not set transport_offset.
* probe and drop if does not match one of the above types.
*/
- if (gso_type) {
+ if (gso_type && skb->network_header) {
+ if (!skb->protocol)
+ virtio_net_hdr_set_proto(skb, hdr);
+retry:
skb_probe_transport_header(skb, -1);
- if (!skb_transport_header_was_set(skb))
+ if (!skb_transport_header_was_set(skb)) {
+ /* UFO does not specify ipv4 or 6: try both */
+ if (gso_type & SKB_GSO_UDP &&
+ skb->protocol == htons(ETH_P_IP)) {
+ skb->protocol = htons(ETH_P_IPV6);
+ goto retry;
+ }
return -EINVAL;
+ }
}
}
diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h
index b669fe6dbc3b..98f31c7ea23d 100644
--- a/include/net/phonet/pep.h
+++ b/include/net/phonet/pep.h
@@ -63,10 +63,11 @@ struct pnpipehdr {
u8 state_after_reset; /* reset request */
u8 error_code; /* any response */
u8 pep_type; /* status indication */
- u8 data[1];
+ u8 data0; /* anything else */
};
+ u8 data[];
};
-#define other_pep_type data[1]
+#define other_pep_type data[0]
static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb)
{
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7298a53b9702..85386becbaea 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -853,7 +853,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
xfrm_pol_put(pols[i]);
}
-void __xfrm_state_destroy(struct xfrm_state *);
+void __xfrm_state_destroy(struct xfrm_state *, bool);
static inline void __xfrm_state_put(struct xfrm_state *x)
{
@@ -863,7 +863,13 @@ static inline void __xfrm_state_put(struct xfrm_state *x)
static inline void xfrm_state_put(struct xfrm_state *x)
{
if (refcount_dec_and_test(&x->refcnt))
- __xfrm_state_destroy(x);
+ __xfrm_state_destroy(x, false);
+}
+
+static inline void xfrm_state_put_sync(struct xfrm_state *x)
+{
+ if (refcount_dec_and_test(&x->refcnt))
+ __xfrm_state_destroy(x, true);
}
static inline void xfrm_state_hold(struct xfrm_state *x)
@@ -1590,7 +1596,7 @@ struct xfrmk_spdinfo {
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
int xfrm_state_delete(struct xfrm_state *x);
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index abf1002080df..93a5cbbde421 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -471,6 +471,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
}
if (!node || node->prefixlen != key->prefixlen ||
+ node->prefixlen != matchlen ||
(node->flags & LPM_TREE_NODE_FLAG_IM)) {
ret = -ENOENT;
goto out;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index fa2644d276ef..e31e1b20f7f4 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -13,27 +13,13 @@
#include <net/sock.h>
#include <net/tcp.h>
-static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
-{
- u32 ret;
-
- preempt_disable();
- rcu_read_lock();
- bpf_cgroup_storage_set(storage);
- ret = BPF_PROG_RUN(prog, ctx);
- rcu_read_unlock();
- preempt_enable();
-
- return ret;
-}
-
-static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
- u32 *time)
+static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+ u32 *retval, u32 *time)
{
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
enum bpf_cgroup_storage_type stype;
u64 time_start, time_spent = 0;
+ int ret = 0;
u32 i;
for_each_cgroup_storage_type(stype) {
@@ -48,25 +34,42 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
if (!repeat)
repeat = 1;
+
+ rcu_read_lock();
+ preempt_disable();
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
- *ret = bpf_test_run_one(prog, ctx, storage);
+ bpf_cgroup_storage_set(storage);
+ *retval = BPF_PROG_RUN(prog, ctx);
+
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+
if (need_resched()) {
- if (signal_pending(current))
- break;
time_spent += ktime_get_ns() - time_start;
+ preempt_enable();
+ rcu_read_unlock();
+
cond_resched();
+
+ rcu_read_lock();
+ preempt_disable();
time_start = ktime_get_ns();
}
}
time_spent += ktime_get_ns() - time_start;
+ preempt_enable();
+ rcu_read_unlock();
+
do_div(time_spent, repeat);
*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
- return 0;
+ return ret;
}
static int bpf_test_finish(const union bpf_attr *kattr,
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 3aeff0895669..ac92b2eb32b1 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1204,14 +1204,7 @@ static void br_multicast_query_received(struct net_bridge *br,
return;
br_multicast_update_query_timer(br, query, max_delay);
-
- /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules,
- * the arrival port for IGMP Queries where the source address
- * is 0.0.0.0 should not be added to router port list.
- */
- if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
- saddr->proto == htons(ETH_P_IPV6))
- br_multicast_mark_router(br, port);
+ br_multicast_mark_router(br, port);
}
static void br_ip4_multicast_query(struct net_bridge *br,
diff --git a/net/compat.c b/net/compat.c
index 959d1c51826d..3d348198004f 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -388,8 +388,12 @@ static int __compat_sys_setsockopt(int fd, int level, int optname,
char __user *optval, unsigned int optlen)
{
int err;
- struct socket *sock = sockfd_lookup(fd, &err);
+ struct socket *sock;
+
+ if (optlen > INT_MAX)
+ return -EINVAL;
+ sock = sockfd_lookup(fd, &err);
if (sock) {
err = security_socket_setsockopt(sock, level, optname);
if (err) {
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 2d7e01b23572..2a2a878b5ce3 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -69,7 +69,6 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
{
- u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
struct dsa_switch *ds = dp->ds;
int port = dp->index;
int err;
@@ -80,7 +79,8 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
return err;
}
- dsa_port_set_state_now(dp, stp_state);
+ if (!dp->bridge_dev)
+ dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
return 0;
}
@@ -90,7 +90,8 @@ void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
struct dsa_switch *ds = dp->ds;
int port = dp->index;
- dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+ if (!dp->bridge_dev)
+ dsa_port_set_state_now(dp, BR_STATE_DISABLED);
if (ds->ops->port_disable)
ds->ops->port_disable(ds, port, phy);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 5459f41fc26f..10e809b296ec 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -328,7 +328,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
skb->len += tailen;
skb->data_len += tailen;
skb->truesize += tailen;
- if (sk)
+ if (sk && sk_fullsock(sk))
refcount_add(tailen, &sk->sk_wmem_alloc);
goto out;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 3978f807fa8b..6ae89f2b541b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1457,9 +1457,23 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct ip_tunnel_parm *p = &t->parms;
__be16 o_flags = p->o_flags;
- if ((t->erspan_ver == 1 || t->erspan_ver == 2) &&
- !t->collect_md)
- o_flags |= TUNNEL_KEY;
+ if (t->erspan_ver == 1 || t->erspan_ver == 2) {
+ if (!t->collect_md)
+ o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+ goto nla_put_failure;
+
+ if (t->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+ goto nla_put_failure;
+ }
+ }
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
@@ -1495,19 +1509,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
}
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
- goto nla_put_failure;
-
- if (t->erspan_ver == 1) {
- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
- goto nla_put_failure;
- } else if (t->erspan_ver == 2) {
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
- goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
- goto nla_put_failure;
- }
-
return 0;
nla_put_failure:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 730bc44dbad9..ccc78f3a4b60 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2347,6 +2347,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+ tcp_init_tso_segs(skb, mss_now);
goto repair; /* Skip network transmission */
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5c3cd5d84a6f..372fdc5381a9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -562,10 +562,12 @@ static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info)
for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
int (*handler)(struct sk_buff *skb, u32 info);
+ const struct ip_tunnel_encap_ops *encap;
- if (!iptun_encaps[i])
+ encap = rcu_dereference(iptun_encaps[i]);
+ if (!encap)
continue;
- handler = rcu_dereference(iptun_encaps[i]->err_handler);
+ handler = encap->err_handler;
if (handler && !handler(skb, info))
return 0;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 5afe9f83374d..239d4a65ad6e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -296,7 +296,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
skb->len += tailen;
skb->data_len += tailen;
skb->truesize += tailen;
- if (sk)
+ if (sk && sk_fullsock(sk))
refcount_add(tailen, &sk->sk_wmem_alloc);
goto out;
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index b858bd5280bf..867474abe269 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -72,7 +72,7 @@ static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
static int gue6_err_proto_handler(int proto, struct sk_buff *skb,
struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, u32 info)
+ u8 type, u8 code, int offset, __be32 info)
{
const struct inet6_protocol *ipprot;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 43890898b0b5..26f25b6e2833 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1722,6 +1722,9 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
static void ip6erspan_set_version(struct nlattr *data[],
struct __ip6_tnl_parm *parms)
{
+ if (!data)
+ return;
+
parms->erspan_ver = 1;
if (data[IFLA_GRE_ERSPAN_VER])
parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
@@ -2104,9 +2107,23 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct __ip6_tnl_parm *p = &t->parms;
__be16 o_flags = p->o_flags;
- if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
- !p->collect_md)
- o_flags |= TUNNEL_KEY;
+ if (p->erspan_ver == 1 || p->erspan_ver == 2) {
+ if (!p->collect_md)
+ o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
+ goto nla_put_failure;
+
+ if (p->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
+ goto nla_put_failure;
+ }
+ }
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
@@ -2121,8 +2138,7 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
- nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
- nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+ nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -2140,19 +2156,6 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
}
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
- goto nla_put_failure;
-
- if (p->erspan_ver == 1) {
- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
- goto nla_put_failure;
- } else if (p->erspan_ver == 2) {
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
- goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
- goto nla_put_failure;
- }
-
return 0;
nla_put_failure:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 964491cf3672..ce15dc4ccbfa 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1274,18 +1274,29 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
struct rt6_exception *rt6_ex)
{
+ struct fib6_info *from;
struct net *net;
if (!bucket || !rt6_ex)
return;
net = dev_net(rt6_ex->rt6i->dst.dev);
+ net->ipv6.rt6_stats->fib_rt_cache--;
+
+ /* purge completely the exception to allow releasing the held resources:
+ * some [sk] cache may keep the dst around for unlimited time
+ */
+ from = rcu_dereference_protected(rt6_ex->rt6i->from,
+ lockdep_is_held(&rt6_exception_lock));
+ rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
+ fib6_info_release(from);
+ dst_dev_put(&rt6_ex->rt6i->dst);
+
hlist_del_rcu(&rt6_ex->hlist);
dst_release(&rt6_ex->rt6i->dst);
kfree_rcu(rt6_ex, rcu);
WARN_ON_ONCE(!bucket->depth);
bucket->depth--;
- net->ipv6.rt6_stats->fib_rt_cache--;
}
/* Remove oldest rt6_ex in bucket and free the memory
@@ -1599,15 +1610,15 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
struct rt6_exception_bucket *bucket;
- struct fib6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
-
- if (!from ||
- !(rt->rt6i_flags & RTF_CACHE))
- return;
+ struct fib6_info *from;
rcu_read_lock();
+ from = rcu_dereference(rt->from);
+ if (!from || !(rt->rt6i_flags & RTF_CACHE))
+ goto unlock;
+
bucket = rcu_dereference(from->rt6i_exception_bucket);
#ifdef CONFIG_IPV6_SUBTREES
@@ -1626,6 +1637,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
if (rt6_ex)
rt6_ex->stamp = jiffies;
+unlock:
rcu_read_unlock();
}
@@ -2742,20 +2754,24 @@ static int ip6_route_check_nh_onlink(struct net *net,
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
const struct in6_addr *gw_addr = &cfg->fc_gateway;
u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+ struct fib6_info *from;
struct rt6_info *grt;
int err;
err = 0;
grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
if (grt) {
+ rcu_read_lock();
+ from = rcu_dereference(grt->from);
if (!grt->dst.error &&
/* ignore match if it is the default route */
- grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
+ from && !ipv6_addr_any(&from->fib6_dst.addr) &&
(grt->rt6i_flags & flags || dev != grt->dst.dev)) {
NL_SET_ERR_MSG(extack,
"Nexthop has invalid gateway or device mismatch");
err = -EINVAL;
}
+ rcu_read_unlock();
ip6_rt_put(grt);
}
@@ -4649,7 +4665,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
table = rt->fib6_table->tb6_id;
else
table = RT6_TABLE_UNSPEC;
- rtm->rtm_table = table;
+ rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, table))
goto nla_put_failure;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2596ffdeebea..b444483cdb2b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -288,8 +288,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int peeked, peeking, off;
int err;
int is_udplite = IS_UDPLITE(sk);
+ struct udp_mib __percpu *mib;
bool checksum_valid = false;
- struct udp_mib *mib;
int is_udp4;
if (flags & MSG_ERRQUEUE)
@@ -420,17 +420,19 @@ EXPORT_SYMBOL(udpv6_encap_enable);
*/
static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, u32 info)
+ u8 type, u8 code, int offset, __be32 info)
{
int i;
for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, u32 info);
+ u8 type, u8 code, int offset, __be32 info);
+ const struct ip6_tnl_encap_ops *encap;
- if (!ip6tun_encaps[i])
+ encap = rcu_dereference(ip6tun_encaps[i]);
+ if (!encap)
continue;
- handler = rcu_dereference(ip6tun_encaps[i]->err_handler);
+ handler = encap->err_handler;
if (handler && !handler(skb, opt, type, code, offset, info))
return 0;
}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f5b4febeaa25..bc65db782bfb 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -344,8 +344,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
unsigned int i;
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
xfrm_flush_gc();
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 655c787f9d54..5651c29cb5bd 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -196,30 +196,22 @@ static int pfkey_release(struct socket *sock)
return 0;
}
/*
 * pfkey_broadcast_one - queue one copy of skb on a single socket.
 *
 * Reading the added (+) lines of this hunk: the function returns -ENOBUFS
 * without touching skb when sk_rmem_alloc already exceeds sk_rcvbuf, and
 * also when skb_clone() fails. Otherwise the clone is handed to
 * skb_set_owner_r(), appended to sk_receive_queue, sk_data_ready() is
 * invoked and 0 is returned.
 *
 * The local skb pointer is rebound to the clone, so the buffer passed in is
 * not released here; pfkey_broadcast() calls kfree_skb() on it afterwards.
 * The removed (-) lines instead shared one lazily created *skb2 across
 * calls and bracketed the work with sock_hold()/sock_put().
 */
-static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
- gfp_t allocation, struct sock *sk)
+static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation,
+ struct sock *sk)
{
int err = -ENOBUFS;
- sock_hold(sk);
- if (*skb2 == NULL) {
- if (refcount_read(&skb->users) != 1) {
- *skb2 = skb_clone(skb, allocation);
- } else {
- *skb2 = skb;
- refcount_inc(&skb->users);
- }
- }
- if (*skb2 != NULL) {
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
- skb_set_owner_r(*skb2, sk);
- skb_queue_tail(&sk->sk_receive_queue, *skb2);
- sk->sk_data_ready(sk);
- *skb2 = NULL;
- err = 0;
- }
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ return err;
+
+ skb = skb_clone(skb, allocation);
+
+ if (skb) {
+ skb_set_owner_r(skb, sk);
+ skb_queue_tail(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk);
+ err = 0;
}
- sock_put(sk);
return err;
}
@@ -234,7 +226,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
{
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
struct sock *sk;
- struct sk_buff *skb2 = NULL;
int err = -ESRCH;
/* XXX Do we need something like netlink_overrun? I think
@@ -253,7 +244,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
* socket.
*/
if (pfk->promisc)
- pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
+ pfkey_broadcast_one(skb, GFP_ATOMIC, sk);
/* the exact target will be processed later */
if (sk == one_sk)
@@ -268,7 +259,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
continue;
}
- err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
+ err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk);
/* Error is cleared after successful sending to at least one
* registered KM */
@@ -278,9 +269,8 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
rcu_read_unlock();
if (one_sk != NULL)
- err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
+ err = pfkey_broadcast_one(skb, allocation, one_sk);
- kfree_skb(skb2);
kfree_skb(skb);
return err;
}
@@ -1783,7 +1773,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
if (proto == 0)
return -EINVAL;
- err = xfrm_state_flush(net, proto, true);
+ err = xfrm_state_flush(net, proto, true, false);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - go quietly */
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 87a729926734..977dea436ee8 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -615,13 +615,13 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
* We need a bit of data queued to build aggregates properly, so
* instruct the TCP stack to allow more than a single ms of data
* to be queued in the stack. The value is a bit-shift of 1
- * second, so 8 is ~4ms of queued data. Only affects local TCP
+ * second, so 7 is ~8ms of queued data. Only affects local TCP
* sockets.
* This is the default, anyhow - drivers may need to override it
* for local reasons (longer buffers, longer completion time, or
* similar).
*/
- local->hw.tx_sk_pacing_shift = 8;
+ local->hw.tx_sk_pacing_shift = 7;
/* set up some defaults */
local->hw.queues = 1;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index bb4d71efb6fb..c2a6da5d80da 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2644,6 +2644,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
u16 ac, q, hdrlen;
+ int tailroom = 0;
hdr = (struct ieee80211_hdr *) skb->data;
hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2732,8 +2733,12 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
if (!ifmsh->mshcfg.dot11MeshForwarding)
goto out;
+ if (sdata->crypto_tx_tailroom_needed_cnt)
+ tailroom = IEEE80211_ENCRYPT_TAILROOM;
+
fwd_skb = skb_copy_expand(skb, local->tx_headroom +
- sdata->encrypt_headroom, 0, GFP_ATOMIC);
+ sdata->encrypt_headroom,
+ tailroom, GFP_ATOMIC);
if (!fwd_skb)
goto out;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 9fc76b19cd3c..db3473540303 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -132,7 +132,7 @@ static int pep_indicate(struct sock *sk, u8 id, u8 code,
ph->utid = 0;
ph->message_id = id;
ph->pipe_handle = pn->pipe_handle;
- ph->data[0] = code;
+ ph->error_code = code;
return pn_skb_send(sk, skb, NULL);
}
@@ -153,7 +153,7 @@ static int pipe_handler_request(struct sock *sk, u8 id, u8 code,
ph->utid = id; /* whatever */
ph->message_id = id;
ph->pipe_handle = pn->pipe_handle;
- ph->data[0] = code;
+ ph->error_code = code;
return pn_skb_send(sk, skb, NULL);
}
@@ -208,7 +208,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
struct pnpipehdr *ph;
struct sockaddr_pn dst;
u8 data[4] = {
- oph->data[0], /* PEP type */
+ oph->pep_type, /* PEP type */
code, /* error code, at an unusual offset */
PAD, PAD,
};
@@ -221,7 +221,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
ph->utid = oph->utid;
ph->message_id = PNS_PEP_CTRL_RESP;
ph->pipe_handle = oph->pipe_handle;
- ph->data[0] = oph->data[1]; /* CTRL id */
+ ph->data0 = oph->data[0]; /* CTRL id */
pn_skb_get_src_sockaddr(oskb, &dst);
return pn_skb_send(sk, skb, &dst);
@@ -272,17 +272,17 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
return -EINVAL;
hdr = pnp_hdr(skb);
- if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
+ if (hdr->pep_type != PN_PEP_TYPE_COMMON) {
net_dbg_ratelimited("Phonet unknown PEP type: %u\n",
- (unsigned int)hdr->data[0]);
+ (unsigned int)hdr->pep_type);
return -EOPNOTSUPP;
}
- switch (hdr->data[1]) {
+ switch (hdr->data[0]) {
case PN_PEP_IND_FLOW_CONTROL:
switch (pn->tx_fc) {
case PN_LEGACY_FLOW_CONTROL:
- switch (hdr->data[4]) {
+ switch (hdr->data[3]) {
case PEP_IND_BUSY:
atomic_set(&pn->tx_credits, 0);
break;
@@ -292,7 +292,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
}
break;
case PN_ONE_CREDIT_FLOW_CONTROL:
- if (hdr->data[4] == PEP_IND_READY)
+ if (hdr->data[3] == PEP_IND_READY)
atomic_set(&pn->tx_credits, wake = 1);
break;
}
@@ -301,12 +301,12 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
case PN_PEP_IND_ID_MCFC_GRANT_CREDITS:
if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL)
break;
- atomic_add(wake = hdr->data[4], &pn->tx_credits);
+ atomic_add(wake = hdr->data[3], &pn->tx_credits);
break;
default:
net_dbg_ratelimited("Phonet unknown PEP indication: %u\n",
- (unsigned int)hdr->data[1]);
+ (unsigned int)hdr->data[0]);
return -EOPNOTSUPP;
}
if (wake)
@@ -318,7 +318,7 @@ static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *hdr = pnp_hdr(skb);
- u8 n_sb = hdr->data[0];
+ u8 n_sb = hdr->data0;
pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
__skb_pull(skb, sizeof(*hdr));
@@ -506,7 +506,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
return -ECONNREFUSED;
/* Parse sub-blocks */
- n_sb = hdr->data[4];
+ n_sb = hdr->data[3];
while (n_sb > 0) {
u8 type, buf[6], len = sizeof(buf);
const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -739,7 +739,7 @@ static int pipe_do_remove(struct sock *sk)
ph->utid = 0;
ph->message_id = PNS_PIPE_REMOVE_REQ;
ph->pipe_handle = pn->pipe_handle;
- ph->data[0] = PAD;
+ ph->data0 = PAD;
return pn_skb_send(sk, skb, NULL);
}
@@ -817,7 +817,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
peer_type = hdr->other_pep_type << 8;
/* Parse sub-blocks (options) */
- n_sb = hdr->data[4];
+ n_sb = hdr->data[3];
while (n_sb > 0) {
u8 type, buf[1], len = sizeof(buf);
const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -1109,7 +1109,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
ph->utid = 0;
if (pn->aligned) {
ph->message_id = PNS_PIPE_ALIGNED_DATA;
- ph->data[0] = 0; /* padding */
+ ph->data0 = 0; /* padding */
} else
ph->message_id = PNS_PIPE_DATA;
ph->pipe_handle = pn->pipe_handle;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 033696e6f74f..ad158d311ffa 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -207,7 +207,8 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
/* When a data chunk is sent, reset the heartbeat interval. */
expires = jiffies + sctp_transport_timeout(transport);
- if (time_before(transport->hb_timer.expires, expires) &&
+ if ((time_before(transport->hb_timer.expires, expires) ||
+ !timer_pending(&transport->hb_timer)) &&
!mod_timer(&transport->hb_timer,
expires + prandom_u32_max(transport->rto)))
sctp_transport_hold(transport);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 5721416d0605..adbdf195eb08 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -113,9 +113,9 @@ struct smc_host_cdc_msg { /* Connection Data Control message */
} __aligned(8);
/*
 * Urgent-data state values (meanings per the enumerator comments below).
 * Before this change the enumerators were implicit, so SMC_URG_VALID was 0.
 * The added (+) lines number them explicitly from 1, which means a value of
 * zero no longer matches any of the three states.
 */
enum smc_urg_state {
- SMC_URG_VALID, /* data present */
- SMC_URG_NOTYET, /* data pending */
- SMC_URG_READ /* data was already read */
+ SMC_URG_VALID = 1, /* data present */
+ SMC_URG_NOTYET = 2, /* data pending */
+ SMC_URG_READ = 3, /* data was already read */
};
struct smc_connection {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1217c90a363b..684f2125fc6b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -388,7 +388,7 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
rc_ = tipc_sk_sock_err((sock_), timeo_); \
if (rc_) \
break; \
- prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \
+ add_wait_queue(sk_sleep(sk_), &wait_); \
release_sock(sk_); \
*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
sched_annotate_sleep(); \
@@ -1677,7 +1677,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk)
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
struct sock *sk = sock->sk;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
long timeo = *timeop;
int err = sock_error(sk);
@@ -1685,15 +1685,17 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
return err;
for (;;) {
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
if (sk->sk_shutdown & RCV_SHUTDOWN) {
err = -ENOTCONN;
break;
}
+ add_wait_queue(sk_sleep(sk), &wait);
release_sock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+ sched_annotate_sleep();
lock_sock(sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -1709,7 +1711,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
if (err)
break;
}
- finish_wait(sk_sleep(sk), &wait);
*timeop = timeo;
return err;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 74d1eed7cbd4..a95d479caeea 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -890,7 +890,7 @@ retry:
addr->hash ^= sk->sk_type;
__unix_remove_socket(sk);
- u->addr = addr;
+ smp_store_release(&u->addr, addr);
__unix_insert_socket(&unix_socket_table[addr->hash], sk);
spin_unlock(&unix_table_lock);
err = 0;
@@ -1060,7 +1060,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
err = 0;
__unix_remove_socket(sk);
- u->addr = addr;
+ smp_store_release(&u->addr, addr);
__unix_insert_socket(list, sk);
out_unlock:
@@ -1331,15 +1331,29 @@ restart:
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
otheru = unix_sk(other);
- /* copy address information from listening to new sock*/
- if (otheru->addr) {
- refcount_inc(&otheru->addr->refcnt);
- newu->addr = otheru->addr;
- }
+ /* copy address information from listening to new sock
+ *
+ * The contents of *(otheru->addr) and otheru->path
+ * are seen fully set up here, since we have found
+ * otheru in hash under unix_table_lock. Insertion
+ * into the hash chain we'd found it in had been done
+ * in an earlier critical area protected by unix_table_lock,
+ * the same one where we'd set *(otheru->addr) contents,
+ * as well as otheru->path and otheru->addr itself.
+ *
+ * Using smp_store_release() here to set newu->addr
+ * is enough to make those stores, as well as stores
+ * to newu->path visible to anyone who gets newu->addr
+ * by smp_load_acquire(). IOW, the same warranties
+ * as for unix_sock instances bound in unix_bind() or
+ * in unix_autobind().
+ */
if (otheru->path.dentry) {
path_get(&otheru->path);
newu->path = otheru->path;
}
+ refcount_inc(&otheru->addr->refcnt);
+ smp_store_release(&newu->addr, otheru->addr);
/* Set credentials */
copy_peercred(sk, other);
@@ -1453,7 +1467,7 @@ out:
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
struct sock *sk = sock->sk;
- struct unix_sock *u;
+ struct unix_address *addr;
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
int err = 0;
@@ -1468,19 +1482,15 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
sock_hold(sk);
}
- u = unix_sk(sk);
- unix_state_lock(sk);
- if (!u->addr) {
+ addr = smp_load_acquire(&unix_sk(sk)->addr);
+ if (!addr) {
sunaddr->sun_family = AF_UNIX;
sunaddr->sun_path[0] = 0;
err = sizeof(short);
} else {
- struct unix_address *addr = u->addr;
-
err = addr->len;
memcpy(sunaddr, addr->name, addr->len);
}
- unix_state_unlock(sk);
sock_put(sk);
out:
return err;
@@ -2073,11 +2083,11 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
/*
 * unix_copy_addr - report the address held in unix_sk(sk)->addr via msg.
 *
 * If an address is set, msg->msg_namelen receives its length and the name
 * bytes are copied into msg->msg_name; if not, msg is left untouched.
 *
 * The added (+) lines fetch ->addr exactly once through smp_load_acquire()
 * and use that local pointer throughout, where the removed (-) lines read
 * u->addr repeatedly with plain loads. The acquire matches the
 * smp_store_release() used elsewhere in this patch when ->addr is assigned.
 */
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
- struct unix_sock *u = unix_sk(sk);
+ struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
- if (u->addr) {
- msg->msg_namelen = u->addr->len;
- memcpy(msg->msg_name, u->addr->name, u->addr->len);
+ if (addr) {
+ msg->msg_namelen = addr->len;
+ memcpy(msg->msg_name, addr->name, addr->len);
}
}
@@ -2581,15 +2591,14 @@ static int unix_open_file(struct sock *sk)
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
- unix_state_lock(sk);
+ if (!smp_load_acquire(&unix_sk(sk)->addr))
+ return -ENOENT;
+
path = unix_sk(sk)->path;
- if (!path.dentry) {
- unix_state_unlock(sk);
+ if (!path.dentry)
return -ENOENT;
- }
path_get(&path);
- unix_state_unlock(sk);
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0)
@@ -2830,7 +2839,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
sock_i_ino(s));
- if (u->addr) {
+ if (u->addr) { // under unix_table_lock here
int i, len;
seq_putc(seq, ' ');
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 384c84e83462..3183d9b8ab33 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -10,7 +10,8 @@
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{
- struct unix_address *addr = unix_sk(sk)->addr;
+ /* might or might not have unix_table_lock */
+ struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr)
return 0;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ec3a828672ef..eff31348e20b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -679,8 +679,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
int len, i, rc = 0;
- if (!sock_flag(sk, SOCK_ZAPPED) ||
- addr_len != sizeof(struct sockaddr_x25) ||
+ if (addr_len != sizeof(struct sockaddr_x25) ||
addr->sx25_family != AF_X25) {
rc = -EINVAL;
goto out;
@@ -699,9 +698,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
}
lock_sock(sk);
- x25_sk(sk)->source_addr = addr->sx25_addr;
- x25_insert_socket(sk);
- sock_reset_flag(sk, SOCK_ZAPPED);
+ if (sock_flag(sk, SOCK_ZAPPED)) {
+ x25_sk(sk)->source_addr = addr->sx25_addr;
+ x25_insert_socket(sk);
+ sock_reset_flag(sk, SOCK_ZAPPED);
+ } else {
+ rc = -EINVAL;
+ }
release_sock(sk);
SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
out:
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 45f3b528dc09..85e4fe4f18cc 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -366,7 +366,6 @@ static int xsk_release(struct socket *sock)
xskq_destroy(xs->rx);
xskq_destroy(xs->tx);
- xdp_put_umem(xs->umem);
sock_orphan(sk);
sock->sk = NULL;
@@ -718,6 +717,18 @@ static const struct proto_ops xsk_proto_ops = {
.sendpage = sock_no_sendpage,
};
/*
 * xsk_destruct - sk_destruct callback assigned in xsk_create().
 *
 * Does nothing unless SOCK_DEAD is set on sk. Otherwise it calls
 * xdp_put_umem() on the umem attached to the socket and then
 * sk_refcnt_debug_dec(), balancing the sk_refcnt_debug_inc() added to
 * xsk_create(). The same patch removes the xdp_put_umem() call from
 * xsk_release(), so this callback is now where the umem is put.
 */
+static void xsk_destruct(struct sock *sk)
+{
+ struct xdp_sock *xs = xdp_sk(sk);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ return;
+
+ xdp_put_umem(xs->umem);
+
+ sk_refcnt_debug_dec(sk);
+}
+
static int xsk_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
@@ -744,6 +755,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sk->sk_family = PF_XDP;
+ sk->sk_destruct = xsk_destruct;
+ sk_refcnt_debug_inc(sk);
+
sock_set_flag(sk, SOCK_RCU_FREE);
xs = xdp_sk(sk);
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 6be8c7df15bb..dbb3c1945b5c 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -76,10 +76,10 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
int ifindex;
struct xfrm_if *xi;
- if (!skb->dev)
+ if (!secpath_exists(skb) || !skb->dev)
return NULL;
- xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id);
+ xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id);
ifindex = skb->dev->ifindex;
for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ba0a4048c846..8d1a898d0ba5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3314,8 +3314,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (ifcb) {
xi = ifcb->decode_session(skb);
- if (xi)
+ if (xi) {
if_id = xi->p.if_id;
+ net = xi->net;
+ }
}
rcu_read_unlock();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 23c92891758a..1bb971f46fc6 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -432,7 +432,7 @@ void xfrm_state_free(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_free);
-static void xfrm_state_gc_destroy(struct xfrm_state *x)
+static void ___xfrm_state_destroy(struct xfrm_state *x)
{
tasklet_hrtimer_cancel(&x->mtimer);
del_timer_sync(&x->rtimer);
@@ -474,7 +474,7 @@ static void xfrm_state_gc_task(struct work_struct *work)
synchronize_rcu();
hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
- xfrm_state_gc_destroy(x);
+ ___xfrm_state_destroy(x);
}
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
@@ -598,14 +598,19 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
}
EXPORT_SYMBOL(xfrm_state_alloc);
/*
 * __xfrm_state_destroy - dispose of a state that is already marked dead.
 *
 * Warns if x->km.state is not XFRM_STATE_DEAD. The new sync parameter picks
 * between two paths:
 *   sync true  - call synchronize_rcu() and then ___xfrm_state_destroy()
 *                directly in the calling context;
 *   sync false - the previous behaviour: add x to xfrm_state_gc_list under
 *                xfrm_state_gc_lock and schedule xfrm_state_gc_work, whose
 *                handler runs ___xfrm_state_destroy() later.
 *
 * NOTE(review): synchronize_rcu() blocks, so sync == true presumably
 * requires a sleepable caller - confirm against the call sites.
 */
-void __xfrm_state_destroy(struct xfrm_state *x)
+void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
{
WARN_ON(x->km.state != XFRM_STATE_DEAD);
- spin_lock_bh(&xfrm_state_gc_lock);
- hlist_add_head(&x->gclist, &xfrm_state_gc_list);
- spin_unlock_bh(&xfrm_state_gc_lock);
- schedule_work(&xfrm_state_gc_work);
+ if (sync) {
+ synchronize_rcu();
+ ___xfrm_state_destroy(x);
+ } else {
+ spin_lock_bh(&xfrm_state_gc_lock);
+ hlist_add_head(&x->gclist, &xfrm_state_gc_list);
+ spin_unlock_bh(&xfrm_state_gc_lock);
+ schedule_work(&xfrm_state_gc_work);
+ }
}
EXPORT_SYMBOL(__xfrm_state_destroy);
@@ -708,7 +713,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
}
#endif
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
{
int i, err = 0, cnt = 0;
@@ -730,7 +735,10 @@ restart:
err = xfrm_state_delete(x);
xfrm_audit_state_delete(x, err ? 0 : 1,
task_valid);
- xfrm_state_put(x);
+ if (sync)
+ xfrm_state_put_sync(x);
+ else
+ xfrm_state_put(x);
if (!err)
cnt++;
@@ -2215,7 +2223,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
if (atomic_read(&t->tunnel_users) == 2)
xfrm_state_delete(t);
atomic_dec(&t->tunnel_users);
- xfrm_state_put(t);
+ xfrm_state_put_sync(t);
x->tunnel = NULL;
}
}
@@ -2375,8 +2383,8 @@ void xfrm_state_fini(struct net *net)
unsigned int sz;
flush_work(&net->xfrm.state_hash_work);
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
flush_work(&xfrm_state_gc_work);
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
WARN_ON(!list_empty(&net->xfrm.state_all));
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c6d26afcf89d..a131f9ff979e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1932,7 +1932,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_usersa_flush *p = nlmsg_data(nlh);
int err;
- err = xfrm_state_flush(net, p->proto, true);
+ err = xfrm_state_flush(net, p->proto, true, false);
if (err) {
if (err == -ESRCH) /* empty table */
return 0;
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index f84001019356..33028c098ef3 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -321,6 +321,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
if (a->u.net->sk) {
struct sock *sk = a->u.net->sk;
struct unix_sock *u;
+ struct unix_address *addr;
int len = 0;
char *p = NULL;
@@ -351,14 +352,15 @@ static void dump_common_audit_data(struct audit_buffer *ab,
#endif
case AF_UNIX:
u = unix_sk(sk);
+ addr = smp_load_acquire(&u->addr);
+ if (!addr)
+ break;
if (u->path.dentry) {
audit_log_d_path(ab, " path=", &u->path);
break;
}
- if (!u->addr)
- break;
- len = u->addr->len-sizeof(short);
- p = &u->addr->name->sun_path[0];
+ len = addr->len-sizeof(short);
+ p = &addr->name->sun_path[0];
audit_log_format(ab, " path=");
if (*p)
audit_log_untrustedstring(ab, p);
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 147e34cfceb7..02d7c871862a 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -474,6 +474,16 @@ static void test_lpm_delete(void)
assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
errno == ENOENT);
+ key->prefixlen = 30; // unused prefix so far
+ inet_pton(AF_INET, "192.255.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+ errno == ENOENT);
+
+ key->prefixlen = 16; // same prefix as the root node
+ inet_pton(AF_INET, "192.255.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+ errno == ENOENT);
+
/* assert initial lookup */
key->prefixlen = 32;
inet_pton(AF_INET, "192.168.0.1", key->data);
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 802b4af18729..1080ff55a788 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -388,6 +388,7 @@ fib_carrier_unicast_test()
set -e
$IP link set dev dummy0 carrier off
+ sleep 1
set +e
echo " Carrier down"