summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ptp/testptp.mk33
-rw-r--r--drivers/net/dsa/mv88e6171.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-drv.c22
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c16
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c55
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c6
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_clsf.c12
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c4
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c3
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/mac-fec.c3
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/mac-scc.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/eq.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c1
-rw-r--r--drivers/net/ethernet/sfc/tx.c4
-rw-r--r--drivers/net/hyperv/netvsc_drv.c1
-rw-r--r--drivers/net/macvlan.c10
-rw-r--r--drivers/net/usb/ax88179_178a.c7
-rw-r--r--drivers/net/xen-netback/common.h39
-rw-r--r--drivers/net/xen-netback/interface.c74
-rw-r--r--drivers/net/xen-netback/netback.c319
-rw-r--r--drivers/net/xen-netback/xenbus.c22
-rw-r--r--include/net/netfilter/nf_tables.h3
-rw-r--r--include/net/netfilter/nft_masq.h3
-rw-r--r--init/Kconfig14
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/bpf/Makefile6
-rw-r--r--kernel/bpf/core.c9
-rw-r--r--kernel/bpf/verifier.c3
-rw-r--r--net/Kconfig2
-rw-r--r--net/bridge/br_netfilter.c24
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/skbuff.c13
-rw-r--r--net/core/tso.c3
-rw-r--r--net/dsa/dsa.c5
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c1
-rw-r--r--net/ipv4/tcp.c59
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c1
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/ipv6/xfrm6_policy.c11
-rw-r--r--net/mpls/mpls_gso.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c4
-rw-r--r--net/netfilter/nf_tables_api.c18
-rw-r--r--net/netfilter/nfnetlink_log.c31
-rw-r--r--net/netfilter/nfnetlink_queue_core.c2
-rw-r--r--net/netfilter/nft_compat.c81
-rw-r--r--net/netfilter/nft_masq.c12
-rw-r--r--net/netfilter/nft_nat.c86
-rw-r--r--net/netlink/af_netlink.c37
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/tipc/node.c46
-rw-r--r--net/tipc/node.h7
-rw-r--r--net/tipc/socket.c6
-rw-r--r--net/xfrm/xfrm_output.c2
-rw-r--r--samples/bpf/test_verifier.c11
65 files changed, 755 insertions, 425 deletions
diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk
new file mode 100644
index 000000000000..4ef2d9755421
--- /dev/null
+++ b/Documentation/ptp/testptp.mk
@@ -0,0 +1,33 @@
+# PTP 1588 clock support - User space test program
+#
+# Copyright (C) 2010 OMICRON electronics GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CC = $(CROSS_COMPILE)gcc
+INC = -I$(KBUILD_OUTPUT)/usr/include
+CFLAGS = -Wall $(INC)
+LDLIBS = -lrt
+PROGS = testptp
+
+all: $(PROGS)
+
+testptp: testptp.o
+
+clean:
+ rm -f testptp.o
+
+distclean: clean
+ rm -f $(PROGS)
diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index 1020a7af67cf..78d8e876f3aa 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -395,7 +395,7 @@ static int mv88e6171_get_sset_count(struct dsa_switch *ds)
}
struct dsa_switch_driver mv88e6171_switch_driver = {
- .tag_protocol = DSA_TAG_PROTO_DSA,
+ .tag_protocol = DSA_TAG_PROTO_EDSA,
.priv_size = sizeof(struct mv88e6xxx_priv_state),
.probe = mv88e6171_probe,
.setup = mv88e6171_setup,
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 29554992215a..2349ea970255 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1465,7 +1465,7 @@ static int xgbe_set_features(struct net_device *netdev,
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
- unsigned int rxcsum, rxvlan, rxvlan_filter;
+ netdev_features_t rxcsum, rxvlan, rxvlan_filter;
rxcsum = pdata->netdev_features & NETIF_F_RXCSUM;
rxvlan = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_RX;
@@ -1598,7 +1598,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
struct skb_shared_hwtstamps *hwtstamps;
unsigned int incomplete, error, context_next, context;
unsigned int len, put_len, max_len;
- int received = 0;
+ unsigned int received = 0;
+ int packet_count = 0;
DBGPR("-->xgbe_rx_poll: budget=%d\n", budget);
@@ -1608,7 +1609,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
packet = &ring->packet_data;
- while (received < budget) {
+ while (packet_count < budget) {
DBGPR(" cur = %d\n", ring->cur);
/* First time in loop see if we need to restore state */
@@ -1662,7 +1663,7 @@ read_again:
if (packet->errors)
DBGPR("Error in received packet\n");
dev_kfree_skb(skb);
- continue;
+ goto next_packet;
}
if (!context) {
@@ -1677,7 +1678,7 @@ read_again:
}
dev_kfree_skb(skb);
- continue;
+ goto next_packet;
}
memcpy(skb_tail_pointer(skb), rdata->skb->data,
put_len);
@@ -1694,7 +1695,7 @@ read_again:
/* Stray Context Descriptor? */
if (!skb)
- continue;
+ goto next_packet;
/* Be sure we don't exceed the configured MTU */
max_len = netdev->mtu + ETH_HLEN;
@@ -1705,7 +1706,7 @@ read_again:
if (skb->len > max_len) {
DBGPR("packet length exceeds configured MTU\n");
dev_kfree_skb(skb);
- continue;
+ goto next_packet;
}
#ifdef XGMAC_ENABLE_RX_PKT_DUMP
@@ -1739,6 +1740,9 @@ read_again:
netdev->last_rx = jiffies;
napi_gro_receive(&pdata->napi, skb);
+
+next_packet:
+ packet_count++;
}
/* Check if we need to save state before leaving */
@@ -1752,9 +1756,9 @@ read_again:
rdata->state.error = error;
}
- DBGPR("<--xgbe_rx_poll: received = %d\n", received);
+ DBGPR("<--xgbe_rx_poll: packet_count = %d\n", packet_count);
- return received;
+ return packet_count;
}
static int xgbe_poll(struct napi_struct *napi, int budget)
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
index e6d24c210198..c22f32622fa9 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
@@ -124,20 +124,18 @@ static int xgene_enet_ecc_init(struct xgene_enet_pdata *p)
{
struct net_device *ndev = p->ndev;
u32 data;
- int i;
+ int i = 0;
xgene_enet_wr_diag_csr(p, ENET_CFG_MEM_RAM_SHUTDOWN_ADDR, 0);
- for (i = 0; i < 10 && data != ~0U ; i++) {
+ do {
usleep_range(100, 110);
data = xgene_enet_rd_diag_csr(p, ENET_BLOCK_MEM_RDY_ADDR);
- }
+ if (data == ~0U)
+ return 0;
+ } while (++i < 10);
- if (data != ~0U) {
- netdev_err(ndev, "Failed to release memory from shutdown\n");
- return -ENODEV;
- }
-
- return 0;
+ netdev_err(ndev, "Failed to release memory from shutdown\n");
+ return -ENODEV;
}
static void xgene_enet_config_ring_if_assoc(struct xgene_enet_pdata *p)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
index 8edf0f5bd679..6fe300e316c3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
@@ -60,6 +60,42 @@ void cxgb4_dcb_version_init(struct net_device *dev)
dcb->dcb_version = FW_PORT_DCB_VER_AUTO;
}
+static void cxgb4_dcb_cleanup_apps(struct net_device *dev)
+{
+ struct port_info *pi = netdev2pinfo(dev);
+ struct adapter *adap = pi->adapter;
+ struct port_dcb_info *dcb = &pi->dcb;
+ struct dcb_app app;
+ int i, err;
+
+ /* zero priority implies remove */
+ app.priority = 0;
+
+ for (i = 0; i < CXGB4_MAX_DCBX_APP_SUPPORTED; i++) {
+ /* Check if app list is exhausted */
+ if (!dcb->app_priority[i].protocolid)
+ break;
+
+ app.protocol = dcb->app_priority[i].protocolid;
+
+ if (dcb->dcb_version == FW_PORT_DCB_VER_IEEE) {
+ app.selector = dcb->app_priority[i].sel_field + 1;
+ err = dcb_ieee_setapp(dev, &app);
+ } else {
+ app.selector = !!(dcb->app_priority[i].sel_field);
+ err = dcb_setapp(dev, &app);
+ }
+
+ if (err) {
+ dev_err(adap->pdev_dev,
+ "Failed DCB Clear %s Application Priority: sel=%d, prot=%d, , err=%d\n",
+ dcb_ver_array[dcb->dcb_version], app.selector,
+ app.protocol, -err);
+ break;
+ }
+ }
+}
+
/* Finite State machine for Data Center Bridging.
*/
void cxgb4_dcb_state_fsm(struct net_device *dev,
@@ -80,7 +116,6 @@ void cxgb4_dcb_state_fsm(struct net_device *dev,
/* we're going to use Host DCB */
dcb->state = CXGB4_DCB_STATE_HOST;
dcb->supported = CXGB4_DCBX_HOST_SUPPORT;
- dcb->enabled = 1;
break;
}
@@ -145,6 +180,7 @@ void cxgb4_dcb_state_fsm(struct net_device *dev,
* state. We need to reset back to a ground state
* of incomplete.
*/
+ cxgb4_dcb_cleanup_apps(dev);
cxgb4_dcb_state_init(dev);
dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE;
dcb->supported = CXGB4_DCBX_FW_SUPPORT;
@@ -349,6 +385,12 @@ static u8 cxgb4_setstate(struct net_device *dev, u8 enabled)
{
struct port_info *pi = netdev2pinfo(dev);
+ /* If DCBx is host-managed, dcb is enabled by outside lldp agents */
+ if (pi->dcb.state == CXGB4_DCB_STATE_HOST) {
+ pi->dcb.enabled = enabled;
+ return 0;
+ }
+
/* Firmware doesn't provide any mechanism to control the DCB state.
*/
if (enabled != (pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED))
@@ -833,11 +875,16 @@ static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id,
/* Return whether IEEE Data Center Bridging has been negotiated.
*/
-static inline int cxgb4_ieee_negotiation_complete(struct net_device *dev)
+static inline int
+cxgb4_ieee_negotiation_complete(struct net_device *dev,
+ enum cxgb4_dcb_fw_msgs dcb_subtype)
{
struct port_info *pi = netdev2pinfo(dev);
struct port_dcb_info *dcb = &pi->dcb;
+ if (dcb_subtype && !(dcb->msgs & dcb_subtype))
+ return 0;
+
return (dcb->state == CXGB4_DCB_STATE_FW_ALLSYNCED &&
(dcb->supported & DCB_CAP_DCBX_VER_IEEE));
}
@@ -850,7 +897,7 @@ static int cxgb4_ieee_getapp(struct net_device *dev, struct dcb_app *app)
{
int prio;
- if (!cxgb4_ieee_negotiation_complete(dev))
+ if (!cxgb4_ieee_negotiation_complete(dev, CXGB4_DCB_FW_APP_ID))
return -EINVAL;
if (!(app->selector && app->protocol))
return -EINVAL;
@@ -872,7 +919,7 @@ static int cxgb4_ieee_setapp(struct net_device *dev, struct dcb_app *app)
{
int ret;
- if (!cxgb4_ieee_negotiation_complete(dev))
+ if (!cxgb4_ieee_negotiation_complete(dev, CXGB4_DCB_FW_APP_ID))
return -EINVAL;
if (!(app->selector && app->protocol))
return -EINVAL;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 3f60070f2519..97683c1c5b69 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -694,7 +694,11 @@ int cxgb4_dcb_enabled(const struct net_device *dev)
#ifdef CONFIG_CHELSIO_T4_DCB
struct port_info *pi = netdev_priv(dev);
- return pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED;
+ if (!pi->dcb.enabled)
+ return 0;
+
+ return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) ||
+ (pi->dcb.state == CXGB4_DCB_STATE_HOST));
#else
return 0;
#endif
diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c
index 69dfd3c9e529..0be6850be8a2 100644
--- a/drivers/net/ethernet/cisco/enic/enic_clsf.c
+++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c
@@ -86,7 +86,7 @@ void enic_rfs_flw_tbl_free(struct enic *enic)
int i;
enic_rfs_timer_stop(enic);
- spin_lock(&enic->rfs_h.lock);
+ spin_lock_bh(&enic->rfs_h.lock);
enic->rfs_h.free = 0;
for (i = 0; i < (1 << ENIC_RFS_FLW_BITSHIFT); i++) {
struct hlist_head *hhead;
@@ -100,7 +100,7 @@ void enic_rfs_flw_tbl_free(struct enic *enic)
kfree(n);
}
}
- spin_unlock(&enic->rfs_h.lock);
+ spin_unlock_bh(&enic->rfs_h.lock);
}
struct enic_rfs_fltr_node *htbl_fltr_search(struct enic *enic, u16 fltr_id)
@@ -128,7 +128,7 @@ void enic_flow_may_expire(unsigned long data)
bool res;
int j;
- spin_lock(&enic->rfs_h.lock);
+ spin_lock_bh(&enic->rfs_h.lock);
for (j = 0; j < ENIC_CLSF_EXPIRE_COUNT; j++) {
struct hlist_head *hhead;
struct hlist_node *tmp;
@@ -148,7 +148,7 @@ void enic_flow_may_expire(unsigned long data)
}
}
}
- spin_unlock(&enic->rfs_h.lock);
+ spin_unlock_bh(&enic->rfs_h.lock);
mod_timer(&enic->rfs_h.rfs_may_expire, jiffies + HZ/4);
}
@@ -183,7 +183,7 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
return -EPROTONOSUPPORT;
tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK;
- spin_lock(&enic->rfs_h.lock);
+ spin_lock_bh(&enic->rfs_h.lock);
n = htbl_key_search(&enic->rfs_h.ht_head[tbl_idx], &keys);
if (n) { /* entry already present */
@@ -277,7 +277,7 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
}
ret_unlock:
- spin_unlock(&enic->rfs_h.lock);
+ spin_unlock_bh(&enic->rfs_h.lock);
return res;
}
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 929bfe70080a..180e53fa628f 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1674,13 +1674,13 @@ static int enic_stop(struct net_device *netdev)
enic_dev_disable(enic);
- local_bh_disable();
for (i = 0; i < enic->rq_count; i++) {
napi_disable(&enic->napi[i]);
+ local_bh_disable();
while (!enic_poll_lock_napi(&enic->rq[i]))
mdelay(1);
+ local_bh_enable();
}
- local_bh_enable();
netif_carrier_off(netdev);
netif_tx_disable(netdev);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 81b96cf87574..50a851db2852 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1581,7 +1581,8 @@ fec_enet_interrupt(int irq, void *dev_id)
complete(&fep->mdio_done);
}
- fec_ptp_check_pps_event(fep);
+ if (fep->ptp_clock)
+ fec_ptp_check_pps_event(fep);
return ret;
}
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
index 3d4e08be1709..b34214e2df5f 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
@@ -341,6 +341,9 @@ static void restart(struct net_device *dev)
FC(fecp, x_cntrl, FEC_TCNTRL_FDEN); /* FD disable */
}
+ /* Restore multicast and promiscuous settings */
+ set_multicast_list(dev);
+
/*
* Enable interrupts we wish to service.
*/
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
index f30411f0701f..7a184e8816a4 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
@@ -355,6 +355,9 @@ static void restart(struct net_device *dev)
if (fep->phydev->duplex)
S16(sccp, scc_psmr, SCC_PSMR_LPB | SCC_PSMR_FDE);
+ /* Restore multicast and promiscuous settings */
+ set_multicast_list(dev);
+
S32(sccp, scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ed5f1c15fb0f..c3a7f4a4b775 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6151,7 +6151,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
I40E_GL_MDET_TX_PF_NUM_SHIFT;
u8 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >>
I40E_GL_MDET_TX_VF_NUM_SHIFT;
- u8 event = (reg & I40E_GL_MDET_TX_EVENT_SHIFT) >>
+ u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >>
I40E_GL_MDET_TX_EVENT_SHIFT;
u8 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >>
I40E_GL_MDET_TX_QUEUE_SHIFT;
@@ -6165,7 +6165,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
if (reg & I40E_GL_MDET_RX_VALID_MASK) {
u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >>
I40E_GL_MDET_RX_FUNCTION_SHIFT;
- u8 event = (reg & I40E_GL_MDET_RX_EVENT_SHIFT) >>
+ u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >>
I40E_GL_MDET_RX_EVENT_SHIFT;
u8 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >>
I40E_GL_MDET_RX_QUEUE_SHIFT;
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index a49c9d11d8a5..49290a405903 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1026,6 +1026,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
pr_cont("\n");
}
}
+ synchronize_irq(eq->irq);
mlx4_mtt_cleanup(dev, &eq->mtt);
for (i = 0; i < npages; ++i)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ed53291468f3..a278238a2db6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -420,6 +420,7 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
if (err)
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
eq->eqn);
+ synchronize_irq(table->msix_arr[eq->irqn].vector);
mlx5_buf_free(dev, &eq->buf);
return err;
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index ee84a90e371c..aaf2987512b5 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -343,8 +343,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
unsigned short dma_flags;
int i = 0;
- EFX_BUG_ON_PARANOID(tx_queue->write_count > tx_queue->insert_count);
-
if (skb_shinfo(skb)->gso_size)
return efx_enqueue_skb_tso(tx_queue, skb);
@@ -1258,8 +1256,6 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
/* Find the packet protocol and sanity-check it */
state.protocol = efx_tso_check_protocol(skb);
- EFX_BUG_ON_PARANOID(tx_queue->write_count > tx_queue->insert_count);
-
rc = tso_start(&state, efx, skb);
if (rc)
goto mem_err;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 9e17d1a91e71..78ec33f5100b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -550,6 +550,7 @@ do_lso:
do_send:
/* Start filling in the page buffers with the rndis hdr */
rndis_msg->msg_len += rndis_msg_size;
+ packet->total_data_buflen = rndis_msg->msg_len;
packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
skb, &packet->page_buf[0]);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 29b3bb410781..bfb0b6ec8c56 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -272,7 +272,7 @@ static void macvlan_process_broadcast(struct work_struct *w)
struct sk_buff *skb;
struct sk_buff_head list;
- skb_queue_head_init(&list);
+ __skb_queue_head_init(&list);
spin_lock_bh(&port->bc_queue.lock);
skb_queue_splice_tail_init(&port->bc_queue, &list);
@@ -1082,9 +1082,15 @@ static void macvlan_port_destroy(struct net_device *dev)
{
struct macvlan_port *port = macvlan_port_get_rtnl(dev);
- cancel_work_sync(&port->bc_work);
dev->priv_flags &= ~IFF_MACVLAN_PORT;
netdev_rx_handler_unregister(dev);
+
+ /* After this point, no packet can schedule bc_work anymore,
+ * but we need to cancel it and purge left skbs if any.
+ */
+ cancel_work_sync(&port->bc_work);
+ __skb_queue_purge(&port->bc_queue);
+
kfree_rcu(port, rcu);
}
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index be4275721039..e6338c16081a 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -937,6 +937,7 @@ static int ax88179_set_mac_addr(struct net_device *net, void *p)
{
struct usbnet *dev = netdev_priv(net);
struct sockaddr *addr = p;
+ int ret;
if (netif_running(net))
return -EBUSY;
@@ -946,8 +947,12 @@ static int ax88179_set_mac_addr(struct net_device *net, void *p)
memcpy(net->dev_addr, addr->sa_data, ETH_ALEN);
/* Set the MAC address */
- return ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
+ ret = ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
ETH_ALEN, net->dev_addr);
+ if (ret < 0)
+ return ret;
+
+ return 0;
}
static const struct net_device_ops ax88179_netdev_ops = {
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index d4eb8d2e9cb7..083ecc93fe5e 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -176,10 +176,11 @@ struct xenvif_queue { /* Per-queue data for xenvif */
char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
struct xen_netif_rx_back_ring rx;
struct sk_buff_head rx_queue;
- RING_IDX rx_last_skb_slots;
- unsigned long status;
- struct timer_list rx_stalled;
+ unsigned int rx_queue_max;
+ unsigned int rx_queue_len;
+ unsigned long last_rx_time;
+ bool stalled;
struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
@@ -199,18 +200,14 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct xenvif_stats stats;
};
+/* Maximum number of Rx slots a to-guest packet may use, including the
+ * slot needed for GSO meta-data.
+ */
+#define XEN_NETBK_RX_SLOTS_MAX (MAX_SKB_FRAGS + 1)
+
enum state_bit_shift {
/* This bit marks that the vif is connected */
VIF_STATUS_CONNECTED,
- /* This bit signals the RX thread that queuing was stopped (in
- * start_xmit), and either the timer fired or an RX interrupt came
- */
- QUEUE_STATUS_RX_PURGE_EVENT,
- /* This bit tells the interrupt handler that this queue was the reason
- * for the carrier off, so it should kick the thread. Only queues which
- * brought it down can turn on the carrier.
- */
- QUEUE_STATUS_RX_STALLED
};
struct xenvif {
@@ -228,9 +225,6 @@ struct xenvif {
u8 ip_csum:1;
u8 ipv6_csum:1;
- /* Internal feature information. */
- u8 can_queue:1; /* can queue packets for receiver? */
-
/* Is this interface disabled? True when backend discovers
* frontend is rogue.
*/
@@ -240,6 +234,9 @@ struct xenvif {
/* Queues */
struct xenvif_queue *queues;
unsigned int num_queues; /* active queues, resource allocated */
+ unsigned int stalled_queues;
+
+ spinlock_t lock;
#ifdef CONFIG_DEBUG_FS
struct dentry *xenvif_dbg_root;
@@ -249,6 +246,14 @@ struct xenvif {
struct net_device *dev;
};
+struct xenvif_rx_cb {
+ unsigned long expires;
+ int meta_slots_used;
+ bool full_coalesce;
+};
+
+#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
+
static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
{
return to_xenbus_device(vif->dev->dev.parent);
@@ -272,8 +277,6 @@ void xenvif_xenbus_fini(void);
int xenvif_schedulable(struct xenvif *vif);
-int xenvif_must_stop_queue(struct xenvif_queue *queue);
-
int xenvif_queue_stopped(struct xenvif_queue *queue);
void xenvif_wake_queue(struct xenvif_queue *queue);
@@ -296,6 +299,8 @@ void xenvif_kick_thread(struct xenvif_queue *queue);
int xenvif_dealloc_kthread(void *data);
+void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+
/* Determine whether the needed number of slots (req) are available,
* and set req_event if not.
*/
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index f379689dde30..895fe84011e7 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,6 +43,9 @@
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64
+/* Number of bytes allowed on the internal guest Rx queue. */
+#define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE)
+
/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
* increasing the inflight counter. We need to increase the inflight
* counter because core driver calls into xenvif_zerocopy_callback
@@ -60,20 +63,11 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
atomic_dec(&queue->inflight_packets);
}
-static inline void xenvif_stop_queue(struct xenvif_queue *queue)
-{
- struct net_device *dev = queue->vif->dev;
-
- if (!queue->vif->can_queue)
- return;
-
- netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
-}
-
int xenvif_schedulable(struct xenvif *vif)
{
return netif_running(vif->dev) &&
- test_bit(VIF_STATUS_CONNECTED, &vif->status);
+ test_bit(VIF_STATUS_CONNECTED, &vif->status) &&
+ !vif->disabled;
}
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
@@ -114,16 +108,7 @@ int xenvif_poll(struct napi_struct *napi, int budget)
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
- struct netdev_queue *net_queue =
- netdev_get_tx_queue(queue->vif->dev, queue->id);
- /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
- * the carrier went down and this queue was previously blocked
- */
- if (unlikely(netif_tx_queue_stopped(net_queue) ||
- (!netif_carrier_ok(queue->vif->dev) &&
- test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
- set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
xenvif_kick_thread(queue);
return IRQ_HANDLED;
@@ -151,24 +136,13 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
}
-/* Callback to wake the queue's thread and turn the carrier off on timeout */
-static void xenvif_rx_stalled(unsigned long data)
-{
- struct xenvif_queue *queue = (struct xenvif_queue *)data;
-
- if (xenvif_queue_stopped(queue)) {
- set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
- xenvif_kick_thread(queue);
- }
-}
-
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
unsigned int num_queues = vif->num_queues;
u16 index;
- int min_slots_needed;
+ struct xenvif_rx_cb *cb;
BUG_ON(skb->dev != dev);
@@ -191,30 +165,10 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
!xenvif_schedulable(vif))
goto drop;
- /* At best we'll need one slot for the header and one for each
- * frag.
- */
- min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;
-
- /* If the skb is GSO then we'll also need an extra slot for the
- * metadata.
- */
- if (skb_is_gso(skb))
- min_slots_needed++;
+ cb = XENVIF_RX_CB(skb);
+ cb->expires = jiffies + rx_drain_timeout_jiffies;
- /* If the skb can't possibly fit in the remaining slots
- * then turn off the queue to give the ring a chance to
- * drain.
- */
- if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
- queue->rx_stalled.function = xenvif_rx_stalled;
- queue->rx_stalled.data = (unsigned long)queue;
- xenvif_stop_queue(queue);
- mod_timer(&queue->rx_stalled,
- jiffies + rx_drain_timeout_jiffies);
- }
-
- skb_queue_tail(&queue->rx_queue, skb);
+ xenvif_rx_queue_tail(queue, skb);
xenvif_kick_thread(queue);
return NETDEV_TX_OK;
@@ -465,6 +419,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
vif->queues = NULL;
vif->num_queues = 0;
+ spin_lock_init(&vif->lock);
+
dev->netdev_ops = &xenvif_netdev_ops;
dev->hw_features = NETIF_F_SG |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -508,6 +464,8 @@ int xenvif_init_queue(struct xenvif_queue *queue)
init_timer(&queue->credit_timeout);
queue->credit_window_start = get_jiffies_64();
+ queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES;
+
skb_queue_head_init(&queue->rx_queue);
skb_queue_head_init(&queue->tx_queue);
@@ -539,8 +497,6 @@ int xenvif_init_queue(struct xenvif_queue *queue)
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
}
- init_timer(&queue->rx_stalled);
-
return 0;
}
@@ -551,7 +507,6 @@ void xenvif_carrier_on(struct xenvif *vif)
dev_set_mtu(vif->dev, ETH_DATA_LEN);
netdev_update_features(vif->dev);
set_bit(VIF_STATUS_CONNECTED, &vif->status);
- netif_carrier_on(vif->dev);
if (netif_running(vif->dev))
xenvif_up(vif);
rtnl_unlock();
@@ -611,6 +566,8 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
disable_irq(queue->rx_irq);
}
+ queue->stalled = true;
+
task = kthread_create(xenvif_kthread_guest_rx,
(void *)queue, "%s-guest-rx", queue->name);
if (IS_ERR(task)) {
@@ -674,7 +631,6 @@ void xenvif_disconnect(struct xenvif *vif)
netif_napi_del(&queue->napi);
if (queue->task) {
- del_timer_sync(&queue->rx_stalled);
kthread_stop(queue->task);
queue->task = NULL;
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 08f65996534c..6563f0713fc0 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -55,13 +55,20 @@
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
-/* When guest ring is filled up, qdisc queues the packets for us, but we have
- * to timeout them, otherwise other guests' packets can get stuck there
+/* The time that packets can stay on the guest Rx internal queue
+ * before they are dropped.
*/
unsigned int rx_drain_timeout_msecs = 10000;
module_param(rx_drain_timeout_msecs, uint, 0444);
unsigned int rx_drain_timeout_jiffies;
+/* The length of time before the frontend is considered unresponsive
+ * because it isn't providing Rx slots.
+ */
+static unsigned int rx_stall_timeout_msecs = 60000;
+module_param(rx_stall_timeout_msecs, uint, 0444);
+static unsigned int rx_stall_timeout_jiffies;
+
unsigned int xenvif_max_queues;
module_param_named(max_queues, xenvif_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
@@ -83,7 +90,6 @@ static void make_tx_response(struct xenvif_queue *queue,
s8 st);
static inline int tx_work_todo(struct xenvif_queue *queue);
-static inline int rx_work_todo(struct xenvif_queue *queue);
static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
u16 id,
@@ -163,6 +169,69 @@ bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue, int needed)
return false;
}
+void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&queue->rx_queue.lock, flags);
+
+ __skb_queue_tail(&queue->rx_queue, skb);
+
+ queue->rx_queue_len += skb->len;
+ if (queue->rx_queue_len > queue->rx_queue_max)
+ netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
+
+ spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+}
+
+static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+
+ spin_lock_irq(&queue->rx_queue.lock);
+
+ skb = __skb_dequeue(&queue->rx_queue);
+ if (skb)
+ queue->rx_queue_len -= skb->len;
+
+ spin_unlock_irq(&queue->rx_queue.lock);
+
+ return skb;
+}
+
+static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
+{
+ spin_lock_irq(&queue->rx_queue.lock);
+
+ if (queue->rx_queue_len < queue->rx_queue_max)
+ netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
+
+ spin_unlock_irq(&queue->rx_queue.lock);
+}
+
+
+static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+ while ((skb = xenvif_rx_dequeue(queue)) != NULL)
+ kfree_skb(skb);
+}
+
+static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+
+ for(;;) {
+ skb = skb_peek(&queue->rx_queue);
+ if (!skb)
+ break;
+ if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
+ break;
+ xenvif_rx_dequeue(queue);
+ kfree_skb(skb);
+ }
+}
+
/*
* Returns true if we should start a new receive buffer instead of
* adding 'size' bytes to a buffer which currently contains 'offset'
@@ -237,13 +306,6 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
return meta;
}
-struct xenvif_rx_cb {
- int meta_slots_used;
- bool full_coalesce;
-};
-
-#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
-
/*
* Set up the grant operations for this fragment. If it's a flipping
* interface, we also set up the unmap request from here.
@@ -587,12 +649,15 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
skb_queue_head_init(&rxq);
- while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) {
+ while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)
+ && (skb = xenvif_rx_dequeue(queue)) != NULL) {
RING_IDX max_slots_needed;
RING_IDX old_req_cons;
RING_IDX ring_slots_used;
int i;
+ queue->last_rx_time = jiffies;
+
/* We need a cheap worse case estimate for the number of
* slots we'll use.
*/
@@ -634,15 +699,6 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
max_slots_needed++;
- /* If the skb may not fit then bail out now */
- if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) {
- skb_queue_head(&queue->rx_queue, skb);
- need_to_notify = true;
- queue->rx_last_skb_slots = max_slots_needed;
- break;
- } else
- queue->rx_last_skb_slots = 0;
-
old_req_cons = queue->rx.req_cons;
XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
ring_slots_used = queue->rx.req_cons - old_req_cons;
@@ -1869,12 +1925,6 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
}
}
-static inline int rx_work_todo(struct xenvif_queue *queue)
-{
- return (!skb_queue_empty(&queue->rx_queue) &&
- xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
-}
-
static inline int tx_work_todo(struct xenvif_queue *queue)
{
if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
@@ -1931,92 +1981,121 @@ err:
return err;
}
-static void xenvif_start_queue(struct xenvif_queue *queue)
+static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
{
- if (xenvif_schedulable(queue->vif))
- xenvif_wake_queue(queue);
+ struct xenvif *vif = queue->vif;
+
+ queue->stalled = true;
+
+ /* At least one queue has stalled? Disable the carrier. */
+ spin_lock(&vif->lock);
+ if (vif->stalled_queues++ == 0) {
+ netdev_info(vif->dev, "Guest Rx stalled");
+ netif_carrier_off(vif->dev);
+ }
+ spin_unlock(&vif->lock);
}
-/* Only called from the queue's thread, it handles the situation when the guest
- * doesn't post enough requests on the receiving ring.
- * First xenvif_start_xmit disables QDisc and start a timer, and then either the
- * timer fires, or the guest send an interrupt after posting new request. If it
- * is the timer, the carrier is turned off here.
- * */
-static void xenvif_rx_purge_event(struct xenvif_queue *queue)
+static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
{
- /* Either the last unsuccesful skb or at least 1 slot should fit */
- int needed = queue->rx_last_skb_slots ?
- queue->rx_last_skb_slots : 1;
+ struct xenvif *vif = queue->vif;
- /* It is assumed that if the guest post new slots after this, the RX
- * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up
- * the thread again
- */
- set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
- if (!xenvif_rx_ring_slots_available(queue, needed)) {
- rtnl_lock();
- if (netif_carrier_ok(queue->vif->dev)) {
- /* Timer fired and there are still no slots. Turn off
- * everything except the interrupts
- */
- netif_carrier_off(queue->vif->dev);
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
- if (net_ratelimit())
- netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
- } else {
- /* Probably an another queue already turned the carrier
- * off, make sure nothing is stucked in the internal
- * queue of this queue
- */
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
- }
- rtnl_unlock();
- } else if (!netif_carrier_ok(queue->vif->dev)) {
- unsigned int num_queues = queue->vif->num_queues;
- unsigned int i;
- /* The carrier was down, but an interrupt kicked
- * the thread again after new requests were
- * posted
- */
- clear_bit(QUEUE_STATUS_RX_STALLED,
- &queue->status);
- rtnl_lock();
- netif_carrier_on(queue->vif->dev);
- netif_tx_wake_all_queues(queue->vif->dev);
- rtnl_unlock();
+ queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
+ queue->stalled = false;
- for (i = 0; i < num_queues; i++) {
- struct xenvif_queue *temp = &queue->vif->queues[i];
+ /* All queues are ready? Enable the carrier. */
+ spin_lock(&vif->lock);
+ if (--vif->stalled_queues == 0) {
+ netdev_info(vif->dev, "Guest Rx ready");
+ netif_carrier_on(vif->dev);
+ }
+ spin_unlock(&vif->lock);
+}
- xenvif_napi_schedule_or_enable_events(temp);
- }
- if (net_ratelimit())
- netdev_err(queue->vif->dev, "Carrier on again\n");
- } else {
- /* Queuing were stopped, but the guest posted
- * new requests and sent an interrupt
- */
- clear_bit(QUEUE_STATUS_RX_STALLED,
- &queue->status);
- del_timer_sync(&queue->rx_stalled);
- xenvif_start_queue(queue);
+static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
+{
+ RING_IDX prod, cons;
+
+ prod = queue->rx.sring->req_prod;
+ cons = queue->rx.req_cons;
+
+ return !queue->stalled
+ && prod - cons < XEN_NETBK_RX_SLOTS_MAX
+ && time_after(jiffies,
+ queue->last_rx_time + rx_stall_timeout_jiffies);
+}
+
+static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
+{
+ RING_IDX prod, cons;
+
+ prod = queue->rx.sring->req_prod;
+ cons = queue->rx.req_cons;
+
+ return queue->stalled
+ && prod - cons >= XEN_NETBK_RX_SLOTS_MAX;
+}
+
+static bool xenvif_have_rx_work(struct xenvif_queue *queue)
+{
+ return (!skb_queue_empty(&queue->rx_queue)
+ && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+ || xenvif_rx_queue_stalled(queue)
+ || xenvif_rx_queue_ready(queue)
+ || kthread_should_stop()
+ || queue->vif->disabled;
+}
+
+static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
+{
+ struct sk_buff *skb;
+ long timeout;
+
+ skb = skb_peek(&queue->rx_queue);
+ if (!skb)
+ return MAX_SCHEDULE_TIMEOUT;
+
+ timeout = XENVIF_RX_CB(skb)->expires - jiffies;
+ return timeout < 0 ? 0 : timeout;
+}
+
+/* Wait until the guest Rx thread has work.
+ *
+ * The timeout needs to be adjusted based on the current head of the
+ * queue (and not just the head at the beginning). In particular, if
+ * the queue is initially empty an infinite timeout is used and this
+ * needs to be reduced when a skb is queued.
+ *
+ * This cannot be done with wait_event_timeout() because it only
+ * calculates the timeout once.
+ */
+static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
+{
+ DEFINE_WAIT(wait);
+
+ if (xenvif_have_rx_work(queue))
+ return;
+
+ for (;;) {
+ long ret;
+
+ prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
+ if (xenvif_have_rx_work(queue))
+ break;
+ ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
+ if (!ret)
+ break;
}
+ finish_wait(&queue->wq, &wait);
}
int xenvif_kthread_guest_rx(void *data)
{
struct xenvif_queue *queue = data;
- struct sk_buff *skb;
+ struct xenvif *vif = queue->vif;
- while (!kthread_should_stop()) {
- wait_event_interruptible(queue->wq,
- rx_work_todo(queue) ||
- queue->vif->disabled ||
- test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
- kthread_should_stop());
+ for (;;) {
+ xenvif_wait_for_rx_work(queue);
if (kthread_should_stop())
break;
@@ -2028,35 +2107,38 @@ int xenvif_kthread_guest_rx(void *data)
* context so we defer it here, if this thread is
* associated with queue 0.
*/
- if (unlikely(queue->vif->disabled && queue->id == 0)) {
- xenvif_carrier_off(queue->vif);
- } else if (unlikely(queue->vif->disabled)) {
- /* kthread_stop() would be called upon this thread soon,
- * be a bit proactive
- */
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
- } else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
- &queue->status))) {
- xenvif_rx_purge_event(queue);
- } else if (!netif_carrier_ok(queue->vif->dev)) {
- /* Another queue stalled and turned the carrier off, so
- * purge the internal queue of queues which were not
- * blocked
- */
- skb_queue_purge(&queue->rx_queue);
- queue->rx_last_skb_slots = 0;
+ if (unlikely(vif->disabled && queue->id == 0)) {
+ xenvif_carrier_off(vif);
+ xenvif_rx_queue_purge(queue);
+ continue;
}
if (!skb_queue_empty(&queue->rx_queue))
xenvif_rx_action(queue);
+ /* If the guest hasn't provided any Rx slots for a
+ * while it's probably not responsive, drop the
+ * carrier so packets are dropped earlier.
+ */
+ if (xenvif_rx_queue_stalled(queue))
+ xenvif_queue_carrier_off(queue);
+ else if (xenvif_rx_queue_ready(queue))
+ xenvif_queue_carrier_on(queue);
+
+ /* Queued packets may have foreign pages from other
+ * domains. These cannot be queued indefinitely as
+ * this would starve guests of grant refs and transmit
+ * slots.
+ */
+ xenvif_rx_queue_drop_expired(queue);
+
+ xenvif_rx_queue_maybe_wake(queue);
+
cond_resched();
}
/* Bin any remaining skbs */
- while ((skb = skb_dequeue(&queue->rx_queue)) != NULL)
- dev_kfree_skb(skb);
+ xenvif_rx_queue_purge(queue);
return 0;
}
@@ -2113,6 +2195,7 @@ static int __init netback_init(void)
goto failed_init;
rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+ rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
#ifdef CONFIG_DEBUG_FS
xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 8079c31ac5e6..4e56a27f9689 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -52,6 +52,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
struct xenvif_queue *queue = m->private;
struct xen_netif_tx_back_ring *tx_ring = &queue->tx;
struct xen_netif_rx_back_ring *rx_ring = &queue->rx;
+ struct netdev_queue *dev_queue;
if (tx_ring->sring) {
struct xen_netif_tx_sring *sring = tx_ring->sring;
@@ -112,6 +113,13 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
queue->credit_timeout.expires,
jiffies);
+ dev_queue = netdev_get_tx_queue(queue->vif->dev, queue->id);
+
+ seq_printf(m, "\nRx internal queue: len %u max %u pkts %u %s\n",
+ queue->rx_queue_len, queue->rx_queue_max,
+ skb_queue_len(&queue->rx_queue),
+ netif_tx_queue_stopped(dev_queue) ? "stopped" : "running");
+
return 0;
}
@@ -703,6 +711,7 @@ static void connect(struct backend_info *be)
be->vif->queues = vzalloc(requested_num_queues *
sizeof(struct xenvif_queue));
be->vif->num_queues = requested_num_queues;
+ be->vif->stalled_queues = requested_num_queues;
for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) {
queue = &be->vif->queues[queue_index];
@@ -873,15 +882,10 @@ static int read_xenbus_vif_flags(struct backend_info *be)
if (!rx_copy)
return -EOPNOTSUPP;
- if (vif->dev->tx_queue_len != 0) {
- if (xenbus_scanf(XBT_NIL, dev->otherend,
- "feature-rx-notify", "%d", &val) < 0)
- val = 0;
- if (val)
- vif->can_queue = 1;
- else
- /* Must be non-zero for pfifo_fast to work. */
- vif->dev->tx_queue_len = 1;
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0 || val == 0) {
+ xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory");
+ return -EINVAL;
}
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3d7292392fac..845c596bf594 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -530,6 +530,9 @@ enum nft_chain_type {
NFT_CHAIN_T_MAX
};
+int nft_chain_validate_dependency(const struct nft_chain *chain,
+ enum nft_chain_type type);
+
struct nft_stats {
u64 bytes;
u64 pkts;
diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h
index c72729f954f4..e2a518b60e19 100644
--- a/include/net/netfilter/nft_masq.h
+++ b/include/net/netfilter/nft_masq.h
@@ -13,4 +13,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr);
+int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data);
+
#endif /* _NFT_MASQ_H_ */
diff --git a/init/Kconfig b/init/Kconfig
index 3ee28ae02cc8..2081a4d3d917 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1341,6 +1341,10 @@ config SYSCTL_ARCH_UNALIGN_ALLOW
config HAVE_PCSPKR_PLATFORM
bool
+# interpreter that classic socket filters depend on
+config BPF
+ bool
+
menuconfig EXPERT
bool "Configure standard kernel features (expert users)"
# Unhide debug options, to make the on-by-default options visible
@@ -1521,6 +1525,16 @@ config EVENTFD
If unsure, say Y.
+# syscall, maps, verifier
+config BPF_SYSCALL
+ bool "Enable bpf() system call" if EXPERT
+ select ANON_INODES
+ select BPF
+ default n
+ help
+ Enable the bpf() system call that allows to manipulate eBPF
+ programs and maps via file descriptors.
+
config SHMEM
bool "Use full shmem filesystem" if EXPERT
default y
diff --git a/kernel/Makefile b/kernel/Makefile
index dc5c77544fd6..17ea6d4a9a24 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -86,7 +86,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
-obj-$(CONFIG_NET) += bpf/
+obj-$(CONFIG_BPF) += bpf/
obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 45427239f375..0daf7f6ae7df 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,5 @@
-obj-y := core.o syscall.o verifier.o
-
+obj-y := core.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o
ifdef CONFIG_TEST_BPF
-obj-y += test_stub.o
+obj-$(CONFIG_BPF_SYSCALL) += test_stub.o
endif
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f0c30c59b317..d6594e457a25 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -655,3 +655,12 @@ void bpf_prog_free(struct bpf_prog *fp)
schedule_work(&aux->work);
}
EXPORT_SYMBOL_GPL(bpf_prog_free);
+
+/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
+ * skb_copy_bits(), so provide a weak definition of it for NET-less config.
+ */
+int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
+ int len)
+{
+ return -EFAULT;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 801f5f3b9307..9f81818f2941 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1409,7 +1409,8 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
if (memcmp(&old->regs[i], &cur->regs[i],
sizeof(old->regs[0])) != 0) {
if (old->regs[i].type == NOT_INIT ||
- old->regs[i].type == UNKNOWN_VALUE)
+ (old->regs[i].type == UNKNOWN_VALUE &&
+ cur->regs[i].type != NOT_INIT))
continue;
return false;
}
diff --git a/net/Kconfig b/net/Kconfig
index 6272420a721b..99815b5454bf 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -6,7 +6,7 @@ menuconfig NET
bool "Networking support"
select NLATTR
select GENERIC_NET_UTILS
- select ANON_INODES
+ select BPF
---help---
Unless you really know what you are doing, you should say Y here.
The reason is that some programs need kernel networking support even
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 1bada53bb195..1a4f32c09ad5 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -192,7 +192,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
static int br_parse_ip_options(struct sk_buff *skb)
{
- struct ip_options *opt;
const struct iphdr *iph;
struct net_device *dev = skb->dev;
u32 len;
@@ -201,7 +200,6 @@ static int br_parse_ip_options(struct sk_buff *skb)
goto inhdr_error;
iph = ip_hdr(skb);
- opt = &(IPCB(skb)->opt);
/* Basic sanity checks */
if (iph->ihl < 5 || iph->version != 4)
@@ -227,23 +225,11 @@ static int br_parse_ip_options(struct sk_buff *skb)
}
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- if (iph->ihl == 5)
- return 0;
-
- opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
- if (ip_options_compile(dev_net(dev), opt, skb))
- goto inhdr_error;
-
- /* Check correct handling of SRR option */
- if (unlikely(opt->srr)) {
- struct in_device *in_dev = __in_dev_get_rcu(dev);
- if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev))
- goto drop;
-
- if (ip_options_rcv_srr(skb))
- goto drop;
- }
-
+ /* We should really parse IP options here but until
+ * somebody who actually uses IP options complains to
+ * us we'll just silently ignore the options because
+ * we're lazy!
+ */
return 0;
inhdr_error:
diff --git a/net/core/dev.c b/net/core/dev.c
index b793e3521a36..945bbd001359 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4157,6 +4157,10 @@ EXPORT_SYMBOL(napi_gro_receive);
static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
{
+ if (unlikely(skb->pfmemalloc)) {
+ consume_skb(skb);
+ return;
+ }
__skb_pull(skb, skb_headlen(skb));
/* restore the reserve we had after netdev_alloc_skb_ip_align() */
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 61059a05ec95..c16615bfb61e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4070,15 +4070,22 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ unsigned int thlen = 0;
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- return tcp_hdrlen(skb) + shinfo->gso_size;
+ if (skb->encapsulation) {
+ thlen = skb_inner_transport_header(skb) -
+ skb_transport_header(skb);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ thlen += inner_tcp_hdrlen(skb);
+ } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ thlen = tcp_hdrlen(skb);
+ }
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already
* accounted for.
*/
- return shinfo->gso_size;
+ return thlen + shinfo->gso_size;
}
EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
diff --git a/net/core/tso.c b/net/core/tso.c
index 8c3203c585b0..630b30b4fb53 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,6 +1,7 @@
#include <linux/export.h>
#include <net/ip.h>
#include <net/tso.h>
+#include <asm/unaligned.h>
/* Calculate expected number of TX descriptors */
int tso_count_descs(struct sk_buff *skb)
@@ -23,7 +24,7 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
iph->id = htons(tso->ip_id);
iph->tot_len = htons(size + hdr_len - mac_hdr_len);
tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
- tcph->seq = htonl(tso->tcp_seq);
+ put_unaligned_be32(tso->tcp_seq, &tcph->seq);
tso->ip_id++;
if (!is_last) {
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 22f34cf4cb27..6317b41c99b0 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -174,8 +174,11 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
dst->rcv = brcm_netdev_ops.rcv;
break;
#endif
- default:
+ case DSA_TAG_PROTO_NONE:
break;
+ default:
+ ret = -ENOPROTOOPT;
+ goto out;
}
dst->tag_protocol = drv->tag_protocol;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 92db7a69f2b9..8b7fe5b03906 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1246,7 +1246,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
encap = SKB_GSO_CB(skb)->encap_level > 0;
if (encap)
- features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ features &= skb->dev->hw_enc_features;
SKB_GSO_CB(skb)->encap_level += ihl;
skb_reset_transport_header(skb);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index ccda09628de7..f6e345c0bc23 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -68,7 +68,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
skb->mac_len = skb_inner_network_offset(skb);
/* segment inner packet. */
- enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ enc_features = skb->dev->hw_enc_features & features;
segs = skb_mac_gso_segment(skb, enc_features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 88e5ef2c7f51..bc6471d4abcd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -231,7 +231,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
*/
features = netif_skb_features(skb);
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
- if (IS_ERR(segs)) {
+ if (IS_ERR_OR_NULL(segs)) {
kfree_skb(skb);
return -ENOMEM;
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index b023b4eb1a96..92b303dbd5fc 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -6,6 +6,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/module.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/route.h>
@@ -125,3 +126,5 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
kfree_skb(nskb);
}
EXPORT_SYMBOL_GPL(nf_send_reset);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 1c636d6b5b50..c1023c445920 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -39,6 +39,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = {
.eval = nft_masq_ipv4_eval,
.init = nft_masq_init,
.dump = nft_masq_dump,
+ .validate = nft_masq_validate,
};
static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1bec4e76d88c..39ec0c379545 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2868,61 +2868,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
-
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
-
- if (p->md5_desc.tfm)
- crypto_free_hash(p->md5_desc.tfm);
- }
- free_percpu(pool);
-}
+static bool tcp_md5sig_pool_populated = false;
static void __tcp_alloc_md5sig_pool(void)
{
int cpu;
- struct tcp_md5sig_pool __percpu *pool;
-
- pool = alloc_percpu(struct tcp_md5sig_pool);
- if (!pool)
- return;
for_each_possible_cpu(cpu) {
- struct crypto_hash *hash;
-
- hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(hash))
- goto out_free;
+ if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) {
+ struct crypto_hash *hash;
- per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
+ hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR_OR_NULL(hash))
+ return;
+ per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
+ }
}
- /* before setting tcp_md5sig_pool, we must commit all writes
- * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+ /* before setting tcp_md5sig_pool_populated, we must commit all writes
+ * to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool = pool;
- return;
-out_free:
- __tcp_free_md5sig_pool(pool);
+ tcp_md5sig_pool_populated = true;
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool)) {
+ if (unlikely(!tcp_md5sig_pool_populated)) {
mutex_lock(&tcp_md5sig_mutex);
- if (!tcp_md5sig_pool)
+ if (!tcp_md5sig_pool_populated)
__tcp_alloc_md5sig_pool();
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool != NULL;
+ return tcp_md5sig_pool_populated;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -2936,13 +2917,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
*/
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
- struct tcp_md5sig_pool __percpu *p;
-
local_bh_disable();
- p = ACCESS_ONCE(tcp_md5sig_pool);
- if (p)
- return raw_cpu_ptr(p);
+ if (tcp_md5sig_pool_populated) {
+ /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
+ smp_rmb();
+ return this_cpu_ptr(&tcp_md5sig_pool);
+ }
local_bh_enable();
return NULL;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 94d1a7757ff7..9c7d7621466b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -206,8 +206,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = usin->sin_port;
inet->inet_daddr = daddr;
- inet_set_txhash(sk);
-
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
@@ -224,6 +222,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (err)
goto failure;
+ inet_set_txhash(sk);
+
rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
inet->inet_sport, inet->inet_dport, sk);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 507310ef4b56..6480cea7aa53 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -58,7 +58,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
skb->encap_hdr_csum = 1;
/* segment inner packet. */
- enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ enc_features = skb->dev->hw_enc_features & features;
segs = gso_inner_segment(skb, enc_features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 91014d32488d..a071563a7e6e 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -90,7 +90,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
encap = SKB_GSO_CB(skb)->encap_level > 0;
if (encap)
- features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ features &= skb->dev->hw_enc_features;
SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
ipv6h = ipv6_hdr(skb);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 5f5f0438d74d..20d9defc6c59 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -5,6 +5,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+
+#include <linux/module.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_fib.h>
@@ -161,3 +163,5 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
ip6_local_out(nskb);
}
EXPORT_SYMBOL_GPL(nf_send_reset6);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index 556262f40761..8a7ac685076d 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -39,6 +39,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = {
.eval = nft_masq_ipv6_eval,
.init = nft_masq_init,
.dump = nft_masq_dump,
+ .validate = nft_masq_validate,
};
static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 831495529b82..ace29b60813c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -200,8 +200,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
- ip6_set_txhash(sk);
-
/*
* TCP over IPv4
*/
@@ -297,6 +295,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err)
goto late_failure;
+ ip6_set_txhash(sk);
+
if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32,
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ac49f84fe2c3..5f983644373a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -170,8 +170,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
case IPPROTO_DCCP:
if (!onlyproto && (nh + offset + 4 < skb->data ||
pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
- __be16 *ports = (__be16 *)exthdr;
+ __be16 *ports;
+ nh = skb_network_header(skb);
+ ports = (__be16 *)(nh + offset);
fl6->fl6_sport = ports[!!reverse];
fl6->fl6_dport = ports[!reverse];
}
@@ -180,8 +182,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
case IPPROTO_ICMPV6:
if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
- u8 *icmp = (u8 *)exthdr;
+ u8 *icmp;
+ nh = skb_network_header(skb);
+ icmp = (u8 *)(nh + offset);
fl6->fl6_icmp_type = icmp[0];
fl6->fl6_icmp_code = icmp[1];
}
@@ -192,8 +196,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
case IPPROTO_MH:
if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
struct ip6_mh *mh;
- mh = (struct ip6_mh *)exthdr;
+ nh = skb_network_header(skb);
+ mh = (struct ip6_mh *)(nh + offset);
fl6->fl6_mh_type = mh->ip6mh_type;
}
fl6->flowi6_proto = nexthdr;
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index e28ed2ef5b06..f0f5309a2d72 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -48,7 +48,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
__skb_push(skb, skb->mac_len);
/* Segment inner packet. */
- mpls_features = skb->dev->mpls_features & netif_skb_features(skb);
+ mpls_features = skb->dev->mpls_features & features;
segs = skb_mac_gso_segment(skb, mpls_features);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 912e5a05b79d..86f9d76b1464 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -659,7 +659,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
struct ip_set *set;
struct ip_set_net *inst = ip_set_pernet(net);
- if (index > inst->ip_set_max)
+ if (index >= inst->ip_set_max)
return IPSET_INVALID_ID;
nfnl_lock(NFNL_SUBSYS_IPSET);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 44d1ea32570a..d87b6423ffb2 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -213,7 +213,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
{
/* REPLY */
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
-/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
+/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
/*
* sNO -> sIV Never reached.
* sSS -> sS2 Simultaneous open
@@ -223,7 +223,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sFW -> sIV
* sCW -> sIV
* sLA -> sIV
- * sTW -> sIV Reopened connection, but server may not do it.
+ * sTW -> sSS Reopened connection, but server may have switched role
* sCL -> sIV
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 556a0dfa4abc..11ab4b078f3b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1328,10 +1328,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
basechain->stats = stats;
} else {
stats = netdev_alloc_pcpu_stats(struct nft_stats);
- if (IS_ERR(stats)) {
+ if (stats == NULL) {
module_put(type->owner);
kfree(basechain);
- return PTR_ERR(stats);
+ return -ENOMEM;
}
rcu_assign_pointer(basechain->stats, stats);
}
@@ -3744,6 +3744,20 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
.abort = nf_tables_abort,
};
+int nft_chain_validate_dependency(const struct nft_chain *chain,
+ enum nft_chain_type type)
+{
+ const struct nft_base_chain *basechain;
+
+ if (chain->flags & NFT_BASE_CHAIN) {
+ basechain = nft_base_chain(chain);
+ if (basechain->type->type != type)
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_chain_validate_dependency);
+
/*
* Loop detection - walk through the ruleset beginning at the destination chain
* of a new jump until either the source chain is reached (loop) or all
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b1e3a0579416..5f1be5ba3559 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -43,7 +43,8 @@
#define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE
#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */
#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */
-#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */
+/* max packet size is limited by 16-bit struct nfattr nfa_len field */
+#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN)
#define PRINTR(x, args...) do { if (net_ratelimit()) \
printk(x, ## args); } while (0);
@@ -252,6 +253,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
case NFULNL_COPY_PACKET:
inst->copy_mode = mode;
+ if (range == 0)
+ range = NFULNL_COPY_RANGE_MAX;
inst->copy_range = min_t(unsigned int,
range, NFULNL_COPY_RANGE_MAX);
break;
@@ -343,26 +346,25 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
return skb;
}
-static int
+static void
__nfulnl_send(struct nfulnl_instance *inst)
{
- int status = -1;
-
if (inst->qlen > 1) {
struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0,
NLMSG_DONE,
sizeof(struct nfgenmsg),
0);
- if (!nlh)
+ if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n",
+ inst->skb->len, skb_tailroom(inst->skb))) {
+ kfree_skb(inst->skb);
goto out;
+ }
}
- status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
- MSG_DONTWAIT);
-
+ nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
+ MSG_DONTWAIT);
+out:
inst->qlen = 0;
inst->skb = NULL;
-out:
- return status;
}
static void
@@ -649,7 +651,8 @@ nfulnl_log_packet(struct net *net,
+ nla_total_size(sizeof(u_int32_t)) /* gid */
+ nla_total_size(plen) /* prefix */
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
- + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp));
+ + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp))
+ + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */
if (in && skb_mac_header_was_set(skb)) {
size += nla_total_size(skb->dev->hard_header_len)
@@ -678,8 +681,7 @@ nfulnl_log_packet(struct net *net,
break;
case NFULNL_COPY_PACKET:
- if (inst->copy_range == 0
- || inst->copy_range > skb->len)
+ if (inst->copy_range > skb->len)
data_len = skb->len;
else
data_len = inst->copy_range;
@@ -692,8 +694,7 @@ nfulnl_log_packet(struct net *net,
goto unlock_and_release;
}
- if (inst->skb &&
- size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) {
+ if (inst->skb && size > skb_tailroom(inst->skb)) {
/* either the queue len is too high or we don't have
* enough room in the skb left. flush to userspace. */
__nfulnl_flush(inst);
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index a82077d9f59b..7c60ccd61a3e 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -665,7 +665,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
* returned by nf_queue. For instance, callers rely on -ECANCELED to
* mean 'ignore this hook'.
*/
- if (IS_ERR(segs))
+ if (IS_ERR_OR_NULL(segs))
goto out_err;
queued = 0;
err = 0;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 7e2683c8a44a..9d6d6f60a80f 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -19,9 +19,52 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <asm/uaccess.h> /* for set_fs */
#include <net/netfilter/nf_tables.h>
+static const struct {
+ const char *name;
+ u8 type;
+} table_to_chaintype[] = {
+ { "filter", NFT_CHAIN_T_DEFAULT },
+ { "raw", NFT_CHAIN_T_DEFAULT },
+ { "security", NFT_CHAIN_T_DEFAULT },
+ { "mangle", NFT_CHAIN_T_ROUTE },
+ { "nat", NFT_CHAIN_T_NAT },
+ { },
+};
+
+static int nft_compat_table_to_chaintype(const char *table)
+{
+ int i;
+
+ for (i = 0; table_to_chaintype[i].name != NULL; i++) {
+ if (strcmp(table_to_chaintype[i].name, table) == 0)
+ return table_to_chaintype[i].type;
+ }
+
+ return -1;
+}
+
+static int nft_compat_chain_validate_dependency(const char *tablename,
+ const struct nft_chain *chain)
+{
+ enum nft_chain_type type;
+ const struct nft_base_chain *basechain;
+
+ if (!tablename || !(chain->flags & NFT_BASE_CHAIN))
+ return 0;
+
+ type = nft_compat_table_to_chaintype(tablename);
+ if (type < 0)
+ return -EINVAL;
+
+ basechain = nft_base_chain(chain);
+ if (basechain->type->type != type)
+ return -EINVAL;
+
+ return 0;
+}
+
union nft_entry {
struct ipt_entry e4;
struct ip6t_entry e6;
@@ -95,6 +138,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
const struct nf_hook_ops *ops = &basechain->ops[0];
par->hook_mask = 1 << ops->hooknum;
+ } else {
+ par->hook_mask = 0;
}
par->family = ctx->afi->family;
}
@@ -151,6 +196,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
+ ret = nft_compat_chain_validate_dependency(target->table, ctx->chain);
+ if (ret < 0)
+ goto err;
+
target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -216,6 +265,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
{
struct xt_target *target = expr->ops->data;
unsigned int hook_mask = 0;
+ int ret;
if (ctx->chain->flags & NFT_BASE_CHAIN) {
const struct nft_base_chain *basechain =
@@ -223,11 +273,13 @@ static int nft_target_validate(const struct nft_ctx *ctx,
const struct nf_hook_ops *ops = &basechain->ops[0];
hook_mask = 1 << ops->hooknum;
- if (hook_mask & target->hooks)
- return 0;
+ if (!(hook_mask & target->hooks))
+ return -EINVAL;
- /* This target is being called from an invalid chain */
- return -EINVAL;
+ ret = nft_compat_chain_validate_dependency(target->table,
+ ctx->chain);
+ if (ret < 0)
+ return ret;
}
return 0;
}
@@ -293,6 +345,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
const struct nf_hook_ops *ops = &basechain->ops[0];
par->hook_mask = 1 << ops->hooknum;
+ } else {
+ par->hook_mask = 0;
}
par->family = ctx->afi->family;
}
@@ -320,6 +374,10 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
union nft_entry e = {};
int ret;
+ ret = nft_compat_chain_validate_dependency(match->name, ctx->chain);
+ if (ret < 0)
+ goto err;
+
match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -379,6 +437,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
{
struct xt_match *match = expr->ops->data;
unsigned int hook_mask = 0;
+ int ret;
if (ctx->chain->flags & NFT_BASE_CHAIN) {
const struct nft_base_chain *basechain =
@@ -386,11 +445,13 @@ static int nft_match_validate(const struct nft_ctx *ctx,
const struct nf_hook_ops *ops = &basechain->ops[0];
hook_mask = 1 << ops->hooknum;
- if (hook_mask & match->hooks)
- return 0;
+ if (!(hook_mask & match->hooks))
+ return -EINVAL;
- /* This match is being called from an invalid chain */
- return -EINVAL;
+ ret = nft_compat_chain_validate_dependency(match->name,
+ ctx->chain);
+ if (ret < 0)
+ return ret;
}
return 0;
}
@@ -611,7 +672,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
family = ctx->afi->family;
/* Re-use the existing target if it's already loaded. */
- list_for_each_entry(nft_target, &nft_match_list, head) {
+ list_for_each_entry(nft_target, &nft_target_list, head) {
struct xt_target *target = nft_target->ops.data;
if (strcmp(target->name, tg_name) == 0 &&
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 6637bab00567..d1ffd5eb3a9b 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -26,6 +26,11 @@ int nft_masq_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_masq *priv = nft_expr_priv(expr);
+ int err;
+
+ err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+ if (err < 0)
+ return err;
if (tb[NFTA_MASQ_FLAGS] == NULL)
return 0;
@@ -55,5 +60,12 @@ nla_put_failure:
}
EXPORT_SYMBOL_GPL(nft_masq_dump);
+int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+}
+EXPORT_SYMBOL_GPL(nft_masq_validate);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 799550b476fb..afe2b0b45ec4 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -95,7 +95,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
u32 family;
int err;
- if (tb[NFTA_NAT_TYPE] == NULL)
+ err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_NAT_TYPE] == NULL ||
+ (tb[NFTA_NAT_REG_ADDR_MIN] == NULL &&
+ tb[NFTA_NAT_REG_PROTO_MIN] == NULL))
return -EINVAL;
switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
@@ -120,38 +126,44 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
- priv->sreg_addr_min = ntohl(nla_get_be32(
- tb[NFTA_NAT_REG_ADDR_MIN]));
+ priv->sreg_addr_min =
+ ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN]));
+
err = nft_validate_input_register(priv->sreg_addr_min);
if (err < 0)
return err;
- }
- if (tb[NFTA_NAT_REG_ADDR_MAX]) {
- priv->sreg_addr_max = ntohl(nla_get_be32(
- tb[NFTA_NAT_REG_ADDR_MAX]));
- err = nft_validate_input_register(priv->sreg_addr_max);
- if (err < 0)
- return err;
- } else
- priv->sreg_addr_max = priv->sreg_addr_min;
+ if (tb[NFTA_NAT_REG_ADDR_MAX]) {
+ priv->sreg_addr_max =
+ ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX]));
+
+ err = nft_validate_input_register(priv->sreg_addr_max);
+ if (err < 0)
+ return err;
+ } else {
+ priv->sreg_addr_max = priv->sreg_addr_min;
+ }
+ }
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
- priv->sreg_proto_min = ntohl(nla_get_be32(
- tb[NFTA_NAT_REG_PROTO_MIN]));
+ priv->sreg_proto_min =
+ ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN]));
+
err = nft_validate_input_register(priv->sreg_proto_min);
if (err < 0)
return err;
- }
- if (tb[NFTA_NAT_REG_PROTO_MAX]) {
- priv->sreg_proto_max = ntohl(nla_get_be32(
- tb[NFTA_NAT_REG_PROTO_MAX]));
- err = nft_validate_input_register(priv->sreg_proto_max);
- if (err < 0)
- return err;
- } else
- priv->sreg_proto_max = priv->sreg_proto_min;
+ if (tb[NFTA_NAT_REG_PROTO_MAX]) {
+ priv->sreg_proto_max =
+ ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX]));
+
+ err = nft_validate_input_register(priv->sreg_proto_max);
+ if (err < 0)
+ return err;
+ } else {
+ priv->sreg_proto_max = priv->sreg_proto_min;
+ }
+ }
if (tb[NFTA_NAT_FLAGS]) {
priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
@@ -179,17 +191,19 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family)))
goto nla_put_failure;
- if (nla_put_be32(skb,
- NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min)))
- goto nla_put_failure;
- if (nla_put_be32(skb,
- NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max)))
- goto nla_put_failure;
+
+ if (priv->sreg_addr_min) {
+ if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN,
+ htonl(priv->sreg_addr_min)) ||
+ nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX,
+ htonl(priv->sreg_addr_max)))
+ goto nla_put_failure;
+ }
+
if (priv->sreg_proto_min) {
if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN,
- htonl(priv->sreg_proto_min)))
- goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
+ htonl(priv->sreg_proto_min)) ||
+ nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
htonl(priv->sreg_proto_max)))
goto nla_put_failure;
}
@@ -205,6 +219,13 @@ nla_put_failure:
return -1;
}
+static int nft_nat_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+}
+
static struct nft_expr_type nft_nat_type;
static const struct nft_expr_ops nft_nat_ops = {
.type = &nft_nat_type,
@@ -212,6 +233,7 @@ static const struct nft_expr_ops nft_nat_ops = {
.eval = nft_nat_eval,
.init = nft_nat_init,
.dump = nft_nat_dump,
+ .validate = nft_nat_validate,
};
static struct nft_expr_type nft_nat_type __read_mostly = {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7a186e74b1b3..f1de72de273e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -96,6 +96,14 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);
+/* nl_table locking explained:
+ * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock
+ * combined with an RCU read-side lock. Insertion and removal are protected
+ * with nl_sk_hash_lock while using RCU list modification primitives and may
+ * run in parallel to nl_table_lock protected lookups. Destruction of the
+ * Netlink socket may only occur *after* nl_table_lock has been acquired
+ * either during or after the socket has been removed from the list.
+ */
DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);
@@ -109,10 +117,10 @@ EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
static int lockdep_nl_sk_hash_is_held(void)
{
#ifdef CONFIG_LOCKDEP
- return (debug_locks) ? lockdep_is_held(&nl_sk_hash_lock) : 1;
-#else
- return 1;
+ if (debug_locks)
+ return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock);
#endif
+ return 1;
}
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
@@ -1028,11 +1036,13 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
struct netlink_table *table = &nl_table[protocol];
struct sock *sk;
+ read_lock(&nl_table_lock);
rcu_read_lock();
sk = __netlink_lookup(table, portid, net);
if (sk)
sock_hold(sk);
rcu_read_unlock();
+ read_unlock(&nl_table_lock);
return sk;
}
@@ -1257,9 +1267,6 @@ static int netlink_release(struct socket *sock)
}
netlink_table_ungrab();
- /* Wait for readers to complete */
- synchronize_net();
-
kfree(nlk->groups);
nlk->groups = NULL;
@@ -1281,6 +1288,7 @@ static int netlink_autobind(struct socket *sock)
retry:
cond_resched();
+ netlink_table_grab();
rcu_read_lock();
if (__netlink_lookup(table, portid, net)) {
/* Bind collision, search negative portid values. */
@@ -1288,9 +1296,11 @@ retry:
if (rover > -4097)
rover = -4097;
rcu_read_unlock();
+ netlink_table_ungrab();
goto retry;
}
rcu_read_unlock();
+ netlink_table_ungrab();
err = netlink_insert(sk, net, portid);
if (err == -EADDRINUSE)
@@ -2921,14 +2931,16 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
}
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU)
+ __acquires(nl_table_lock) __acquires(RCU)
{
+ read_lock(&nl_table_lock);
rcu_read_lock();
return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct rhashtable *ht;
struct netlink_sock *nlk;
struct nl_seq_iter *iter;
struct net *net;
@@ -2943,19 +2955,19 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
iter = seq->private;
nlk = v;
- rht_for_each_entry_rcu(nlk, nlk->node.next, node)
+ i = iter->link;
+ ht = &nl_table[i].hash;
+ rht_for_each_entry(nlk, nlk->node.next, ht, node)
if (net_eq(sock_net((struct sock *)nlk), net))
return nlk;
- i = iter->link;
j = iter->hash_idx + 1;
do {
- struct rhashtable *ht = &nl_table[i].hash;
const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
for (; j < tbl->size; j++) {
- rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) {
+ rht_for_each_entry(nlk, tbl->buckets[j], ht, node) {
if (net_eq(sock_net((struct sock *)nlk), net)) {
iter->link = i;
iter->hash_idx = j;
@@ -2971,9 +2983,10 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void netlink_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU)
+ __releases(RCU) __releases(nl_table_lock)
{
rcu_read_unlock();
+ read_unlock(&nl_table_lock);
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 2e31d9e7f4dc..e6d7255183eb 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -324,6 +324,8 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
segs = __skb_gso_segment(skb, NETIF_F_SG, false);
if (IS_ERR(segs))
return PTR_ERR(segs);
+ if (segs == NULL)
+ return -EINVAL;
/* Queue all of the segments. */
skb = segs;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2cf61b3e633c..76f402e05bd6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -947,7 +947,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
if (qdisc_is_percpu_stats(sch)) {
sch->cpu_bstats =
- alloc_percpu(struct gnet_stats_basic_cpu);
+ netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
if (!sch->cpu_bstats)
goto err_out4;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 90cee4a6fce4..5781634e957d 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -219,11 +219,11 @@ void tipc_node_abort_sock_conns(struct list_head *conns)
void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
struct tipc_link **active = &n_ptr->active_links[0];
- u32 addr = n_ptr->addr;
n_ptr->working_links++;
- tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE,
- l_ptr->bearer_id, addr);
+ n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP;
+ n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
+
pr_info("Established link <%s> on network plane %c\n",
l_ptr->name, l_ptr->net_plane);
@@ -284,10 +284,10 @@ static void node_select_active_links(struct tipc_node *n_ptr)
void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
struct tipc_link **active;
- u32 addr = n_ptr->addr;
n_ptr->working_links--;
- tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr);
+ n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN;
+ n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
if (!tipc_link_is_active(l_ptr)) {
pr_info("Lost standby link <%s> on network plane %c\n",
@@ -552,28 +552,30 @@ void tipc_node_unlock(struct tipc_node *node)
LIST_HEAD(conn_sks);
struct sk_buff_head waiting_sks;
u32 addr = 0;
- unsigned int flags = node->action_flags;
+ int flags = node->action_flags;
+ u32 link_id = 0;
- if (likely(!node->action_flags)) {
+ if (likely(!flags)) {
spin_unlock_bh(&node->lock);
return;
}
+ addr = node->addr;
+ link_id = node->link_id;
__skb_queue_head_init(&waiting_sks);
- if (node->action_flags & TIPC_WAKEUP_USERS) {
+
+ if (flags & TIPC_WAKEUP_USERS)
skb_queue_splice_init(&node->waiting_sks, &waiting_sks);
- node->action_flags &= ~TIPC_WAKEUP_USERS;
- }
- if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) {
+
+ if (flags & TIPC_NOTIFY_NODE_DOWN) {
list_replace_init(&node->nsub, &nsub_list);
list_replace_init(&node->conn_sks, &conn_sks);
- node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN;
}
- if (node->action_flags & TIPC_NOTIFY_NODE_UP) {
- node->action_flags &= ~TIPC_NOTIFY_NODE_UP;
- addr = node->addr;
- }
- node->action_flags &= ~TIPC_WAKEUP_BCAST_USERS;
+ node->action_flags &= ~(TIPC_WAKEUP_USERS | TIPC_NOTIFY_NODE_DOWN |
+ TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP |
+ TIPC_NOTIFY_LINK_DOWN |
+ TIPC_WAKEUP_BCAST_USERS);
+
spin_unlock_bh(&node->lock);
while (!skb_queue_empty(&waiting_sks))
@@ -588,6 +590,14 @@ void tipc_node_unlock(struct tipc_node *node)
if (flags & TIPC_WAKEUP_BCAST_USERS)
tipc_bclink_wakeup_users();
- if (addr)
+ if (flags & TIPC_NOTIFY_NODE_UP)
tipc_named_node_up(addr);
+
+ if (flags & TIPC_NOTIFY_LINK_UP)
+ tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr,
+ TIPC_NODE_SCOPE, link_id, addr);
+
+ if (flags & TIPC_NOTIFY_LINK_DOWN)
+ tipc_nametbl_withdraw(TIPC_LINK_STATE, addr,
+ link_id, addr);
}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 67513c3c852c..04e91458bb29 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -53,6 +53,7 @@
* TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
* TIPC_NOTIFY_NODE_DOWN: notify node is down
* TIPC_NOTIFY_NODE_UP: notify node is up
+ * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type
*/
enum {
TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1),
@@ -60,7 +61,9 @@ enum {
TIPC_NOTIFY_NODE_DOWN = (1 << 3),
TIPC_NOTIFY_NODE_UP = (1 << 4),
TIPC_WAKEUP_USERS = (1 << 5),
- TIPC_WAKEUP_BCAST_USERS = (1 << 6)
+ TIPC_WAKEUP_BCAST_USERS = (1 << 6),
+ TIPC_NOTIFY_LINK_UP = (1 << 7),
+ TIPC_NOTIFY_LINK_DOWN = (1 << 8)
};
/**
@@ -100,6 +103,7 @@ struct tipc_node_bclink {
* @working_links: number of working links to node (both active and standby)
* @link_cnt: number of links to node
* @signature: node instance identifier
+ * @link_id: local and remote bearer ids of changing link, if any
* @nsub: list of "node down" subscriptions monitoring node
* @rcu: rcu struct for tipc_node
*/
@@ -116,6 +120,7 @@ struct tipc_node {
int link_cnt;
int working_links;
u32 signature;
+ u32 link_id;
struct list_head nsub;
struct sk_buff_head waiting_sks;
struct list_head conn_sks;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 75275c5cf929..51bddc236a15 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1776,7 +1776,7 @@ int tipc_sk_rcv(struct sk_buff *buf)
sk = &tsk->sk;
/* Queue message */
- bh_lock_sock(sk);
+ spin_lock_bh(&sk->sk_lock.slock);
if (!sock_owned_by_user(sk)) {
rc = filter_rcv(sk, buf);
@@ -1787,7 +1787,7 @@ int tipc_sk_rcv(struct sk_buff *buf)
if (sk_add_backlog(sk, buf, limit))
rc = -TIPC_ERR_OVERLOAD;
}
- bh_unlock_sock(sk);
+ spin_unlock_bh(&sk->sk_lock.slock);
tipc_sk_put(tsk);
if (likely(!rc))
return 0;
@@ -2673,7 +2673,7 @@ static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg)
case SIOCGETLINKNAME:
if (copy_from_user(&lnr, argp, sizeof(lnr)))
return -EFAULT;
- if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer,
+ if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer,
lnr.linkname, TIPC_MAX_LINK_NAME)) {
if (copy_to_user(argp, &lnr, sizeof(lnr)))
return -EFAULT;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 499d6c18a8ce..7c532856b398 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -157,6 +157,8 @@ static int xfrm_output_gso(struct sk_buff *skb)
kfree_skb(skb);
if (IS_ERR(segs))
return PTR_ERR(segs);
+ if (segs == NULL)
+ return -EINVAL;
do {
struct sk_buff *nskb = segs->next;
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index f44ef11f65a7..eb4bec0ad8af 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -209,6 +209,17 @@ static struct bpf_test tests[] = {
.result = REJECT,
},
{
+ "program doesn't init R0 before exit in all branches",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
"stack out of bounds",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),