summary refs log tree commit diff stats
path: root/net
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-07-21 22:46:01 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-07-21 22:46:01 -0700
commit 15ba2236f3556fc01b9ca91394465152b5ea74b6 (patch)
tree d272d8227f618aab5e63075a8aa86932f3c89e50 /net
parent 89faa06ec4229b27e339891df69b4d92f29ab899 (diff)
parent 850717ef00d8a224cf1aaffc9c636ea67e01cce2 (diff)
download linux-15ba2236f3556fc01b9ca91394465152b5ea74b6.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Null termination fix in dns_resolver got the pointer dereferencing wrong, fix from Ben Hutchings. 2) ip_options_compile() has a benign but real buffer overflow when parsing options. From Eric Dumazet. 3) Table updates can crash in netfilter's nftables if none of the state flags indicate an actual change, from Pablo Neira Ayuso. 4) Fix race in nf_tables dumping, also from Pablo. 5) GRE-GRO support broke the forwarding path because the segmentation state was not fully initialized in these paths, from Jerry Chu. 6) sunvnet driver leaks objects and potentially crashes on module unload, from Sowmini Varadhan. 7) We can accidentally generate the same handle for several u32 classifier filters, fix from Cong Wang. 8) Several edge case bug fixes in fragment handling in xen-netback, from Zoltan Kiss. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (21 commits) ipv4: fix buffer overflow in ip_options_compile() batman-adv: fix TT VLAN inconsistency on VLAN re-add batman-adv: drop QinQ claim frames in bridge loop avoidance dns_resolver: Null-terminate the right string xen-netback: Fix pointer incrementation to avoid incorrect logging xen-netback: Fix releasing header slot on error path xen-netback: Fix releasing frag_list skbs in error path xen-netback: Fix handling frag_list on grant op error path net_sched: avoid generating same handle for u32 filters net: huawei_cdc_ncm: add "subclass 3" devices net: qmi_wwan: add two Sierra Wireless/Netgear devices wan/x25_asy: integer overflow in x25_asy_change_mtu() net: ppp: fix creating PPP pass and active filters net/mlx4_en: cq->irq_desc wasn't set in legacy EQ's sunvnet: clean up objects created in vnet_new() on vnet_exit() r8169: Enable RX_MULTI_EN for RTL_GIGA_MAC_VER_40 net-gre-gro: Fix a bug that breaks the forwarding path netfilter: nf_tables: 64bit stats need some extra synchronization netfilter: nf_tables: set NLM_F_DUMP_INTR if netlink dumping is stale netfilter: nf_tables: 
safe RCU iteration on list when dumping ...
Diffstat (limited to 'net')
-rw-r--r-- net/batman-adv/bridge_loop_avoidance.c 44
-rw-r--r-- net/batman-adv/soft-interface.c 60
-rw-r--r-- net/batman-adv/translation-table.c 26
-rw-r--r-- net/batman-adv/types.h 2
-rw-r--r-- net/core/dev.c 2
-rw-r--r-- net/dns_resolver/dns_query.c 2
-rw-r--r-- net/ipv4/af_inet.c 3
-rw-r--r-- net/ipv4/gre_offload.c 3
-rw-r--r-- net/ipv4/ip_options.c 4
-rw-r--r-- net/ipv4/tcp_offload.c 2
-rw-r--r-- net/ipv6/tcpv6_offload.c 2
-rw-r--r-- net/netfilter/nf_tables_api.c 140
-rw-r--r-- net/netfilter/nf_tables_core.c 10
-rw-r--r-- net/sched/cls_u32.c 19
14 files changed, 232 insertions, 87 deletions
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 6f0d9ec37950..a957c8140721 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -800,11 +800,6 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
bla_dst = (struct batadv_bla_claim_dst *)hw_dst;
bla_dst_own = &bat_priv->bla.claim_dest;
- /* check if it is a claim packet in general */
- if (memcmp(bla_dst->magic, bla_dst_own->magic,
- sizeof(bla_dst->magic)) != 0)
- return 0;
-
/* if announcement packet, use the source,
* otherwise assume it is in the hw_src
*/
@@ -866,12 +861,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if,
struct sk_buff *skb)
{
- struct batadv_bla_claim_dst *bla_dst;
+ struct batadv_bla_claim_dst *bla_dst, *bla_dst_own;
uint8_t *hw_src, *hw_dst;
- struct vlan_ethhdr *vhdr;
+ struct vlan_hdr *vhdr, vhdr_buf;
struct ethhdr *ethhdr;
struct arphdr *arphdr;
unsigned short vid;
+ int vlan_depth = 0;
__be16 proto;
int headlen;
int ret;
@@ -882,9 +878,24 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
proto = ethhdr->h_proto;
headlen = ETH_HLEN;
if (vid & BATADV_VLAN_HAS_TAG) {
- vhdr = vlan_eth_hdr(skb);
- proto = vhdr->h_vlan_encapsulated_proto;
- headlen += VLAN_HLEN;
+ /* Traverse the VLAN/Ethertypes.
+ *
+ * At this point it is known that the first protocol is a VLAN
+ * header, so start checking at the encapsulated protocol.
+ *
+ * The depth of the VLAN headers is recorded to drop BLA claim
+ * frames encapsulated into multiple VLAN headers (QinQ).
+ */
+ do {
+ vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN,
+ &vhdr_buf);
+ if (!vhdr)
+ return 0;
+
+ proto = vhdr->h_vlan_encapsulated_proto;
+ headlen += VLAN_HLEN;
+ vlan_depth++;
+ } while (proto == htons(ETH_P_8021Q));
}
if (proto != htons(ETH_P_ARP))
@@ -914,6 +925,19 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
hw_src = (uint8_t *)arphdr + sizeof(struct arphdr);
hw_dst = hw_src + ETH_ALEN + 4;
bla_dst = (struct batadv_bla_claim_dst *)hw_dst;
+ bla_dst_own = &bat_priv->bla.claim_dest;
+
+ /* check if it is a claim frame in general */
+ if (memcmp(bla_dst->magic, bla_dst_own->magic,
+ sizeof(bla_dst->magic)) != 0)
+ return 0;
+
+ /* check if there is a claim frame encapsulated deeper in (QinQ) and
+ * drop that, as this is not supported by BLA but should also not be
+ * sent via the mesh.
+ */
+ if (vlan_depth > 1)
+ return 1;
/* check if it is a claim frame. */
ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index e7ee65dc20bf..cbd677f48c00 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -448,10 +448,15 @@ out:
* possibly free it
* @softif_vlan: the vlan object to release
*/
-void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan)
+void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan)
{
- if (atomic_dec_and_test(&softif_vlan->refcount))
- kfree_rcu(softif_vlan, rcu);
+ if (atomic_dec_and_test(&vlan->refcount)) {
+ spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
+ hlist_del_rcu(&vlan->list);
+ spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);
+
+ kfree_rcu(vlan, rcu);
+ }
}
/**
@@ -505,6 +510,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
if (!vlan)
return -ENOMEM;
+ vlan->bat_priv = bat_priv;
vlan->vid = vid;
atomic_set(&vlan->refcount, 1);
@@ -516,6 +522,10 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
return err;
}
+ spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+ hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
+
/* add a new TT local entry. This one will be marked with the NOPURGE
* flag
*/
@@ -523,10 +533,6 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
bat_priv->soft_iface->dev_addr, vid,
BATADV_NULL_IFINDEX, BATADV_NO_MARK);
- spin_lock_bh(&bat_priv->softif_vlan_list_lock);
- hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
- spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
-
return 0;
}
@@ -538,18 +544,13 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
struct batadv_softif_vlan *vlan)
{
- spin_lock_bh(&bat_priv->softif_vlan_list_lock);
- hlist_del_rcu(&vlan->list);
- spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
-
- batadv_sysfs_del_vlan(bat_priv, vlan);
-
/* explicitly remove the associated TT local entry because it is marked
* with the NOPURGE flag
*/
batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr,
vlan->vid, "vlan interface destroyed", false);
+ batadv_sysfs_del_vlan(bat_priv, vlan);
batadv_softif_vlan_free_ref(vlan);
}
@@ -567,6 +568,8 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
unsigned short vid)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
+ struct batadv_softif_vlan *vlan;
+ int ret;
/* only 802.1Q vlans are supported.
* batman-adv does not know how to handle other types
@@ -576,7 +579,36 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
vid |= BATADV_VLAN_HAS_TAG;
- return batadv_softif_create_vlan(bat_priv, vid);
+ /* if a new vlan is getting created and it already exists, it means that
+ * it was not deleted yet. batadv_softif_vlan_get() increases the
+ * refcount in order to revive the object.
+ *
+ * if it does not exist then create it.
+ */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ if (!vlan)
+ return batadv_softif_create_vlan(bat_priv, vid);
+
+ /* recreate the sysfs object if it was already destroyed (and it should
+ * be since we received a kill_vid() for this vlan)
+ */
+ if (!vlan->kobj) {
+ ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
+ if (ret) {
+ batadv_softif_vlan_free_ref(vlan);
+ return ret;
+ }
+ }
+
+ /* add a new TT local entry. This one will be marked with the NOPURGE
+ * flag. This must be added again, even if the vlan object already
+ * exists, because the entry was deleted by kill_vid()
+ */
+ batadv_tt_local_add(bat_priv->soft_iface,
+ bat_priv->soft_iface->dev_addr, vid,
+ BATADV_NULL_IFINDEX, BATADV_NO_MARK);
+
+ return 0;
}
/**
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index d636bde72c9a..5f59e7f899a0 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -511,6 +511,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct batadv_tt_local_entry *tt_local;
struct batadv_tt_global_entry *tt_global = NULL;
+ struct batadv_softif_vlan *vlan;
struct net_device *in_dev = NULL;
struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry;
@@ -572,6 +573,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
if (!tt_local)
goto out;
+ /* increase the refcounter of the related vlan */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",
addr, BATADV_PRINT_VID(vid),
@@ -604,6 +608,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
if (unlikely(hash_added != 0)) {
/* remove the reference for the hash */
batadv_tt_local_entry_free_ref(tt_local);
+ batadv_softif_vlan_free_ref(vlan);
goto out;
}
@@ -1009,6 +1014,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
{
struct batadv_tt_local_entry *tt_local_entry;
uint16_t flags, curr_flags = BATADV_NO_FLAGS;
+ struct batadv_softif_vlan *vlan;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
@@ -1039,6 +1045,11 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
hlist_del_rcu(&tt_local_entry->common.hash_entry);
batadv_tt_local_entry_free_ref(tt_local_entry);
+ /* decrease the reference held for this vlan */
+ vlan = batadv_softif_vlan_get(bat_priv, vid);
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+
out:
if (tt_local_entry)
batadv_tt_local_entry_free_ref(tt_local_entry);
@@ -1111,6 +1122,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
spinlock_t *list_lock; /* protects write access to the hash lists */
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
+ struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
uint32_t i;
@@ -1131,6 +1143,13 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
tt_local = container_of(tt_common_entry,
struct batadv_tt_local_entry,
common);
+
+ /* decrease the reference held for this vlan */
+ vlan = batadv_softif_vlan_get(bat_priv,
+ tt_common_entry->vid);
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+
batadv_tt_local_entry_free_ref(tt_local);
}
spin_unlock_bh(list_lock);
@@ -3139,6 +3158,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_local_entry *tt_local;
+ struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -3167,6 +3187,12 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
tt_local = container_of(tt_common,
struct batadv_tt_local_entry,
common);
+
+ /* decrease the reference held for this vlan */
+ vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
+ batadv_softif_vlan_free_ref(vlan);
+ batadv_softif_vlan_free_ref(vlan);
+
batadv_tt_local_entry_free_ref(tt_local);
}
spin_unlock_bh(list_lock);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 34891a56773f..8854c05622a9 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -687,6 +687,7 @@ struct batadv_priv_nc {
/**
* struct batadv_softif_vlan - per VLAN attributes set
+ * @bat_priv: pointer to the mesh object
* @vid: VLAN identifier
* @kobj: kobject for sysfs vlan subdirectory
* @ap_isolation: AP isolation state
@@ -696,6 +697,7 @@ struct batadv_priv_nc {
* @rcu: struct used for freeing in a RCU-safe manner
*/
struct batadv_softif_vlan {
+ struct batadv_priv *bat_priv;
unsigned short vid;
struct kobject *kobj;
atomic_t ap_isolation; /* boolean */
diff --git a/net/core/dev.c b/net/core/dev.c
index 7990984ca364..367a586d0c8a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4096,6 +4096,8 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
+ skb->encapsulation = 0;
+ skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
napi->skb = skb;
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 9acec61f5433..dd8696a3dbec 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -150,7 +150,7 @@ int dns_query(const char *type, const char *name, size_t namelen,
goto put;
memcpy(*_result, upayload->data, len);
- *_result[len] = '\0';
+ (*_result)[len] = '\0';
if (_expiry)
*_expiry = rkey->expiry;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d5e6836cf772..d156b3c5f363 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1429,6 +1429,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff)
int proto = iph->protocol;
int err = -ENOSYS;
+ if (skb->encapsulation)
+ skb_set_inner_network_header(skb, nhoff);
+
csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index eb92deb12666..f0bdd47bbbcb 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -263,6 +263,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
int err = -ENOENT;
__be16 type;
+ skb->encapsulation = 1;
+ skb_shinfo(skb)->gso_type = SKB_GSO_GRE;
+
type = greh->protocol;
if (greh->flags & GRE_KEY)
grehlen += GRE_HEADER_SECTION;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 5e7aecea05cd..ad382499bace 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -288,6 +288,10 @@ int ip_options_compile(struct net *net,
optptr++;
continue;
}
+ if (unlikely(l < 2)) {
+ pp_ptr = optptr;
+ goto error;
+ }
optlen = optptr[1];
if (optlen < 2 || optlen > l) {
pp_ptr = optptr;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 4e86c59ec7f7..55046ecd083e 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -309,7 +309,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
return tcp_gro_complete(skb);
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 8517d3cd1aed..01b0ff9a0c2c 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -73,7 +73,7 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
return tcp_gro_complete(skb);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ab4566cfcbe4..8746ff9a8357 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -35,7 +35,7 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
{
INIT_LIST_HEAD(&afi->tables);
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&afi->list, &net->nft.af_info);
+ list_add_tail_rcu(&afi->list, &net->nft.af_info);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo);
void nft_unregister_afinfo(struct nft_af_info *afi)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del(&afi->list);
+ list_del_rcu(&afi->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
@@ -277,11 +277,14 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (family != NFPROTO_UNSPEC && family != afi->family)
continue;
- list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
@@ -294,11 +297,14 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
NLM_F_MULTI,
afi->family, table) < 0)
goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
done:
+ rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
@@ -407,6 +413,9 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
if (flags & ~NFT_TABLE_F_DORMANT)
return -EINVAL;
+ if (flags == ctx->table->flags)
+ return 0;
+
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
sizeof(struct nft_trans_table));
if (trans == NULL)
@@ -514,7 +523,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
module_put(afi->owner);
return err;
}
- list_add_tail(&table->list, &afi->tables);
+ list_add_tail_rcu(&table->list, &afi->tables);
return 0;
}
@@ -546,7 +555,7 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
return err;
- list_del(&table->list);
+ list_del_rcu(&table->list);
return 0;
}
@@ -635,13 +644,20 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
{
struct nft_stats *cpu_stats, total;
struct nlattr *nest;
+ unsigned int seq;
+ u64 pkts, bytes;
int cpu;
memset(&total, 0, sizeof(total));
for_each_possible_cpu(cpu) {
cpu_stats = per_cpu_ptr(stats, cpu);
- total.pkts += cpu_stats->pkts;
- total.bytes += cpu_stats->bytes;
+ do {
+ seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ pkts = cpu_stats->pkts;
+ bytes = cpu_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+ total.pkts += pkts;
+ total.bytes += bytes;
}
nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
if (nest == NULL)
@@ -761,12 +777,15 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (family != NFPROTO_UNSPEC && family != afi->family)
continue;
- list_for_each_entry(table, &afi->tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
+ list_for_each_entry_rcu(chain, &table->chains, list) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
@@ -778,17 +797,19 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
NLM_F_MULTI,
afi->family, table, chain) < 0)
goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
}
done:
+ rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
-
static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -861,7 +882,7 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
return ERR_PTR(-EINVAL);
- newstats = alloc_percpu(struct nft_stats);
+ newstats = netdev_alloc_pcpu_stats(struct nft_stats);
if (newstats == NULL)
return ERR_PTR(-ENOMEM);
@@ -1077,7 +1098,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
}
basechain->stats = stats;
} else {
- stats = alloc_percpu(struct nft_stats);
+ stats = netdev_alloc_pcpu_stats(struct nft_stats);
if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
@@ -1130,7 +1151,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
goto err2;
table->use++;
- list_add_tail(&chain->list, &table->chains);
+ list_add_tail_rcu(&chain->list, &table->chains);
return 0;
err2:
if (!(table->flags & NFT_TABLE_F_DORMANT) &&
@@ -1180,7 +1201,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
return err;
table->use--;
- list_del(&chain->list);
+ list_del_rcu(&chain->list);
return 0;
}
@@ -1199,9 +1220,9 @@ int nft_register_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (type->family == NFPROTO_UNSPEC)
- list_add_tail(&type->list, &nf_tables_expressions);
+ list_add_tail_rcu(&type->list, &nf_tables_expressions);
else
- list_add(&type->list, &nf_tables_expressions);
+ list_add_rcu(&type->list, &nf_tables_expressions);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -1216,7 +1237,7 @@ EXPORT_SYMBOL_GPL(nft_register_expr);
void nft_unregister_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del(&type->list);
+ list_del_rcu(&type->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_expr);
@@ -1549,16 +1570,17 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- u8 genctr = ACCESS_ONCE(net->nft.genctr);
- u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (family != NFPROTO_UNSPEC && family != afi->family)
continue;
- list_for_each_entry(table, &afi->tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
- list_for_each_entry(rule, &chain->rules, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ list_for_each_entry_rcu(rule, &chain->rules, list) {
if (!nft_rule_is_active(net, rule))
goto cont;
if (idx < s_idx)
@@ -1572,6 +1594,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
NLM_F_MULTI | NLM_F_APPEND,
afi->family, table, chain, rule) < 0)
goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -1579,9 +1603,7 @@ cont:
}
}
done:
- /* Invalidate this dump, a transition to the new generation happened */
- if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
- return -EBUSY;
+ rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
@@ -1932,7 +1954,7 @@ static LIST_HEAD(nf_tables_set_ops);
int nft_register_set(struct nft_set_ops *ops)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail(&ops->list, &nf_tables_set_ops);
+ list_add_tail_rcu(&ops->list, &nf_tables_set_ops);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
@@ -1941,7 +1963,7 @@ EXPORT_SYMBOL_GPL(nft_register_set);
void nft_unregister_set(struct nft_set_ops *ops)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_del(&ops->list);
+ list_del_rcu(&ops->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_set);
@@ -2234,7 +2256,10 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
if (cb->args[1])
return skb->len;
- list_for_each_entry(set, &ctx->table->sets, list) {
+ rcu_read_lock();
+ cb->seq = ctx->net->nft.base_seq;
+
+ list_for_each_entry_rcu(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
@@ -2242,11 +2267,13 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
cb->args[0] = idx;
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
cb->args[1] = 1;
done:
+ rcu_read_unlock();
return skb->len;
}
@@ -2260,7 +2287,10 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
if (cb->args[1])
return skb->len;
- list_for_each_entry(table, &ctx->afi->tables, list) {
+ rcu_read_lock();
+ cb->seq = ctx->net->nft.base_seq;
+
+ list_for_each_entry_rcu(table, &ctx->afi->tables, list) {
if (cur_table) {
if (cur_table != table)
continue;
@@ -2269,7 +2299,7 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
}
ctx->table = table;
idx = 0;
- list_for_each_entry(set, &ctx->table->sets, list) {
+ list_for_each_entry_rcu(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
@@ -2278,12 +2308,14 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb,
cb->args[2] = (unsigned long) table;
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
cb->args[1] = 1;
done:
+ rcu_read_unlock();
return skb->len;
}
@@ -2300,7 +2332,10 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
if (cb->args[1])
return skb->len;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
if (cur_family) {
if (afi->family != cur_family)
continue;
@@ -2308,7 +2343,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
cur_family = 0;
}
- list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
if (cur_table) {
if (cur_table != table)
continue;
@@ -2319,7 +2354,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
ctx->table = table;
ctx->afi = afi;
idx = 0;
- list_for_each_entry(set, &ctx->table->sets, list) {
+ list_for_each_entry_rcu(set, &ctx->table->sets, list) {
if (idx < s_idx)
goto cont;
if (nf_tables_fill_set(skb, ctx, set,
@@ -2330,6 +2365,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
cb->args[3] = afi->family;
goto done;
}
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -2339,6 +2375,7 @@ cont:
}
cb->args[1] = 1;
done:
+ rcu_read_unlock();
return skb->len;
}
@@ -2597,7 +2634,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
goto err2;
- list_add_tail(&set->list, &table->sets);
+ list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;
@@ -2617,7 +2654,7 @@ static void nft_set_destroy(struct nft_set *set)
static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
- list_del(&set->list);
+ list_del_rcu(&set->list);
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
nft_set_destroy(set);
}
@@ -2652,7 +2689,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
return err;
- list_del(&set->list);
+ list_del_rcu(&set->list);
ctx.table->use--;
return 0;
}
@@ -2704,14 +2741,14 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
bind:
binding->chain = ctx->chain;
- list_add_tail(&binding->list, &set->bindings);
+ list_add_tail_rcu(&binding->list, &set->bindings);
return 0;
}
void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding)
{
- list_del(&binding->list);
+ list_del_rcu(&binding->list);
if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
!(set->flags & NFT_SET_INACTIVE))
@@ -3346,7 +3383,7 @@ static int nf_tables_commit(struct sk_buff *skb)
struct nft_set *set;
/* Bump generation counter, invalidate any dump in progress */
- net->nft.genctr++;
+ while (++net->nft.base_seq == 0);
/* A new generation has just started */
net->nft.gencursor = gencursor_next(net);
@@ -3491,12 +3528,12 @@ static int nf_tables_abort(struct sk_buff *skb)
}
nft_trans_destroy(trans);
} else {
- list_del(&trans->ctx.table->list);
+ list_del_rcu(&trans->ctx.table->list);
}
break;
case NFT_MSG_DELTABLE:
- list_add_tail(&trans->ctx.table->list,
- &trans->ctx.afi->tables);
+ list_add_tail_rcu(&trans->ctx.table->list,
+ &trans->ctx.afi->tables);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWCHAIN:
@@ -3507,7 +3544,7 @@ static int nf_tables_abort(struct sk_buff *skb)
nft_trans_destroy(trans);
} else {
trans->ctx.table->use--;
- list_del(&trans->ctx.chain->list);
+ list_del_rcu(&trans->ctx.chain->list);
if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
trans->ctx.chain->flags & NFT_BASE_CHAIN) {
nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
@@ -3517,8 +3554,8 @@ static int nf_tables_abort(struct sk_buff *skb)
break;
case NFT_MSG_DELCHAIN:
trans->ctx.table->use++;
- list_add_tail(&trans->ctx.chain->list,
- &trans->ctx.table->chains);
+ list_add_tail_rcu(&trans->ctx.chain->list,
+ &trans->ctx.table->chains);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWRULE:
@@ -3532,12 +3569,12 @@ static int nf_tables_abort(struct sk_buff *skb)
break;
case NFT_MSG_NEWSET:
trans->ctx.table->use--;
- list_del(&nft_trans_set(trans)->list);
+ list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
trans->ctx.table->use++;
- list_add_tail(&nft_trans_set(trans)->list,
- &trans->ctx.table->sets);
+ list_add_tail_rcu(&nft_trans_set(trans)->list,
+ &trans->ctx.table->sets);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
@@ -3951,6 +3988,7 @@ static int nf_tables_init_net(struct net *net)
{
INIT_LIST_HEAD(&net->nft.af_info);
INIT_LIST_HEAD(&net->nft.commit_list);
+ net->nft.base_seq = 1;
return 0;
}
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 345acfb1720b..3b90eb2b2c55 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -109,7 +109,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
struct nft_data data[NFT_REG_MAX + 1];
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
- struct nft_stats __percpu *stats;
+ struct nft_stats *stats;
int rulenum;
/*
* Cache cursor to avoid problems in case that the cursor is updated
@@ -205,9 +205,11 @@ next_rule:
nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
rcu_read_lock_bh();
- stats = rcu_dereference(nft_base_chain(basechain)->stats);
- __this_cpu_inc(stats->pkts);
- __this_cpu_add(stats->bytes, pkt->skb->len);
+ stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
+ u64_stats_update_begin(&stats->syncp);
+ stats->pkts++;
+ stats->bytes += pkt->skb->len;
+ u64_stats_update_end(&stats->syncp);
rcu_read_unlock_bh();
return nft_base_chain(basechain)->policy;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index c39b583ace32..70c0be8d0121 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -38,6 +38,7 @@
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/bitmap.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
@@ -460,17 +461,25 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
return 0;
}
+#define NR_U32_NODE (1<<12)
static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
{
struct tc_u_knode *n;
- unsigned int i = 0x7FF;
+ unsigned long i;
+ unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!bitmap)
+ return handle | 0xFFF;
for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
- if (i < TC_U32_NODE(n->handle))
- i = TC_U32_NODE(n->handle);
- i++;
+ set_bit(TC_U32_NODE(n->handle), bitmap);
- return handle | (i > 0xFFF ? 0xFFF : i);
+ i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
+ if (i >= NR_U32_NODE)
+ i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
+
+ kfree(bitmap);
+ return handle | (i >= NR_U32_NODE ? 0xFFF : i);
}
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {