From bb5ecb0c63ac88b6f39029f75c47f4be4e352e8d Mon Sep 17 00:00:00 2001
From: Rostislav Lisovy <lisovy@gmail.com>
Date: Fri, 24 Jan 2014 13:17:37 +0100
Subject: can: Propagate SO_PRIORITY of raw sockets to skbs

This allows controlling certain queueing disciplines by setting the
socket's SO_PRIORITY option.

For example, with the default pfifo_fast queueing discipline, which
provides three priorities, socket priority TC_PRIO_CONTROL means
higher than default and TC_PRIO_BULK means lower than default.

Signed-off-by: Rostislav Lisovy <lisovy@gmail.com>
Signed-off-by: Michal Sojka <sojkam1@fel.cvut.cz>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/raw.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/can/raw.c b/net/can/raw.c
index 07d72d852324..8be757cca2ec 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -715,6 +715,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	skb->dev = dev;
 	skb->sk  = sk;
+	skb->priority = sk->sk_priority;
 
 	err = can_send(skb, ro->loopback);
 
-- 
cgit v1.2.3


From b5aaab12b2b4bc4acab7384c17a87f3406e5047d Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 29 Jan 2014 18:08:59 +0200
Subject: net/ipv4: Use non-atomic allocation of udp offloads structure
 instance

Since udp_add_offload() can be called from non-sleepable context e.g
under this call tree from the vxlan driver use case:

  vxlan_socket_create() <-- holds the spinlock
  -> vxlan_notify_add_rx_port()
     -> udp_add_offload()  <-- schedules

we should allocate the udp_offloads structure in atomic manner.

Fixes: b582ef0 ('net: Add GRO support for UDP encapsulating protocols')
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp_offload.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 25f5cee3a08a..2ffea6f31efc 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -101,7 +101,7 @@ out:
 int udp_add_offload(struct udp_offload *uo)
 {
 	struct udp_offload_priv __rcu **head = &udp_offload_base;
-	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
+	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
 
 	if (!new_offload)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 0ae89beb283a0db5980d1d4781c7d7be2f2810d6 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Thu, 30 Jan 2014 10:11:28 +0100
Subject: can: add destructor for self generated skbs

Self generated skbuffs in net/can/bcm.c are setting a skb->sk reference but
no explicit destructor which is enforced since Linux 3.11 with commit
376c7311bdb6 (net: add a temporary sanity check in skb_orphan()).

This patch adds some helper functions to make sure that a destructor is
properly defined when a sock reference is assigned to a CAN related skb.
To create an unshared skb owned by the original sock a common helper function
has been introduced to replace open coded functions to create CAN echo skbs.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Tested-by: Andre Naujoks <nautsch2@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/dev.c        | 15 +++------------
 drivers/net/can/janz-ican3.c | 18 ++++--------------
 drivers/net/can/vcan.c       |  9 ++++-----
 include/linux/can/skb.h      | 38 ++++++++++++++++++++++++++++++++++++++
 net/can/af_can.c             |  3 ++-
 net/can/bcm.c                |  4 ++--
 6 files changed, 53 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 13a909822e25..fc59bc6f040b 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -323,19 +323,10 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	if (!priv->echo_skb[idx]) {
-		struct sock *srcsk = skb->sk;
 
-		if (atomic_read(&skb->users) != 1) {
-			struct sk_buff *old_skb = skb;
-
-			skb = skb_clone(old_skb, GFP_ATOMIC);
-			kfree_skb(old_skb);
-			if (!skb)
-				return;
-		} else
-			skb_orphan(skb);
-
-		skb->sk = srcsk;
+		skb = can_create_echo_skb(skb);
+		if (!skb)
+			return;
 
 		/* make settings for echo to reduce code in irq context */
 		skb->protocol = htons(ETH_P_CAN);
diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c
index e24e6690d672..2124c6790687 100644
--- a/drivers/net/can/janz-ican3.c
+++ b/drivers/net/can/janz-ican3.c
@@ -18,6 +18,7 @@
 #include <linux/netdevice.h>
 #include <linux/can.h>
 #include <linux/can/dev.h>
+#include <linux/can/skb.h>
 #include <linux/can/error.h>
 
 #include <linux/mfd/janz.h>
@@ -1133,20 +1134,9 @@ static void ican3_handle_message(struct ican3_dev *mod, struct ican3_msg *msg)
  */
 static void ican3_put_echo_skb(struct ican3_dev *mod, struct sk_buff *skb)
 {
-	struct sock *srcsk = skb->sk;
-
-	if (atomic_read(&skb->users) != 1) {
-		struct sk_buff *old_skb = skb;
-
-		skb = skb_clone(old_skb, GFP_ATOMIC);
-		kfree_skb(old_skb);
-		if (!skb)
-			return;
-	} else {
-		skb_orphan(skb);
-	}
-
-	skb->sk = srcsk;
+	skb = can_create_echo_skb(skb);
+	if (!skb)
+		return;
 
 	/* save this skb for tx interrupt echo handling */
 	skb_queue_tail(&mod->echoq, skb);
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index 0a2a5ee79a17..4e94057ef5cf 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -46,6 +46,7 @@
 #include <linux/if_ether.h>
 #include <linux/can.h>
 #include <linux/can/dev.h>
+#include <linux/can/skb.h>
 #include <linux/slab.h>
 #include <net/rtnetlink.h>
 
@@ -109,25 +110,23 @@ static netdev_tx_t vcan_tx(struct sk_buff *skb, struct net_device *dev)
 			stats->rx_packets++;
 			stats->rx_bytes += cfd->len;
 		}
-		kfree_skb(skb);
+		consume_skb(skb);
 		return NETDEV_TX_OK;
 	}
 
 	/* perform standard echo handling for CAN network interfaces */
 
 	if (loop) {
-		struct sock *srcsk = skb->sk;
 
-		skb = skb_share_check(skb, GFP_ATOMIC);
+		skb = can_create_echo_skb(skb);
 		if (!skb)
 			return NETDEV_TX_OK;
 
 		/* receive with packet counting */
-		skb->sk = srcsk;
 		vcan_rx(skb, dev);
 	} else {
 		/* no looped packets => no counting */
-		kfree_skb(skb);
+		consume_skb(skb);
 	}
 	return NETDEV_TX_OK;
 }
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index 2f0543f7510c..f9bbbb472663 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -11,7 +11,9 @@
 #define CAN_SKB_H
 
 #include <linux/types.h>
+#include <linux/skbuff.h>
 #include <linux/can.h>
+#include <net/sock.h>
 
 /*
  * The struct can_skb_priv is used to transport additional information along
@@ -42,4 +44,40 @@ static inline void can_skb_reserve(struct sk_buff *skb)
 	skb_reserve(skb, sizeof(struct can_skb_priv));
 }
 
+static inline void can_skb_destructor(struct sk_buff *skb)
+{
+	sock_put(skb->sk);
+}
+
+static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
+{
+	if (sk) {
+		sock_hold(sk);
+		skb->destructor = can_skb_destructor;
+		skb->sk = sk;
+	}
+}
+
+/*
+ * returns an unshared skb owned by the original sock to be echo'ed back
+ */
+static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
+{
+	if (skb_shared(skb)) {
+		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+		if (likely(nskb)) {
+			can_skb_set_owner(nskb, skb->sk);
+			consume_skb(skb);
+			return nskb;
+		} else {
+			kfree_skb(skb);
+			return NULL;
+		}
+	}
+
+	/* we can assume to have an unshared skb with proper owner */
+	return skb;
+}
+
 #endif /* CAN_SKB_H */
diff --git a/net/can/af_can.c b/net/can/af_can.c
index d249874a366d..a27f8aad9e99 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -57,6 +57,7 @@
 #include <linux/skbuff.h>
 #include <linux/can.h>
 #include <linux/can/core.h>
+#include <linux/can/skb.h>
 #include <linux/ratelimit.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -290,7 +291,7 @@ int can_send(struct sk_buff *skb, int loop)
 				return -ENOMEM;
 			}
 
-			newskb->sk = skb->sk;
+			can_skb_set_owner(newskb, skb->sk);
 			newskb->ip_summed = CHECKSUM_UNNECESSARY;
 			newskb->pkt_type = PACKET_BROADCAST;
 		}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 3fc737b214c7..dcb75c0e66c1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -268,7 +268,7 @@ static void bcm_can_tx(struct bcm_op *op)
 
 	/* send with loopback */
 	skb->dev = dev;
-	skb->sk = op->sk;
+	can_skb_set_owner(skb, op->sk);
 	can_send(skb, 1);
 
 	/* update statistics */
@@ -1223,7 +1223,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 
 	can_skb_prv(skb)->ifindex = dev->ifindex;
 	skb->dev = dev;
-	skb->sk  = sk;
+	can_skb_set_owner(skb, sk);
 	err = can_send(skb, 1); /* send with loopback */
 	dev_put(dev);
 
-- 
cgit v1.2.3


From b045d37bd68c20ca88123c2b363cac5e3dae815f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 3 Feb 2014 12:52:14 -0800
Subject: ip_tunnel: fix panic in ip_tunnel_xmit()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Setting rt variable to NULL at the beginning of ip_tunnel_xmit()
missed possible use of this variable as a scratch value.

Also fixes a possible dst leak in tunnel_dst_check() :
If we had to call tunnel_dst_reset(), we forgot to
release the reference on dst.

Merges tunnel_dst_get()/tunnel_dst_check() into
a single tunnel_rtable_get() function for clarity.

Many thanks to Tommi for his report and tests.

Fixes: 7d442fab0a67 ("ipv4: Cache dst in tunnels")
Reported-by: Tommi Rantala <tt.rantala@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Tommi Rantala <tt.rantala@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index bd28f386bd02..50228be5c17b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -101,28 +101,22 @@ static void tunnel_dst_reset_all(struct ip_tunnel *t)
 		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
 }
 
-static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
+static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
 {
 	struct dst_entry *dst;
 
 	rcu_read_lock();
 	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
-	if (dst)
+	if (dst) {
+		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+			rcu_read_unlock();
+			tunnel_dst_reset(t);
+			return NULL;
+		}
 		dst_hold(dst);
-	rcu_read_unlock();
-	return dst;
-}
-
-static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
-{
-	struct dst_entry *dst = tunnel_dst_get(t);
-
-	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
-		tunnel_dst_reset(t);
-		return NULL;
 	}
-
-	return dst;
+	rcu_read_unlock();
+	return (struct rtable *)dst;
 }
 
 /* Often modified stats are per cpu, other are shared (netdev->stats) */
@@ -584,7 +578,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	struct flowi4 fl4;
 	u8     tos, ttl;
 	__be16 df;
-	struct rtable *rt = NULL;	/* Route to the other host */
+	struct rtable *rt;		/* Route to the other host */
 	unsigned int max_headroom;	/* The extra header space needed */
 	__be32 dst;
 	int err;
@@ -657,8 +651,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
 			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
 
-	if (connected)
-		rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
+	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
 
 	if (!rt) {
 		rt = ip_route_output_key(tunnel->net, &fl4);
-- 
cgit v1.2.3


From 2a971354e74f3837d14b9c8d7f7983b0c9c330e4 Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Thu, 30 Jan 2014 08:50:20 +0100
Subject: ipvs: fix AF assignment in ip_vs_conn_new()

If a fwmark is passed to ip_vs_conn_new(), it is passed in
vaddr, not daddr. Therefore we should set AF to AF_UNSPEC in
vaddr assignment (like we do in ip_vs_ct_in_get()), otherwise we
may copy only first 4 bytes of an IPv6 address into cp->daddr.

Signed-off-by: Bogdano Arendartchuk <barendartchuk@suse.com>
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_conn.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 59a1a85bcb3e..a8eb0a89326a 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -871,11 +871,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	cp->protocol	   = p->protocol;
 	ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
 	cp->cport	   = p->cport;
-	ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);
-	cp->vport	   = p->vport;
-	/* proto should only be IPPROTO_IP if d_addr is a fwmark */
+	/* proto should only be IPPROTO_IP if p->vaddr is a fwmark */
 	ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
-		       &cp->daddr, daddr);
+		       &cp->vaddr, p->vaddr);
+	cp->vport	   = p->vport;
+	ip_vs_addr_set(p->af, &cp->daddr, daddr);
 	cp->dport          = dport;
 	cp->flags	   = flags;
 	cp->fwmark         = fwmark;
-- 
cgit v1.2.3


From a664a4f7aa4f01ca8728e3ec43618327416fc8ff Mon Sep 17 00:00:00 2001
From: Shlomo Pongratz <shlomop@mellanox.com>
Date: Sun, 2 Feb 2014 15:42:10 +0200
Subject: net/ipv4: Use proper RCU APIs for writer-side in udp_offload.c

RCU writer side should use rcu_dereference_protected() and not
rcu_dereference(), fix that. This also removes the "suspicious RCU usage"
warning seen when running with CONFIG_PROVE_RCU.

Also, don't use rcu_assign_pointer/rcu_dereference for pointers
which are invisible beyond the udp offload code.

Fixes: b582ef0 ('net: Add GRO support for UDP encapsulating protocols')
Reported-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp_offload.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 2ffea6f31efc..88b4023ecfcf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -17,6 +17,8 @@
 static DEFINE_SPINLOCK(udp_offload_lock);
 static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
 
+#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))
+
 struct udp_offload_priv {
 	struct udp_offload	*offload;
 	struct rcu_head		rcu;
@@ -100,7 +102,6 @@ out:
 
 int udp_add_offload(struct udp_offload *uo)
 {
-	struct udp_offload_priv __rcu **head = &udp_offload_base;
 	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
 
 	if (!new_offload)
@@ -109,8 +110,8 @@ int udp_add_offload(struct udp_offload *uo)
 	new_offload->offload = uo;
 
 	spin_lock(&udp_offload_lock);
-	rcu_assign_pointer(new_offload->next, rcu_dereference(*head));
-	rcu_assign_pointer(*head, new_offload);
+	new_offload->next = udp_offload_base;
+	rcu_assign_pointer(udp_offload_base, new_offload);
 	spin_unlock(&udp_offload_lock);
 
 	return 0;
@@ -130,12 +131,12 @@ void udp_del_offload(struct udp_offload *uo)
 
 	spin_lock(&udp_offload_lock);
 
-	uo_priv = rcu_dereference(*head);
+	uo_priv = udp_deref_protected(*head);
 	for (; uo_priv != NULL;
-		uo_priv = rcu_dereference(*head)) {
-
+	     uo_priv = udp_deref_protected(*head)) {
 		if (uo_priv->offload == uo) {
-			rcu_assign_pointer(*head, rcu_dereference(uo_priv->next));
+			rcu_assign_pointer(*head,
+					   udp_deref_protected(uo_priv->next));
 			goto unlock;
 		}
 		head = &uo_priv->next;
-- 
cgit v1.2.3


From 6049f2530cf2cb48a6fe8735309cc0b97aa7f700 Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando_b1@lab.ntt.co.jp>
Date: Tue, 4 Feb 2014 19:35:02 +0900
Subject: rtnetlink: fix oops in rtnl_link_get_slave_info_data_size

We should check whether rtnetlink link operations
are defined before calling get_slave_size().

Without this, the following oops can occur when
adding a tap device to OVS.

[   87.839553] BUG: unable to handle kernel NULL pointer dereference at 00000000000000a8
[   87.839595] IP: [<ffffffff813d47c0>] if_nlmsg_size+0xf0/0x220
[...]
[   87.840651] Call Trace:
[   87.840664]  [<ffffffff813d694b>] ? rtmsg_ifinfo+0x2b/0x100
[   87.840688]  [<ffffffff813c8340>] ? __netdev_adjacent_dev_insert+0x150/0x1a0
[   87.840718]  [<ffffffff813d6a50>] ? rtnetlink_event+0x30/0x40
[   87.840742]  [<ffffffff814b4144>] ? notifier_call_chain+0x44/0x70
[   87.840768]  [<ffffffff813c8946>] ? __netdev_upper_dev_link+0x3c6/0x3f0
[   87.840798]  [<ffffffffa0678d6c>] ? netdev_create+0xcc/0x160 [openvswitch]
[   87.840828]  [<ffffffffa06781ea>] ? ovs_vport_add+0x4a/0xd0 [openvswitch]
[   87.840857]  [<ffffffffa0670139>] ? new_vport+0x9/0x50 [openvswitch]
[   87.840884]  [<ffffffffa067279e>] ? ovs_vport_cmd_new+0x11e/0x210 [openvswitch]
[   87.840915]  [<ffffffff813f3efa>] ? genl_family_rcv_msg+0x19a/0x360
[   87.840941]  [<ffffffff813f40c0>] ? genl_family_rcv_msg+0x360/0x360
[   87.840967]  [<ffffffff813f4139>] ? genl_rcv_msg+0x79/0xc0
[   87.840991]  [<ffffffff813b6cf9>] ? __kmalloc_reserve.isra.25+0x29/0x80
[   87.841018]  [<ffffffff813f2389>] ? netlink_rcv_skb+0xa9/0xc0
[   87.841042]  [<ffffffff813f27cf>] ? genl_rcv+0x1f/0x30
[   87.841064]  [<ffffffff813f1988>] ? netlink_unicast+0xe8/0x1e0
[   87.841088]  [<ffffffff813f1d9a>] ? netlink_sendmsg+0x31a/0x750
[   87.841113]  [<ffffffff813aee96>] ? sock_sendmsg+0x86/0xc0
[   87.841136]  [<ffffffff813c960d>] ? __netdev_update_features+0x4d/0x200
[   87.841163]  [<ffffffff813ca94e>] ? ethtool_get_value+0x2e/0x50
[   87.841188]  [<ffffffff813af269>] ? ___sys_sendmsg+0x359/0x370
[   87.841212]  [<ffffffff813da686>] ? dev_ioctl+0x1a6/0x5c0
[   87.841236]  [<ffffffff8109c210>] ? autoremove_wake_function+0x30/0x30
[   87.841264]  [<ffffffff813ac59d>] ? sock_do_ioctl+0x3d/0x50
[   87.841288]  [<ffffffff813aca68>] ? sock_ioctl+0x1e8/0x2c0
[   87.841312]  [<ffffffff811934bf>] ? do_vfs_ioctl+0x2cf/0x4b0
[   87.841335]  [<ffffffff813afeb9>] ? __sys_sendmsg+0x39/0x70
[   87.841362]  [<ffffffff814b86f9>] ? system_call_fastpath+0x16/0x1b
[   87.841386] Code: c0 74 10 48 89 ef ff d0 83 c0 07 83 e0 fc 48 98 49 01 c7 48 89 ef e8 d0 d6 fe ff 48 85 c0 0f 84 df 00 00 00 48 8b 90 08 07 00 00 <48> 8b 8a a8 00 00 00 31 d2 48 85 c9 74 0c 48 89 ee 48 89 c7 ff
[   87.841529] RIP  [<ffffffff813d47c0>] if_nlmsg_size+0xf0/0x220
[   87.841555]  RSP <ffff880221aa5950>
[   87.841569] CR2: 00000000000000a8
[   87.851442] ---[ end trace e42ab217691b4fc2 ]---

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 393b1bc9a618..048dc8d183aa 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -374,7 +374,7 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
 	if (!master_dev)
 		return 0;
 	ops = master_dev->rtnl_link_ops;
-	if (!ops->get_slave_size)
+	if (!ops || !ops->get_slave_size)
 		return 0;
 	/* IFLA_INFO_SLAVE_DATA + nested data */
 	return nla_total_size(sizeof(struct nlattr)) +
-- 
cgit v1.2.3


From aea0bb4f8ee513537ad84b9f3f609f96e272d98e Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 14 Jan 2014 16:27:49 +0000
Subject: openvswitch: Pad OVS_PACKET_ATTR_PACKET if linear copy was performed

While the zerocopy method is correctly omitted if user space
does not support unaligned Netlink messages. The attribute is
still not padded correctly as skb_zerocopy() will not ensure
padding and the attribute size is no longer pre calculated
though nla_reserve() which ensured padding previously.

This patch applies appropriate padding if a linear data copy
was performed in skb_zerocopy().

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/datapath.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index df4692826ead..d1a73a6102f8 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -466,6 +466,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 
 	skb_zerocopy(user_skb, skb, skb->len, hlen);
 
+	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
+	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
+		size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
+
+		if (plen > 0)
+			memset(skb_put(user_skb, plen), 0, plen);
+	}
+
 	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
 
 	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
-- 
cgit v1.2.3


From e80857cce82da31e41a6599fc888dfc92e0167cc Mon Sep 17 00:00:00 2001
From: Andy Zhou <azhou@nicira.com>
Date: Tue, 21 Jan 2014 09:31:04 -0800
Subject: openvswitch: Fix kernel panic on ovs_flow_free

Both mega flow mask's reference counter and per flow table mask list
should only be accessed when holding ovs_mutex() lock. However
this is not true with ovs_flow_table_flush(). The patch fixes this bug.

Reported-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/datapath.c   |  9 +++--
 net/openvswitch/flow_table.c | 84 +++++++++++++++++++++-----------------------
 net/openvswitch/flow_table.h |  2 +-
 3 files changed, 47 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d1a73a6102f8..e1b337e0bf4d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,6 +55,7 @@
 
 #include "datapath.h"
 #include "flow.h"
+#include "flow_table.h"
 #include "flow_netlink.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
@@ -160,7 +161,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
 {
 	struct datapath *dp = container_of(rcu, struct datapath, rcu);
 
-	ovs_flow_tbl_destroy(&dp->table);
 	free_percpu(dp->stats_percpu);
 	release_net(ovs_dp_get_net(dp));
 	kfree(dp->ports);
@@ -1287,7 +1287,7 @@ err_destroy_ports_array:
 err_destroy_percpu:
 	free_percpu(dp->stats_percpu);
 err_destroy_table:
-	ovs_flow_tbl_destroy(&dp->table);
+	ovs_flow_tbl_destroy(&dp->table, false);
 err_free_dp:
 	release_net(ovs_dp_get_net(dp));
 	kfree(dp);
@@ -1314,10 +1314,13 @@ static void __dp_destroy(struct datapath *dp)
 	list_del_rcu(&dp->list_node);
 
 	/* OVSP_LOCAL is datapath internal port. We need to make sure that
-	 * all port in datapath are destroyed first before freeing datapath.
+	 * all ports in datapath are destroyed first before freeing datapath.
 	 */
 	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 
+	/* RCU destroy the flow table */
+	ovs_flow_tbl_destroy(&dp->table, true);
+
 	call_rcu(&dp->rcu, destroy_dp_rcu);
 }
 
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index c58a0fe3c889..bd14052ed342 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -153,29 +153,27 @@ static void rcu_free_flow_callback(struct rcu_head *rcu)
 	flow_free(flow);
 }
 
-static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
+void ovs_flow_free(struct sw_flow *flow, bool deferred)
 {
-	if (!mask)
+	if (!flow)
 		return;
 
-	BUG_ON(!mask->ref_count);
-	mask->ref_count--;
+	ASSERT_OVSL();
 
-	if (!mask->ref_count) {
-		list_del_rcu(&mask->list);
-		if (deferred)
-			kfree_rcu(mask, rcu);
-		else
-			kfree(mask);
-	}
-}
+	if (flow->mask) {
+		struct sw_flow_mask *mask = flow->mask;
 
-void ovs_flow_free(struct sw_flow *flow, bool deferred)
-{
-	if (!flow)
-		return;
+		BUG_ON(!mask->ref_count);
+		mask->ref_count--;
 
-	flow_mask_del_ref(flow->mask, deferred);
+		if (!mask->ref_count) {
+			list_del_rcu(&mask->list);
+			if (deferred)
+				kfree_rcu(mask, rcu);
+			else
+				kfree(mask);
+		}
+	}
 
 	if (deferred)
 		call_rcu(&flow->rcu, rcu_free_flow_callback);
@@ -188,26 +186,9 @@ static void free_buckets(struct flex_array *buckets)
 	flex_array_free(buckets);
 }
 
+
 static void __table_instance_destroy(struct table_instance *ti)
 {
-	int i;
-
-	if (ti->keep_flows)
-		goto skip_flows;
-
-	for (i = 0; i < ti->n_buckets; i++) {
-		struct sw_flow *flow;
-		struct hlist_head *head = flex_array_get(ti->buckets, i);
-		struct hlist_node *n;
-		int ver = ti->node_ver;
-
-		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
-			hlist_del(&flow->hash_node[ver]);
-			ovs_flow_free(flow, false);
-		}
-	}
-
-skip_flows:
 	free_buckets(ti->buckets);
 	kfree(ti);
 }
@@ -258,20 +239,38 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
 
 static void table_instance_destroy(struct table_instance *ti, bool deferred)
 {
+	int i;
+
 	if (!ti)
 		return;
 
+	if (ti->keep_flows)
+		goto skip_flows;
+
+	for (i = 0; i < ti->n_buckets; i++) {
+		struct sw_flow *flow;
+		struct hlist_head *head = flex_array_get(ti->buckets, i);
+		struct hlist_node *n;
+		int ver = ti->node_ver;
+
+		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
+			hlist_del_rcu(&flow->hash_node[ver]);
+			ovs_flow_free(flow, deferred);
+		}
+	}
+
+skip_flows:
 	if (deferred)
 		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
 	else
 		__table_instance_destroy(ti);
 }
 
-void ovs_flow_tbl_destroy(struct flow_table *table)
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
 {
 	struct table_instance *ti = ovsl_dereference(table->ti);
 
-	table_instance_destroy(ti, false);
+	table_instance_destroy(ti, deferred);
 }
 
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -504,16 +503,11 @@ static struct sw_flow_mask *mask_alloc(void)
 
 	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
 	if (mask)
-		mask->ref_count = 0;
+		mask->ref_count = 1;
 
 	return mask;
 }
 
-static void mask_add_ref(struct sw_flow_mask *mask)
-{
-	mask->ref_count++;
-}
-
 static bool mask_equal(const struct sw_flow_mask *a,
 		       const struct sw_flow_mask *b)
 {
@@ -554,9 +548,11 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 		mask->key = new->key;
 		mask->range = new->range;
 		list_add_rcu(&mask->list, &tbl->mask_list);
+	} else {
+		BUG_ON(!mask->ref_count);
+		mask->ref_count++;
 	}
 
-	mask_add_ref(mask);
 	flow->mask = mask;
 	return 0;
 }
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 1996e34c0fd8..baaeb101924d 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -60,7 +60,7 @@ void ovs_flow_free(struct sw_flow *, bool deferred);
 
 int ovs_flow_tbl_init(struct flow_table *);
 int ovs_flow_tbl_count(struct flow_table *table);
-void ovs_flow_tbl_destroy(struct flow_table *table);
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
 int ovs_flow_tbl_flush(struct flow_table *flow_table);
 
 int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
-- 
cgit v1.2.3


From 45fb9c35b27c9982e9a55d04ed0a5230a2d0b306 Mon Sep 17 00:00:00 2001
From: Daniele Di Proietto <daniele.di.proietto@gmail.com>
Date: Thu, 23 Jan 2014 10:47:35 -0800
Subject: openvswitch: Fix ovs_dp_cmd_msg_size()

commit 43d4be9cb55f3bac5253e9289996fd9d735531db (openvswitch: Allow user space
to announce ability to accept unaligned Netlink messages) introduced
OVS_DP_ATTR_USER_FEATURES netlink attribute in datapath responses,
but the attribute size was not taken into account in ovs_dp_cmd_msg_size().

Signed-off-by: Daniele Di Proietto <daniele.di.proietto@gmail.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/datapath.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index e1b337e0bf4d..58689dda8377 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1087,6 +1087,7 @@ static size_t ovs_dp_cmd_msg_size(void)
 	msgsize += nla_total_size(IFNAMSIZ);
 	msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
 	msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
 
 	return msgsize;
 }
-- 
cgit v1.2.3


From e4c6d7595403e943a3afc334eb8c0efcd840043a Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Fri, 31 Jan 2014 09:43:23 -0800
Subject: openvswitch: Fix ovs_flow_free() ovs-lock assert.

ovs_flow_free() is not called under ovs-lock during packet
execute path (ovs_packet_cmd_execute()). Since packet execute
does not touch flow->mask, there is no need to take that
lock either. So move assert in case where flow->mask is checked.

Found by code inspection.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/flow_table.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index bd14052ed342..3c268b3d71c3 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -158,11 +158,13 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred)
 	if (!flow)
 		return;
 
-	ASSERT_OVSL();
-
 	if (flow->mask) {
 		struct sw_flow_mask *mask = flow->mask;
 
+		/* ovs-lock is required to protect mask-refcount and
+		 * mask list.
+		 */
+		ASSERT_OVSL();
 		BUG_ON(!mask->ref_count);
 		mask->ref_count--;
 
-- 
cgit v1.2.3


From c14e0953ca51dbcb8d1ac92acbdcff23d0caa158 Mon Sep 17 00:00:00 2001
From: Andy Zhou <azhou@nicira.com>
Date: Sun, 2 Feb 2014 17:08:06 -0800
Subject: openvswitch: Suppress error messages on megaflow updates

With subfacets, we'd expect megaflow updates message to carry
the original micro flow. If not, EINVAL is returned and kernel
logs an error message.  Now that the user space subfacet layer is
removed, it is expected that flow updates can arrive with a
micro flow other than the original. Change the return code to
EEXIST and remove the kernel error log message.

Reported-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/datapath.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 58689dda8377..e9a48baf8551 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -860,11 +860,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 			goto err_unlock_ovs;
 
 		/* The unmasked key has to be the same for flow updates. */
-		error = -EINVAL;
-		if (!ovs_flow_cmp_unmasked_key(flow, &match)) {
-			OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
+		if (!ovs_flow_cmp_unmasked_key(flow, &match))
 			goto err_unlock_ovs;
-		}
 
 		/* Update actions. */
 		old_acts = ovsl_dereference(flow->sf_acts);
-- 
cgit v1.2.3


From 2a53bfb3e0fb6aa6b1ac93e5979a040a4b57ea8b Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Fri, 17 Jan 2014 02:28:45 +0100
Subject: netfilter: nft_ct: fix unconditional dump of 'dir' attr

We want to make sure that the information that we get from the kernel can
be reinjected without troubles. The kernel shouldn't return an attribute
that is not required, or even prohibited.

Dumping unconditionally NFTA_CT_DIRECTION could lead an application in
userspace to interpret that the attribute was originally set, while it
was not.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_ct.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 917052e20602..feaf0f354a93 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -311,8 +311,19 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
 		goto nla_put_failure;
 	if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
 		goto nla_put_failure;
-	if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
-		goto nla_put_failure;
+
+	switch (priv->key) {
+	case NFT_CT_PROTOCOL:
+	case NFT_CT_SRC:
+	case NFT_CT_DST:
+	case NFT_CT_PROTO_SRC:
+	case NFT_CT_PROTO_DST:
+		if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+			goto nla_put_failure;
+	default:
+		break;
+	}
+
 	return 0;
 
 nla_put_failure:
-- 
cgit v1.2.3


From 3dd7279fb6db05ec5a088cd0cae6ba22580a82bd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 25 Jan 2014 08:04:07 +0000
Subject: netfilter: nf_tables: fix oops when deleting a chain with references

The following commands trigger an oops:

 # nft -i
 nft> add table filter
 nft> add chain filter input { type filter hook input priority 0; }
 nft> add chain filter test
 nft> add rule filter input jump test
 nft> delete chain filter test

We need to check the chain use counter before allowing destruction since
we might have references from sets or jump rules.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=69341
Reported-by: Matthew Ife <deleriux1@gmail.com>
Tested-by: Matthew Ife <deleriux1@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 117bbaaddde6..9ce30534f853 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1045,7 +1045,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
 
-	if (!list_empty(&chain->rules))
+	if (!list_empty(&chain->rules) || chain->use > 0)
 		return -EBUSY;
 
 	list_del(&chain->list);
-- 
cgit v1.2.3


From c6825c0976fa7893692e0e43b09740b419b23c09 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Wed, 29 Jan 2014 19:34:14 +0100
Subject: netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get

Lets look at destroy_conntrack:

hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
...
nf_conntrack_free(ct)
	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);

net->ct.nf_conntrack_cachep is created with SLAB_DESTROY_BY_RCU.

The hash is protected by rcu, so readers look up conntracks without
locks.
A conntrack is removed from the hash, but in this moment a few readers
still can use the conntrack. Then this conntrack is released and another
thread creates conntrack with the same address and the equal tuple.
After this a reader starts to validate the conntrack:
* It's not dying, because a new conntrack was created
* nf_ct_tuple_equal() returns true.

But this conntrack is not initialized yet, so it can not be used by two
threads concurrently. In this case BUG_ON may be triggered from
nf_nat_setup_info().

Florian Westphal suggested to check the confirm bit too. I think it's
right.

task 1			task 2			task 3
			nf_conntrack_find_get
			 ____nf_conntrack_find
destroy_conntrack
 hlist_nulls_del_rcu
 nf_conntrack_free
 kmem_cache_free
						__nf_conntrack_alloc
						 kmem_cache_alloc
						 memset(&ct->tuplehash[IP_CT_DIR_MAX],
			 if (nf_ct_is_dying(ct))
			 if (!nf_ct_tuple_equal()

I'm not sure, that I have ever seen this race condition in a real life.
Currently we are investigating a bug, which is reproduced on a few nodes.
In our case one conntrack is initialized from a few tasks concurrently,
we don't have any other explanation for this.

<2>[46267.083061] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:322!
...
<4>[46267.083951] RIP: 0010:[<ffffffffa01e00a4>]  [<ffffffffa01e00a4>] nf_nat_setup_info+0x564/0x590 [nf_nat]
...
<4>[46267.085549] Call Trace:
<4>[46267.085622]  [<ffffffffa023421b>] alloc_null_binding+0x5b/0xa0 [iptable_nat]
<4>[46267.085697]  [<ffffffffa02342bc>] nf_nat_rule_find+0x5c/0x80 [iptable_nat]
<4>[46267.085770]  [<ffffffffa0234521>] nf_nat_fn+0x111/0x260 [iptable_nat]
<4>[46267.085843]  [<ffffffffa0234798>] nf_nat_out+0x48/0xd0 [iptable_nat]
<4>[46267.085919]  [<ffffffff814841b9>] nf_iterate+0x69/0xb0
<4>[46267.085991]  [<ffffffff81494e70>] ? ip_finish_output+0x0/0x2f0
<4>[46267.086063]  [<ffffffff81484374>] nf_hook_slow+0x74/0x110
<4>[46267.086133]  [<ffffffff81494e70>] ? ip_finish_output+0x0/0x2f0
<4>[46267.086207]  [<ffffffff814b5890>] ? dst_output+0x0/0x20
<4>[46267.086277]  [<ffffffff81495204>] ip_output+0xa4/0xc0
<4>[46267.086346]  [<ffffffff814b65a4>] raw_sendmsg+0x8b4/0x910
<4>[46267.086419]  [<ffffffff814c10fa>] inet_sendmsg+0x4a/0xb0
<4>[46267.086491]  [<ffffffff814459aa>] ? sock_update_classid+0x3a/0x50
<4>[46267.086562]  [<ffffffff81444d67>] sock_sendmsg+0x117/0x140
<4>[46267.086638]  [<ffffffff8151997b>] ? _spin_unlock_bh+0x1b/0x20
<4>[46267.086712]  [<ffffffff8109d370>] ? autoremove_wake_function+0x0/0x40
<4>[46267.086785]  [<ffffffff81495e80>] ? do_ip_setsockopt+0x90/0xd80
<4>[46267.086858]  [<ffffffff8100be0e>] ? call_function_interrupt+0xe/0x20
<4>[46267.086936]  [<ffffffff8118cb10>] ? ub_slab_ptr+0x20/0x90
<4>[46267.087006]  [<ffffffff8118cb10>] ? ub_slab_ptr+0x20/0x90
<4>[46267.087081]  [<ffffffff8118f2e8>] ? kmem_cache_alloc+0xd8/0x1e0
<4>[46267.087151]  [<ffffffff81445599>] sys_sendto+0x139/0x190
<4>[46267.087229]  [<ffffffff81448c0d>] ? sock_setsockopt+0x16d/0x6f0
<4>[46267.087303]  [<ffffffff810efa47>] ? audit_syscall_entry+0x1d7/0x200
<4>[46267.087378]  [<ffffffff810ef795>] ? __audit_syscall_exit+0x265/0x290
<4>[46267.087454]  [<ffffffff81474885>] ? compat_sys_setsockopt+0x75/0x210
<4>[46267.087531]  [<ffffffff81474b5f>] compat_sys_socketcall+0x13f/0x210
<4>[46267.087607]  [<ffffffff8104dea3>] ia32_sysret+0x0/0x5
<4>[46267.087676] Code: 91 20 e2 01 75 29 48 89 de 4c 89 f7 e8 56 fa ff ff 85 c0 0f 84 68 fc ff ff 0f b6 4d c6 41 8b 45 00 e9 4d fb ff ff e8 7c 19 e9 e0 <0f> 0b eb fe f6 05 17 91 20 e2 80 74 ce 80 3d 5f 2e 00 00 00 74
<1>[46267.088023] RIP  [<ffffffffa01e00a4>] nf_nat_setup_info+0x564/0x590

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_core.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8824ed0ccc9c..4d1fb5d094c3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -312,6 +312,21 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
 }
 
+static inline bool
+nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
+			const struct nf_conntrack_tuple *tuple,
+			u16 zone)
+{
+	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+	/* A conntrack can be recreated with the equal tuple,
+	 * so we need to check that the conntrack is confirmed
+	 */
+	return nf_ct_tuple_equal(tuple, &h->tuple) &&
+		nf_ct_zone(ct) == zone &&
+		nf_ct_is_confirmed(ct);
+}
+
 /*
  * Warning :
  * - Caller must take a reference on returned object
@@ -333,8 +348,7 @@ ____nf_conntrack_find(struct net *net, u16 zone,
 	local_bh_disable();
 begin:
 	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
-		if (nf_ct_tuple_equal(tuple, &h->tuple) &&
-		    nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
+		if (nf_ct_key_equal(h, tuple, zone)) {
 			NF_CT_STAT_INC(net, found);
 			local_bh_enable();
 			return h;
@@ -372,8 +386,7 @@ begin:
 			     !atomic_inc_not_zero(&ct->ct_general.use)))
 			h = NULL;
 		else {
-			if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) ||
-				     nf_ct_zone(ct) != zone)) {
+			if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
 				nf_ct_put(ct);
 				goto begin;
 			}
-- 
cgit v1.2.3


From 829d9315c46a2be57a8fb40c89aeb7db61513d96 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 3 Feb 2014 13:07:24 +0100
Subject: netfilter: nf_nat_h323: fix crash in nf_ct_unlink_expect_report()

Similar bug fixed in SIP module in 3f509c6 ("netfilter: nf_nat_sip: fix
incorrect handling of EBUSY for RTCP expectation").

BUG: unable to handle kernel paging request at 00100104
IP: [<f8214f07>] nf_ct_unlink_expect_report+0x57/0xf0 [nf_conntrack]
...
Call Trace:
  [<c0244bd8>] ? del_timer+0x48/0x70
  [<f8215687>] nf_ct_remove_expectations+0x47/0x60 [nf_conntrack]
  [<f8211c99>] nf_ct_delete_from_lists+0x59/0x90 [nf_conntrack]
  [<f8212e5e>] death_by_timeout+0x14e/0x1c0 [nf_conntrack]
  [<f8212d10>] ? nf_conntrack_set_hashsize+0x190/0x190 [nf_conntrack]
  [<c024442d>] call_timer_fn+0x1d/0x80
  [<c024461e>] run_timer_softirq+0x18e/0x1a0
  [<f8212d10>] ? nf_conntrack_set_hashsize+0x190/0x190 [nf_conntrack]
  [<c023e6f3>] __do_softirq+0xa3/0x170
  [<c023e650>] ? __local_bh_enable+0x70/0x70
  <IRQ>
  [<c023e587>] ? irq_exit+0x67/0xa0
  [<c0202af6>] ? do_IRQ+0x46/0xb0
  [<c027ad05>] ? clockevents_notify+0x35/0x110
  [<c066ac6c>] ? common_interrupt+0x2c/0x40
  [<c056e3c1>] ? cpuidle_enter_state+0x41/0xf0
  [<c056e6fb>] ? cpuidle_idle_call+0x8b/0x100
  [<c02085f8>] ? arch_cpu_idle+0x8/0x30
  [<c027314b>] ? cpu_idle_loop+0x4b/0x140
  [<c0273258>] ? cpu_startup_entry+0x18/0x20
  [<c066056d>] ? rest_init+0x5d/0x70
  [<c0813ac8>] ? start_kernel+0x2ec/0x2f2
  [<c081364f>] ? repair_env_string+0x5b/0x5b
  [<c0813269>] ? i386_start_kernel+0x33/0x35

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_h323.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9eea059dd621..574f7ebba0b6 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 			ret = nf_ct_expect_related(rtcp_exp);
 			if (ret == 0)
 				break;
-			else if (ret != -EBUSY) {
+			else if (ret == -EBUSY) {
+				nf_ct_unexpect_related(rtp_exp);
+				continue;
+			} else if (ret < 0) {
 				nf_ct_unexpect_related(rtp_exp);
 				nated_port = 0;
 				break;
-- 
cgit v1.2.3


From e53376bef2cd97d3e3f61fdc677fb8da7d03d0da Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 3 Feb 2014 20:01:53 +0100
Subject: netfilter: nf_conntrack: don't release a conntrack with non-zero
 refcnt

With this patch, the conntrack refcount is initially set to zero and
it is bumped once it is added to any of the list, so we fulfill
Eric's golden rule which is that all released objects always have a
refcount that equals zero.

Andrey Vagin reports that nf_conntrack_free can't be called for a
conntrack with non-zero ref-counter, because it can race with
nf_conntrack_find_get().

A conntrack slab is created with SLAB_DESTROY_BY_RCU. Non-zero
ref-counter says that this conntrack is used. So when we release
a conntrack with non-zero counter, we break this assumption.

CPU1                                    CPU2
____nf_conntrack_find()
                                        nf_ct_put()
                                         destroy_conntrack()
                                        ...
                                        init_conntrack
                                         __nf_conntrack_alloc (set use = 1)
atomic_inc_not_zero(&ct->use) (use = 2)
                                         if (!l4proto->new(ct, skb, dataoff, timeouts))
                                          nf_conntrack_free(ct); (use = 2 !!!)
                                        ...
                                        __nf_conntrack_alloc (set use = 1)
 if (!nf_ct_key_equal(h, tuple, zone))
  nf_ct_put(ct); (use = 0)
   destroy_conntrack()
                                        /* continue to work with CT */

After applying the path "[PATCH] netfilter: nf_conntrack: fix RCU
race in nf_conntrack_find_get" another bug was triggered in
destroy_conntrack():

<4>[67096.759334] ------------[ cut here ]------------
<2>[67096.759353] kernel BUG at net/netfilter/nf_conntrack_core.c:211!
...
<4>[67096.759837] Pid: 498649, comm: atdd veid: 666 Tainted: G         C ---------------    2.6.32-042stab084.18 #1 042stab084_18 /DQ45CB
<4>[67096.759932] RIP: 0010:[<ffffffffa03d99ac>]  [<ffffffffa03d99ac>] destroy_conntrack+0x15c/0x190 [nf_conntrack]
<4>[67096.760255] Call Trace:
<4>[67096.760255]  [<ffffffff814844a7>] nf_conntrack_destroy+0x17/0x30
<4>[67096.760255]  [<ffffffffa03d9bb5>] nf_conntrack_find_get+0x85/0x130 [nf_conntrack]
<4>[67096.760255]  [<ffffffffa03d9fb2>] nf_conntrack_in+0x352/0xb60 [nf_conntrack]
<4>[67096.760255]  [<ffffffffa048c771>] ipv4_conntrack_local+0x51/0x60 [nf_conntrack_ipv4]
<4>[67096.760255]  [<ffffffff81484419>] nf_iterate+0x69/0xb0
<4>[67096.760255]  [<ffffffff814b5b00>] ? dst_output+0x0/0x20
<4>[67096.760255]  [<ffffffff814845d4>] nf_hook_slow+0x74/0x110
<4>[67096.760255]  [<ffffffff814b5b00>] ? dst_output+0x0/0x20
<4>[67096.760255]  [<ffffffff814b66d5>] raw_sendmsg+0x775/0x910
<4>[67096.760255]  [<ffffffff8104c5a8>] ? flush_tlb_others_ipi+0x128/0x130
<4>[67096.760255]  [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20
<4>[67096.760255]  [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20
<4>[67096.760255]  [<ffffffff814c136a>] inet_sendmsg+0x4a/0xb0
<4>[67096.760255]  [<ffffffff81444e93>] ? sock_sendmsg+0x13/0x140
<4>[67096.760255]  [<ffffffff81444f97>] sock_sendmsg+0x117/0x140
<4>[67096.760255]  [<ffffffff8102e299>] ? native_smp_send_reschedule+0x49/0x60
<4>[67096.760255]  [<ffffffff81519beb>] ? _spin_unlock_bh+0x1b/0x20
<4>[67096.760255]  [<ffffffff8109d930>] ? autoremove_wake_function+0x0/0x40
<4>[67096.760255]  [<ffffffff814960f0>] ? do_ip_setsockopt+0x90/0xd80
<4>[67096.760255]  [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20
<4>[67096.760255]  [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20
<4>[67096.760255]  [<ffffffff814457c9>] sys_sendto+0x139/0x190
<4>[67096.760255]  [<ffffffff810efa77>] ? audit_syscall_entry+0x1d7/0x200
<4>[67096.760255]  [<ffffffff810ef7c5>] ? __audit_syscall_exit+0x265/0x290
<4>[67096.760255]  [<ffffffff81474daf>] compat_sys_socketcall+0x13f/0x210
<4>[67096.760255]  [<ffffffff8104dea3>] ia32_sysret+0x0/0x5

I have reused the original title for the RFC patch that Andrey posted and
most of the original patch description.

Cc: Eric Dumazet <edumazet@google.com>
Cc: Andrew Vagin <avagin@parallels.com>
Cc: Florian Westphal <fw@strlen.de>
Reported-by: Andrew Vagin <avagin@parallels.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Andrew Vagin <avagin@parallels.com>
---
 include/net/netfilter/nf_conntrack.h |  2 ++
 net/netfilter/nf_conntrack_core.c    | 34 +++++++++++++++++++++++++++++-----
 net/netfilter/nf_synproxy_core.c     |  5 ++---
 net/netfilter/xt_CT.c                |  7 +------
 4 files changed, 34 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 01ea6eed1bb1..b2ac6246b7e0 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -284,6 +284,8 @@ extern unsigned int nf_conntrack_max;
 extern unsigned int nf_conntrack_hash_rnd;
 void init_nf_conntrack_hash_rnd(void);
 
+void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl);
+
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4d1fb5d094c3..356bef519fe5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -448,7 +448,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 			goto out;
 
 	add_timer(&ct->timeout);
-	nf_conntrack_get(&ct->ct_general);
+	smp_wmb();
+	/* The caller holds a reference to this object */
+	atomic_set(&ct->ct_general.use, 2);
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
 	NF_CT_STAT_INC(net, insert);
 	spin_unlock_bh(&nf_conntrack_lock);
@@ -462,6 +464,21 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 
+/* deletion from this larval template list happens via nf_ct_put() */
+void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
+{
+	__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
+	__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+	nf_conntrack_get(&tmpl->ct_general);
+
+	spin_lock_bh(&nf_conntrack_lock);
+	/* Overload tuple linked list to put us in template list. */
+	hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+				 &net->ct.tmpl);
+	spin_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
+
 /* Confirm a connection given skb; places it in hash table */
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
@@ -733,11 +750,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
 		nf_ct_zone->id = zone;
 	}
 #endif
-	/*
-	 * changes to lookup keys must be done before setting refcnt to 1
+	/* Because we use RCU lookups, we set ct_general.use to zero before
+	 * this is inserted in any list.
 	 */
-	smp_wmb();
-	atomic_set(&ct->ct_general.use, 1);
+	atomic_set(&ct->ct_general.use, 0);
 	return ct;
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
@@ -761,6 +777,11 @@ void nf_conntrack_free(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 
+	/* A freed object has refcnt == 0, that's
+	 * the golden rule for SLAB_DESTROY_BY_RCU
+	 */
+	NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+
 	nf_ct_ext_destroy(ct);
 	nf_ct_ext_free(ct);
 	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
@@ -856,6 +877,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 		NF_CT_STAT_INC(net, new);
 	}
 
+	/* Now it is inserted into the unconfirmed list, bump refcount */
+	nf_conntrack_get(&ct->ct_general);
+
 	/* Overload tuple linked list to put us in unconfirmed list. */
 	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
 		       &net->ct.unconfirmed);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 9858e3e51a3a..52e20c9a46a5 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -363,9 +363,8 @@ static int __net_init synproxy_net_init(struct net *net)
 		goto err2;
 	if (!nfct_synproxy_ext_add(ct))
 		goto err2;
-	__set_bit(IPS_TEMPLATE_BIT, &ct->status);
-	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
 
+	nf_conntrack_tmpl_insert(net, ct);
 	snet->tmpl = ct;
 
 	snet->stats = alloc_percpu(struct synproxy_stats);
@@ -390,7 +389,7 @@ static void __net_exit synproxy_net_exit(struct net *net)
 {
 	struct synproxy_net *snet = synproxy_pernet(net);
 
-	nf_conntrack_free(snet->tmpl);
+	nf_ct_put(snet->tmpl);
 	synproxy_proc_exit(net);
 	free_percpu(snet->stats);
 }
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 5929be622c5c..75747aecdebe 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -228,12 +228,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 			goto err3;
 	}
 
-	__set_bit(IPS_TEMPLATE_BIT, &ct->status);
-	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
-
-	/* Overload tuple linked list to put us in template list. */
-	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-				 &par->net->ct.tmpl);
+	nf_conntrack_tmpl_insert(par->net, ct);
 out:
 	info->ct = ct;
 	return 0;
-- 
cgit v1.2.3


From 53b70287ddf487a38b7cbf0a10db28f40714b799 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Feb 2014 12:26:22 +0100
Subject: netfilter: nf_tables: fix overrun in nf_tables_set_alloc_name()

The map that is used to allocate anonymous sets is indeed
BITS_PER_BYTE * PAGE_SIZE long.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9ce30534f853..2a22a186eb3d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1989,13 +1989,13 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 
 			if (!sscanf(i->name, name, &tmp))
 				continue;
-			if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE)
+			if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE)
 				continue;
 
 			set_bit(tmp, inuse);
 		}
 
-		n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE);
+		n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
 		free_page((unsigned long)inuse);
 	}
 
-- 
cgit v1.2.3


From ec2c9935688fbd5eaa7c975e3e21562c3da77363 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Feb 2014 15:03:35 +0000
Subject: netfilter: nf_tables: fix potential oops when dumping sets

Commit c9c8e48597 (netfilter: nf_tables: dump sets in all existing families)
changed nft_ctx_init_from_setattr() to only look up the address family if it
is not NFPROTO_UNSPEC. However if it is NFPROTO_UNSPEC and a table attribute
is given, nftables_afinfo_lookup() will dereference the NULL afi pointer.

Fix by checking for non-NULL afi and also move a check added by that commit
to the proper position.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2a22a186eb3d..3c5a219f4242 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1943,6 +1943,9 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
 	}
 
 	if (nla[NFTA_SET_TABLE] != NULL) {
+		if (afi == NULL)
+			return -EAFNOSUPPORT;
+
 		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
 		if (IS_ERR(table))
 			return PTR_ERR(table);
@@ -2428,6 +2431,8 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_ctx ctx;
 	int err;
 
+	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+		return -EAFNOSUPPORT;
 	if (nla[NFTA_SET_TABLE] == NULL)
 		return -EINVAL;
 
@@ -2435,9 +2440,6 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
-		return -EAFNOSUPPORT;
-
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
-- 
cgit v1.2.3


From 51292c0735eb2d9e29115cbf6264845e19a6c77d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Feb 2014 15:03:36 +0000
Subject: netfilter: nft_ct: fix missing NFT_CT_L3PROTOCOL key in validity
 checks

The key was missing in the list of valid keys, add it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_ct.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index feaf0f354a93..46e275403838 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -226,6 +226,7 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
 		if (tb[NFTA_CT_DIRECTION] != NULL)
 			return -EINVAL;
 		break;
+	case NFT_CT_L3PROTOCOL:
 	case NFT_CT_PROTOCOL:
 	case NFT_CT_SRC:
 	case NFT_CT_DST:
-- 
cgit v1.2.3


From 64d46806b6218c97f68742c5663a8ae3a5fbe838 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Feb 2014 15:03:37 +0000
Subject: netfilter: nf_tables: add AF specific expression support

For the reject module, we need to add AF-specific implementations to
get rid of incorrect module dependencies. Try to load an AF-specific
module first and fall back to generic modules.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  5 +++++
 net/netfilter/nf_tables_api.c     | 22 ++++++++++++++++------
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 57c8ff7955df..0f68e47d3e5e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -252,6 +252,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
  *	@owner: module reference
  *	@policy: netlink attribute policy
  *	@maxattr: highest netlink attribute number
+ *	@family: address family for AF-specific types
  */
 struct nft_expr_type {
 	const struct nft_expr_ops	*(*select_ops)(const struct nft_ctx *,
@@ -262,6 +263,7 @@ struct nft_expr_type {
 	struct module			*owner;
 	const struct nla_policy		*policy;
 	unsigned int			maxattr;
+	u8				family;
 };
 
 /**
@@ -529,6 +531,9 @@ void nft_unregister_expr(struct nft_expr_type *);
 #define MODULE_ALIAS_NFT_CHAIN(family, name) \
 	MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
 
+#define MODULE_ALIAS_NFT_AF_EXPR(family, name) \
+	MODULE_ALIAS("nft-expr-" __stringify(family) "-" name)
+
 #define MODULE_ALIAS_NFT_EXPR(name) \
 	MODULE_ALIAS("nft-expr-" name)
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3c5a219f4242..113c469c7579 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1114,35 +1114,45 @@ void nft_unregister_expr(struct nft_expr_type *type)
 }
 EXPORT_SYMBOL_GPL(nft_unregister_expr);
 
-static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla)
+static const struct nft_expr_type *__nft_expr_type_get(u8 family,
+						       struct nlattr *nla)
 {
 	const struct nft_expr_type *type;
 
 	list_for_each_entry(type, &nf_tables_expressions, list) {
-		if (!nla_strcmp(nla, type->name))
+		if (!nla_strcmp(nla, type->name) &&
+		    (!type->family || type->family == family))
 			return type;
 	}
 	return NULL;
 }
 
-static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla)
+static const struct nft_expr_type *nft_expr_type_get(u8 family,
+						     struct nlattr *nla)
 {
 	const struct nft_expr_type *type;
 
 	if (nla == NULL)
 		return ERR_PTR(-EINVAL);
 
-	type = __nft_expr_type_get(nla);
+	type = __nft_expr_type_get(family, nla);
 	if (type != NULL && try_module_get(type->owner))
 		return type;
 
 #ifdef CONFIG_MODULES
 	if (type == NULL) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-expr-%u-%.*s", family,
+			       nla_len(nla), (char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (__nft_expr_type_get(family, nla))
+			return ERR_PTR(-EAGAIN);
+
 		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 		request_module("nft-expr-%.*s",
 			       nla_len(nla), (char *)nla_data(nla));
 		nfnl_lock(NFNL_SUBSYS_NFTABLES);
-		if (__nft_expr_type_get(nla))
+		if (__nft_expr_type_get(family, nla))
 			return ERR_PTR(-EAGAIN);
 	}
 #endif
@@ -1193,7 +1203,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
 	if (err < 0)
 		return err;
 
-	type = nft_expr_type_get(tb[NFTA_EXPR_NAME]);
+	type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]);
 	if (IS_ERR(type))
 		return PTR_ERR(type);
 
-- 
cgit v1.2.3


From cc4723ca316742891954efa346298e7c747c0d17 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Feb 2014 15:03:38 +0000
Subject: netfilter: nft_reject: split up reject module into IPv4 and IPv6
 specifc parts

Currently the nft_reject module depends on symbols from ipv6. This is
wrong since no generic module should force IPv6 support to be loaded.
Split up the module into AF-specific and a generic part.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nft_reject.h   | 17 +++++++
 net/ipv4/netfilter/Kconfig           |  5 ++
 net/ipv4/netfilter/Makefile          |  1 +
 net/ipv4/netfilter/nft_reject_ipv4.c | 74 ++++++++++++++++++++++++++++++
 net/ipv6/netfilter/Kconfig           |  5 ++
 net/ipv6/netfilter/Makefile          |  1 +
 net/ipv6/netfilter/nft_reject_ipv6.c | 75 ++++++++++++++++++++++++++++++
 net/netfilter/Kconfig                |  1 -
 net/netfilter/nft_reject.c           | 89 ++++--------------------------------
 9 files changed, 187 insertions(+), 81 deletions(-)
 create mode 100644 include/net/netfilter/nft_reject.h
 create mode 100644 net/ipv4/netfilter/nft_reject_ipv4.c
 create mode 100644 net/ipv6/netfilter/nft_reject_ipv6.c

(limited to 'net')

diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h
new file mode 100644
index 000000000000..ecda75945e77
--- /dev/null
+++ b/include/net/netfilter/nft_reject.h
@@ -0,0 +1,17 @@
+#ifndef _NFT_REJECT_H_
+#define _NFT_REJECT_H_
+
+struct nft_reject {
+	enum nft_reject_types	type:8;
+	u8			icmp_code;
+};
+
+extern const struct nla_policy nft_reject_policy[];
+
+int nft_reject_init(const struct nft_ctx *ctx,
+		    const struct nft_expr *expr,
+		    const struct nlattr * const tb[]);
+
+int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr);
+
+#endif
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 81c6910cfa92..a26ce035e3fa 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4
 	  packet transformations such as the source, destination address and
 	  source and destination ports.
 
+config NFT_REJECT_IPV4
+	depends on NF_TABLES_IPV4
+	default NFT_REJECT
+	tristate
+
 config NF_TABLES_ARP
 	depends on NF_TABLES
 	tristate "ARP nf_tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c16be9d58420..90b82405331e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
 
 # generic IP tables 
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..e935d8de1182
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
+
+static void nft_reject_ipv4_eval(const struct nft_expr *expr,
+				 struct nft_data data[NFT_REG_MAX + 1],
+				 const struct nft_pktinfo *pkt)
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		nf_send_unreach(pkt->skb, priv->icmp_code);
+		break;
+	case NFT_REJECT_TCP_RST:
+		nf_send_reset(pkt->skb, pkt->ops->hooknum);
+		break;
+	}
+
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static struct nft_expr_type nft_reject_ipv4_type;
+static const struct nft_expr_ops nft_reject_ipv4_ops = {
+	.type		= &nft_reject_ipv4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+	.eval		= nft_reject_ipv4_eval,
+	.init		= nft_reject_init,
+	.dump		= nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.name		= "reject",
+	.ops		= &nft_reject_ipv4_ops,
+	.policy		= nft_reject_policy,
+	.maxattr	= NFTA_REJECT_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_reject_ipv4_module_init(void)
+{
+	return nft_register_expr(&nft_reject_ipv4_type);
+}
+
+static void __exit nft_reject_ipv4_module_exit(void)
+{
+	nft_unregister_expr(&nft_reject_ipv4_type);
+}
+
+module_init(nft_reject_ipv4_module_init);
+module_exit(nft_reject_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 35750df744dc..4bff1f297e39 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -50,6 +50,11 @@ config NFT_CHAIN_NAT_IPV6
 	  packet transformations such as the source, destination address and
 	  source and destination ports.
 
+config NFT_REJECT_IPV6
+	depends on NF_TABLES_IPV6
+	default NFT_REJECT
+	tristate
+
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d1b4928f34f7..70d3dd66f2cd 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
 obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
+obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
 
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
new file mode 100644
index 000000000000..f73285924144
--- /dev/null
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+static void nft_reject_ipv6_eval(const struct nft_expr *expr,
+				 struct nft_data data[NFT_REG_MAX + 1],
+				 const struct nft_pktinfo *pkt)
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+	struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+				 pkt->ops->hooknum);
+		break;
+	case NFT_REJECT_TCP_RST:
+		nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+		break;
+	}
+
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static struct nft_expr_type nft_reject_ipv6_type;
+static const struct nft_expr_ops nft_reject_ipv6_ops = {
+	.type		= &nft_reject_ipv6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+	.eval		= nft_reject_ipv6_eval,
+	.init		= nft_reject_init,
+	.dump		= nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.name		= "reject",
+	.ops		= &nft_reject_ipv6_ops,
+	.policy		= nft_reject_policy,
+	.maxattr	= NFTA_REJECT_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_reject_ipv6_module_init(void)
+{
+	return nft_register_expr(&nft_reject_ipv6_type);
+}
+
+static void __exit nft_reject_ipv6_module_exit(void)
+{
+	nft_unregister_expr(&nft_reject_ipv6_type);
+}
+
+module_init(nft_reject_ipv6_module_init);
+module_exit(nft_reject_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c37467562fd0..ed8b50e62276 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -513,7 +513,6 @@ config NFT_QUEUE
 
 config NFT_REJECT
 	depends on NF_TABLES
-	depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6
 	default m if NETFILTER_ADVANCED=n
 	tristate "Netfilter nf_tables reject support"
 	help
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 5e204711d704..f3448c296446 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -16,65 +16,23 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
-#include <net/icmp.h>
-#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
 
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
-#include <net/netfilter/ipv6/nf_reject.h>
-#endif
-
-struct nft_reject {
-	enum nft_reject_types	type:8;
-	u8			icmp_code;
-	u8			family;
-};
-
-static void nft_reject_eval(const struct nft_expr *expr,
-			      struct nft_data data[NFT_REG_MAX + 1],
-			      const struct nft_pktinfo *pkt)
-{
-	struct nft_reject *priv = nft_expr_priv(expr);
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
-	struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
-#endif
-	switch (priv->type) {
-	case NFT_REJECT_ICMP_UNREACH:
-		if (priv->family == NFPROTO_IPV4)
-			nf_send_unreach(pkt->skb, priv->icmp_code);
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
-		else if (priv->family == NFPROTO_IPV6)
-			nf_send_unreach6(net, pkt->skb, priv->icmp_code,
-				      pkt->ops->hooknum);
-#endif
-		break;
-	case NFT_REJECT_TCP_RST:
-		if (priv->family == NFPROTO_IPV4)
-			nf_send_reset(pkt->skb, pkt->ops->hooknum);
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
-		else if (priv->family == NFPROTO_IPV6)
-			nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
-#endif
-		break;
-	}
-
-	data[NFT_REG_VERDICT].verdict = NF_DROP;
-}
-
-static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
 	[NFTA_REJECT_TYPE]		= { .type = NLA_U32 },
 	[NFTA_REJECT_ICMP_CODE]		= { .type = NLA_U8 },
 };
+EXPORT_SYMBOL_GPL(nft_reject_policy);
 
-static int nft_reject_init(const struct nft_ctx *ctx,
-			   const struct nft_expr *expr,
-			   const struct nlattr * const tb[])
+int nft_reject_init(const struct nft_ctx *ctx,
+		    const struct nft_expr *expr,
+		    const struct nlattr * const tb[])
 {
 	struct nft_reject *priv = nft_expr_priv(expr);
 
 	if (tb[NFTA_REJECT_TYPE] == NULL)
 		return -EINVAL;
 
-	priv->family = ctx->afi->family;
 	priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
 	switch (priv->type) {
 	case NFT_REJECT_ICMP_UNREACH:
@@ -89,8 +47,9 @@ static int nft_reject_init(const struct nft_ctx *ctx,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nft_reject_init);
 
-static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_reject *priv = nft_expr_priv(expr);
 
@@ -109,37 +68,7 @@ static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
 nla_put_failure:
 	return -1;
 }
-
-static struct nft_expr_type nft_reject_type;
-static const struct nft_expr_ops nft_reject_ops = {
-	.type		= &nft_reject_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
-	.eval		= nft_reject_eval,
-	.init		= nft_reject_init,
-	.dump		= nft_reject_dump,
-};
-
-static struct nft_expr_type nft_reject_type __read_mostly = {
-	.name		= "reject",
-	.ops		= &nft_reject_ops,
-	.policy		= nft_reject_policy,
-	.maxattr	= NFTA_REJECT_MAX,
-	.owner		= THIS_MODULE,
-};
-
-static int __init nft_reject_module_init(void)
-{
-	return nft_register_expr(&nft_reject_type);
-}
-
-static void __exit nft_reject_module_exit(void)
-{
-	nft_unregister_expr(&nft_reject_type);
-}
-
-module_init(nft_reject_module_init);
-module_exit(nft_reject_module_exit);
+EXPORT_SYMBOL_GPL(nft_reject_dump);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("reject");
-- 
cgit v1.2.3


From 05513e9e33dbded8124567466a444d32173eecc6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 5 Feb 2014 15:03:39 +0000
Subject: netfilter: nf_tables: add reject module for NFPROTO_INET

Add a reject module for NFPROTO_INET. It does nothing but dispatch
to the AF-specific modules based on the hook family.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nft_reject.h   |  8 +++++
 net/ipv4/netfilter/nft_reject_ipv4.c |  7 ++--
 net/ipv6/netfilter/nft_reject_ipv6.c |  7 ++--
 net/netfilter/Kconfig                |  5 +++
 net/netfilter/Makefile               |  1 +
 net/netfilter/nft_reject_inet.c      | 63 ++++++++++++++++++++++++++++++++++++
 6 files changed, 85 insertions(+), 6 deletions(-)
 create mode 100644 net/netfilter/nft_reject_inet.c

(limited to 'net')

diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h
index ecda75945e77..36b0da2d55bb 100644
--- a/include/net/netfilter/nft_reject.h
+++ b/include/net/netfilter/nft_reject.h
@@ -14,4 +14,12 @@ int nft_reject_init(const struct nft_ctx *ctx,
 
 int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr);
 
+void nft_reject_ipv4_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt);
+
+void nft_reject_ipv6_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt);
+
 #endif
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index e935d8de1182..e79718a382f2 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -20,9 +20,9 @@
 #include <net/netfilter/ipv4/nf_reject.h>
 #include <net/netfilter/nft_reject.h>
 
-static void nft_reject_ipv4_eval(const struct nft_expr *expr,
-				 struct nft_data data[NFT_REG_MAX + 1],
-				 const struct nft_pktinfo *pkt)
+void nft_reject_ipv4_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
 {
 	struct nft_reject *priv = nft_expr_priv(expr);
 
@@ -37,6 +37,7 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
 
 	data[NFT_REG_VERDICT].verdict = NF_DROP;
 }
+EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
 
 static struct nft_expr_type nft_reject_ipv4_type;
 static const struct nft_expr_ops nft_reject_ipv4_ops = {
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index f73285924144..0bc19fa87821 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -19,9 +19,9 @@
 #include <net/netfilter/nft_reject.h>
 #include <net/netfilter/ipv6/nf_reject.h>
 
-static void nft_reject_ipv6_eval(const struct nft_expr *expr,
-				 struct nft_data data[NFT_REG_MAX + 1],
-				 const struct nft_pktinfo *pkt)
+void nft_reject_ipv6_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
 {
 	struct nft_reject *priv = nft_expr_priv(expr);
 	struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
@@ -38,6 +38,7 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
 
 	data[NFT_REG_VERDICT].verdict = NF_DROP;
 }
+EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval);
 
 static struct nft_expr_type nft_reject_ipv6_type;
 static const struct nft_expr_ops nft_reject_ipv6_ops = {
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ed8b50e62276..e9410d17619d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -520,6 +520,11 @@ config NFT_REJECT
 	  explicitly deny and notify via TCP reset/ICMP informational errors
 	  unallowed traffic.
 
+config NFT_REJECT_INET
+	depends on NF_TABLES_INET
+	default NFT_REJECT
+	tristate
+
 config NFT_COMPAT
 	depends on NF_TABLES
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index ee9c4de5f8ed..bffdad774da7 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -79,6 +79,7 @@ obj-$(CONFIG_NFT_LIMIT)		+= nft_limit.o
 obj-$(CONFIG_NFT_NAT)		+= nft_nat.o
 obj-$(CONFIG_NFT_QUEUE)		+= nft_queue.o
 obj-$(CONFIG_NFT_REJECT) 	+= nft_reject.o
+obj-$(CONFIG_NFT_REJECT_INET)	+= nft_reject_inet.o
 obj-$(CONFIG_NFT_RBTREE)	+= nft_rbtree.o
 obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
 obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
new file mode 100644
index 000000000000..8a310f239c93
--- /dev/null
+++ b/net/netfilter/nft_reject_inet.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2014 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+
+static void nft_reject_inet_eval(const struct nft_expr *expr,
+				 struct nft_data data[NFT_REG_MAX + 1],
+				 const struct nft_pktinfo *pkt)
+{
+	switch (pkt->ops->pf) {
+	case NFPROTO_IPV4:
+		nft_reject_ipv4_eval(expr, data, pkt);
+	case NFPROTO_IPV6:
+		nft_reject_ipv6_eval(expr, data, pkt);
+	}
+}
+
+static struct nft_expr_type nft_reject_inet_type;
+static const struct nft_expr_ops nft_reject_inet_ops = {
+	.type		= &nft_reject_inet_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+	.eval		= nft_reject_inet_eval,
+	.init		= nft_reject_init,
+	.dump		= nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_inet_type __read_mostly = {
+	.family		= NFPROTO_INET,
+	.name		= "reject",
+	.ops		= &nft_reject_inet_ops,
+	.policy		= nft_reject_policy,
+	.maxattr	= NFTA_REJECT_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_reject_inet_module_init(void)
+{
+	return nft_register_expr(&nft_reject_inet_type);
+}
+
+static void __exit nft_reject_inet_module_exit(void)
+{
+	nft_unregister_expr(&nft_reject_inet_type);
+}
+
+module_init(nft_reject_inet_module_init);
+module_exit(nft_reject_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(1, "reject");
-- 
cgit v1.2.3


From 2f617435c3a6fe3f39efb9ae2baa77de2d6c97b8 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Sun, 12 Jan 2014 11:06:37 +0200
Subject: mac80211: move roc cookie assignment earlier

ieee80211_start_roc_work() might add a new roc
to existing roc, and tell cfg80211 it has already
started.

However, this might happen before the roc cookie
was set, resulting in REMAIN_ON_CHANNEL (started)
event with null cookie. Consequently, it can make
wpa_supplicant go out of sync.

Fix it by setting the roc cookie earlier.

Cc: stable@vger.kernel.org
Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f9ae9b85d4c1..94b4acb5aabb 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2638,6 +2638,24 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
 	INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work);
 	INIT_LIST_HEAD(&roc->dependents);
 
+	/*
+	 * cookie is either the roc cookie (for normal roc)
+	 * or the SKB (for mgmt TX)
+	 */
+	if (!txskb) {
+		/* local->mtx protects this */
+		local->roc_cookie_counter++;
+		roc->cookie = local->roc_cookie_counter;
+		/* wow, you wrapped 64 bits ... more likely a bug */
+		if (WARN_ON(roc->cookie == 0)) {
+			roc->cookie = 1;
+			local->roc_cookie_counter++;
+		}
+		*cookie = roc->cookie;
+	} else {
+		*cookie = (unsigned long)txskb;
+	}
+
 	/* if there's one pending or we're scanning, queue this one */
 	if (!list_empty(&local->roc_list) ||
 	    local->scanning || local->radar_detect_enabled)
@@ -2772,24 +2790,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
 	if (!queued)
 		list_add_tail(&roc->list, &local->roc_list);
 
-	/*
-	 * cookie is either the roc cookie (for normal roc)
-	 * or the SKB (for mgmt TX)
-	 */
-	if (!txskb) {
-		/* local->mtx protects this */
-		local->roc_cookie_counter++;
-		roc->cookie = local->roc_cookie_counter;
-		/* wow, you wrapped 64 bits ... more likely a bug */
-		if (WARN_ON(roc->cookie == 0)) {
-			roc->cookie = 1;
-			local->roc_cookie_counter++;
-		}
-		*cookie = roc->cookie;
-	} else {
-		*cookie = (unsigned long)txskb;
-	}
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From f12cb2893069495726c21a4b0178705dacfecfe0 Mon Sep 17 00:00:00 2001
From: Pontus Fuchs <pontus.fuchs@gmail.com>
Date: Thu, 16 Jan 2014 15:00:40 +0100
Subject: nl80211: Reset split_start when netlink skb is exhausted

When the netlink skb is exhausted split_start is left set. In the
subsequent retry, with a larger buffer, the dump is continued from the
failing point instead of from the beginning.

This was causing my rt28xx based USB dongle to now show up when
running "iw list" with an old iw version without split dump support.

Cc: stable@vger.kernel.org
Fixes: 3713b4e364ef ("nl80211: allow splitting wiphy information in dumps")
Signed-off-by: Pontus Fuchs <pontus.fuchs@gmail.com>
[avoid the entire workaround when state->split is set]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7a742594916e..6ea960b1a8eb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1719,9 +1719,10 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
 				 * We can then retry with the larger buffer.
 				 */
 				if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
-				    !skb->len &&
+				    !skb->len && !state->split &&
 				    cb->min_dump_alloc < 4096) {
 					cb->min_dump_alloc = 4096;
+					state->split_start = 0;
 					rtnl_unlock();
 					return 1;
 				}
-- 
cgit v1.2.3


From 5a6aa705ffdd97552ff756bbfa7d5a3b62a6c690 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 23 Jan 2014 16:32:29 +0100
Subject: cfg80211: re-enable 5/10 MHz support

Unfortunately I forgot this during the merge window, but the
patch seems small enough to go in as a fix. The userspace API
bug that was the reason for disabling it has long been fixed.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index d89dee2259b5..77fe4c791269 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -440,9 +440,6 @@ int wiphy_register(struct wiphy *wiphy)
 	int i;
 	u16 ifmodes = wiphy->interface_modes;
 
-	/* support for 5/10 MHz is broken due to nl80211 API mess - disable */
-	wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ;
-
 	/*
 	 * There are major locking problems in nl80211/mac80211 for CSA,
 	 * disable for all drivers until this has been reworked.
-- 
cgit v1.2.3


From 8ffcc704c963b4157391bd87a4544cdfd18b574d Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 23 Jan 2014 14:28:16 +0200
Subject: mac80211: avoid deadlock revealed by lockdep
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sdata->u.ap.request_smps_work can’t be flushed synchronously
under wdev_lock(wdev) since ieee80211_request_smps_ap_work
itself locks the same lock.
While at it, reset the driver_smps_mode when the ap is
stopped to its default: OFF.

This solves:

======================================================
[ INFO: possible circular locking dependency detected ]
3.12.0-ipeer+ #2 Tainted: G           O
-------------------------------------------------------
rmmod/2867 is trying to acquire lock:
  ((&sdata->u.ap.request_smps_work)){+.+...}, at: [<c105b8d0>] flush_work+0x0/0x90

but task is already holding lock:
  (&wdev->mtx){+.+.+.}, at: [<f9b32626>] cfg80211_stop_ap+0x26/0x230 [cfg80211]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&wdev->mtx){+.+.+.}:
        [<c10aefa9>] lock_acquire+0x79/0xe0
        [<c1607a1a>] mutex_lock_nested+0x4a/0x360
        [<fb06288b>] ieee80211_request_smps_ap_work+0x2b/0x50 [mac80211]
        [<c105cdd8>] process_one_work+0x198/0x450
        [<c105d469>] worker_thread+0xf9/0x320
        [<c10669ff>] kthread+0x9f/0xb0
        [<c1613397>] ret_from_kernel_thread+0x1b/0x28

-> #0 ((&sdata->u.ap.request_smps_work)){+.+...}:
        [<c10ae9df>] __lock_acquire+0x183f/0x1910
        [<c10aefa9>] lock_acquire+0x79/0xe0
        [<c105b917>] flush_work+0x47/0x90
        [<c105d867>] __cancel_work_timer+0x67/0xe0
        [<c105d90f>] cancel_work_sync+0xf/0x20
        [<fb0765cc>] ieee80211_stop_ap+0x8c/0x340 [mac80211]
        [<f9b3268c>] cfg80211_stop_ap+0x8c/0x230 [cfg80211]
        [<f9b0d8f9>] cfg80211_leave+0x79/0x100 [cfg80211]
        [<f9b0da72>] cfg80211_netdev_notifier_call+0xf2/0x4f0 [cfg80211]
        [<c160f2c9>] notifier_call_chain+0x59/0x130
        [<c106c6de>] __raw_notifier_call_chain+0x1e/0x30
        [<c106c70f>] raw_notifier_call_chain+0x1f/0x30
        [<c14f8213>] call_netdevice_notifiers_info+0x33/0x70
        [<c14f8263>] call_netdevice_notifiers+0x13/0x20
        [<c14f82a4>] __dev_close_many+0x34/0xb0
        [<c14f83fe>] dev_close_many+0x6e/0xc0
        [<c14f9c77>] rollback_registered_many+0xa7/0x1f0
        [<c14f9dd4>] unregister_netdevice_many+0x14/0x60
        [<fb06f4d9>] ieee80211_remove_interfaces+0xe9/0x170 [mac80211]
        [<fb055116>] ieee80211_unregister_hw+0x56/0x110 [mac80211]
        [<fa3e9396>] iwl_op_mode_mvm_stop+0x26/0xe0 [iwlmvm]
        [<f9b9d8ca>] _iwl_op_mode_stop+0x3a/0x70 [iwlwifi]
        [<f9b9d96f>] iwl_opmode_deregister+0x6f/0x90 [iwlwifi]
        [<fa405179>] __exit_compat+0xd/0x19 [iwlmvm]
        [<c10b8bf9>] SyS_delete_module+0x179/0x2b0
        [<c1613421>] sysenter_do_call+0x12/0x32

Fixes: 687da132234f ("mac80211: implement SMPS for AP")
Cc: <stable@vger.kernel.org> [3.13]
Reported-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c   |  3 +--
 net/mac80211/ht.c    |  4 +++-
 net/mac80211/iface.c | 15 +++++++++++----
 3 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 94b4acb5aabb..33acdca4a1df 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1090,8 +1090,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 	kfree(sdata->u.ap.next_beacon);
 	sdata->u.ap.next_beacon = NULL;
 
-	cancel_work_sync(&sdata->u.ap.request_smps_work);
-
 	/* turn off carrier for this interface and dependent VLANs */
 	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
 		netif_carrier_off(vlan->dev);
@@ -1103,6 +1101,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 	kfree_rcu(old_beacon, rcu_head);
 	if (old_probe_resp)
 		kfree_rcu(old_probe_resp, rcu_head);
+	sdata->u.ap.driver_smps_mode = IEEE80211_SMPS_OFF;
 
 	__sta_info_flush(sdata, true);
 	ieee80211_free_keys(sdata, true);
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index fab7b91923e0..70dd013de836 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -466,7 +466,9 @@ void ieee80211_request_smps_ap_work(struct work_struct *work)
 			     u.ap.request_smps_work);
 
 	sdata_lock(sdata);
-	__ieee80211_request_smps_ap(sdata, sdata->u.ap.driver_smps_mode);
+	if (sdata_dereference(sdata->u.ap.beacon, sdata))
+		__ieee80211_request_smps_ap(sdata,
+					    sdata->u.ap.driver_smps_mode);
 	sdata_unlock(sdata);
 }
 
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 3dfd20a453ab..ae2eb148a028 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -770,12 +770,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_roc_purge(local, sdata);
 
-	if (sdata->vif.type == NL80211_IFTYPE_STATION)
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_STATION:
 		ieee80211_mgd_stop(sdata);
-
-	if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+		break;
+	case NL80211_IFTYPE_ADHOC:
 		ieee80211_ibss_stop(sdata);
-
+		break;
+	case NL80211_IFTYPE_AP:
+		cancel_work_sync(&sdata->u.ap.request_smps_work);
+		break;
+	default:
+		break;
+	}
 
 	/*
 	 * Remove all stations associated with this interface.
-- 
cgit v1.2.3


From a617302c531eaf497ccd02a61d380efc119ba999 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 22 Jan 2014 11:14:18 +0200
Subject: cfg80211: fix scan done race

When an interface/wdev is removed, any ongoing scan should be
cancelled by the driver. This will make it call cfg80211, which
only queues a work struct. If interface/wdev removal is quick
enough, this can leave the scan request pending and processed
only after the interface is gone, causing a use-after-free.

Fix this by making sure the scan request is not pending after
the interface is destroyed. We can't flush or cancel the work
item due to locking concerns, but when it'll run it shouldn't
find anything to do. This leaves a potential issue, if a new
scan gets requested before the work runs, it prematurely stops
the running scan, potentially causing another crash. I'll fix
that in the next patch.

This was particularly observed with P2P_DEVICE wdevs, likely
because freeing them is quicker than freeing netdevs.

Reported-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Fixes: 4a58e7c38443 ("cfg80211: don't "leak" uncompleted scans")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 77fe4c791269..02ed00dbf2df 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -203,8 +203,11 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 
 	rdev->opencount--;
 
-	WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev &&
-		!rdev->scan_req->notified);
+	if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
+		if (WARN_ON(!rdev->scan_req->notified))
+			rdev->scan_req->aborted = true;
+		___cfg80211_scan_done(rdev);
+	}
 }
 
 static int cfg80211_rfkill_set_block(void *data, bool blocked)
@@ -856,8 +859,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 		break;
 	case NETDEV_DOWN:
 		cfg80211_update_iface_num(rdev, wdev->iftype, -1);
-		WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev &&
-			!rdev->scan_req->notified);
+		if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
+			if (WARN_ON(!rdev->scan_req->notified))
+				rdev->scan_req->aborted = true;
+			___cfg80211_scan_done(rdev);
+		}
 
 		if (WARN_ON(rdev->sched_scan_req &&
 			    rdev->sched_scan_req->dev == wdev->netdev)) {
-- 
cgit v1.2.3


From f9d15d162b3acf28f85b3ac05c4883e5ed588d28 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 22 Jan 2014 11:14:19 +0200
Subject: cfg80211: send scan results from work queue

Due to the previous commit, when a scan finishes, it is in theory
possible to hit the following sequence:
 1. interface starts being removed
 2. scan is cancelled by driver and cfg80211 is notified
 3. scan done work is scheduled
 4. interface is removed completely, rdev->scan_req is freed,
    event sent to userspace but scan done work remains pending
 5. new scan is requested on another virtual interface
 6. scan done work runs, freeing the still-running scan

To fix this situation, hang on to the scan done message and block
new scans while that is the case, and only send the message from
the work function, regardless of whether the scan_req is already
freed from interface removal. This makes step 5 above impossible
and changes step 6 to be
 5. scan done work runs, sending the scan done message

As this can't work for wext, so we send the message immediately,
but this shouldn't be an issue since we still return -EBUSY.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c    |  4 ++--
 net/wireless/core.h    |  4 +++-
 net/wireless/nl80211.c | 29 ++++++++++-------------------
 net/wireless/nl80211.h |  8 ++++----
 net/wireless/scan.c    | 40 +++++++++++++++++++++++++---------------
 net/wireless/sme.c     |  2 +-
 6 files changed, 45 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 02ed00dbf2df..010892b81a06 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -206,7 +206,7 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 	if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
 		if (WARN_ON(!rdev->scan_req->notified))
 			rdev->scan_req->aborted = true;
-		___cfg80211_scan_done(rdev);
+		___cfg80211_scan_done(rdev, false);
 	}
 }
 
@@ -862,7 +862,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 		if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
 			if (WARN_ON(!rdev->scan_req->notified))
 				rdev->scan_req->aborted = true;
-			___cfg80211_scan_done(rdev);
+			___cfg80211_scan_done(rdev, false);
 		}
 
 		if (WARN_ON(rdev->sched_scan_req &&
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 37ec16d7bb1a..f1d193b557b6 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -62,6 +62,7 @@ struct cfg80211_registered_device {
 	struct rb_root bss_tree;
 	u32 bss_generation;
 	struct cfg80211_scan_request *scan_req; /* protected by RTNL */
+	struct sk_buff *scan_msg;
 	struct cfg80211_sched_scan_request *sched_scan_req;
 	unsigned long suspend_at;
 	struct work_struct scan_done_wk;
@@ -361,7 +362,8 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
 				   struct key_params *params, int key_idx,
 				   bool pairwise, const u8 *mac_addr);
 void __cfg80211_scan_done(struct work_struct *wk);
-void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev);
+void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
+			   bool send_message);
 void __cfg80211_sched_scan_results(struct work_struct *wk);
 int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
 			       bool driver_initiated);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6ea960b1a8eb..4fe2e6e2bc76 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5245,7 +5245,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 	if (!rdev->ops->scan)
 		return -EOPNOTSUPP;
 
-	if (rdev->scan_req) {
+	if (rdev->scan_req || rdev->scan_msg) {
 		err = -EBUSY;
 		goto unlock;
 	}
@@ -10012,40 +10012,31 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
 				NL80211_MCGRP_SCAN, GFP_KERNEL);
 }
 
-void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
-			    struct wireless_dev *wdev)
+struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
+				       struct wireless_dev *wdev, bool aborted)
 {
 	struct sk_buff *msg;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
-		return;
+		return NULL;
 
 	if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0,
-				  NL80211_CMD_NEW_SCAN_RESULTS) < 0) {
+				  aborted ? NL80211_CMD_SCAN_ABORTED :
+					    NL80211_CMD_NEW_SCAN_RESULTS) < 0) {
 		nlmsg_free(msg);
-		return;
+		return NULL;
 	}
 
-	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
-				NL80211_MCGRP_SCAN, GFP_KERNEL);
+	return msg;
 }
 
-void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
-			       struct wireless_dev *wdev)
+void nl80211_send_scan_result(struct cfg80211_registered_device *rdev,
+			      struct sk_buff *msg)
 {
-	struct sk_buff *msg;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return;
 
-	if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0,
-				  NL80211_CMD_SCAN_ABORTED) < 0) {
-		nlmsg_free(msg);
-		return;
-	}
-
 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
 				NL80211_MCGRP_SCAN, GFP_KERNEL);
 }
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b1b231324e10..75799746d845 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -8,10 +8,10 @@ void nl80211_exit(void);
 void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev);
 void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
 			     struct wireless_dev *wdev);
-void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
-			    struct wireless_dev *wdev);
-void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
-			       struct wireless_dev *wdev);
+struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
+				       struct wireless_dev *wdev, bool aborted);
+void nl80211_send_scan_result(struct cfg80211_registered_device *rdev,
+			      struct sk_buff *msg);
 void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
 			     struct net_device *netdev, u32 cmd);
 void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index b528e31da2cf..d1ed4aebbbb7 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -161,18 +161,25 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev,
 		dev->bss_generation++;
 }
 
-void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev)
+void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
+			   bool send_message)
 {
 	struct cfg80211_scan_request *request;
 	struct wireless_dev *wdev;
+	struct sk_buff *msg;
 #ifdef CONFIG_CFG80211_WEXT
 	union iwreq_data wrqu;
 #endif
 
 	ASSERT_RTNL();
 
-	request = rdev->scan_req;
+	if (rdev->scan_msg) {
+		nl80211_send_scan_result(rdev, rdev->scan_msg);
+		rdev->scan_msg = NULL;
+		return;
+	}
 
+	request = rdev->scan_req;
 	if (!request)
 		return;
 
@@ -186,18 +193,16 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev)
 	if (wdev->netdev)
 		cfg80211_sme_scan_done(wdev->netdev);
 
-	if (request->aborted) {
-		nl80211_send_scan_aborted(rdev, wdev);
-	} else {
-		if (request->flags & NL80211_SCAN_FLAG_FLUSH) {
-			/* flush entries from previous scans */
-			spin_lock_bh(&rdev->bss_lock);
-			__cfg80211_bss_expire(rdev, request->scan_start);
-			spin_unlock_bh(&rdev->bss_lock);
-		}
-		nl80211_send_scan_done(rdev, wdev);
+	if (!request->aborted &&
+	    request->flags & NL80211_SCAN_FLAG_FLUSH) {
+		/* flush entries from previous scans */
+		spin_lock_bh(&rdev->bss_lock);
+		__cfg80211_bss_expire(rdev, request->scan_start);
+		spin_unlock_bh(&rdev->bss_lock);
 	}
 
+	msg = nl80211_build_scan_msg(rdev, wdev, request->aborted);
+
 #ifdef CONFIG_CFG80211_WEXT
 	if (wdev->netdev && !request->aborted) {
 		memset(&wrqu, 0, sizeof(wrqu));
@@ -211,6 +216,11 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev)
 
 	rdev->scan_req = NULL;
 	kfree(request);
+
+	if (!send_message)
+		rdev->scan_msg = msg;
+	else
+		nl80211_send_scan_result(rdev, msg);
 }
 
 void __cfg80211_scan_done(struct work_struct *wk)
@@ -221,7 +231,7 @@ void __cfg80211_scan_done(struct work_struct *wk)
 			    scan_done_wk);
 
 	rtnl_lock();
-	___cfg80211_scan_done(rdev);
+	___cfg80211_scan_done(rdev, true);
 	rtnl_unlock();
 }
 
@@ -1079,7 +1089,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
 	if (IS_ERR(rdev))
 		return PTR_ERR(rdev);
 
-	if (rdev->scan_req) {
+	if (rdev->scan_req || rdev->scan_msg) {
 		err = -EBUSY;
 		goto out;
 	}
@@ -1481,7 +1491,7 @@ int cfg80211_wext_giwscan(struct net_device *dev,
 	if (IS_ERR(rdev))
 		return PTR_ERR(rdev);
 
-	if (rdev->scan_req)
+	if (rdev->scan_req || rdev->scan_msg)
 		return -EAGAIN;
 
 	res = ieee80211_scan_results(rdev, info, extra, data->length);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index a63509118508..f04d4c32e96e 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -67,7 +67,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
 	ASSERT_RDEV_LOCK(rdev);
 	ASSERT_WDEV_LOCK(wdev);
 
-	if (rdev->scan_req)
+	if (rdev->scan_req || rdev->scan_msg)
 		return -EBUSY;
 
 	if (wdev->conn->params.channel)
-- 
cgit v1.2.3


From 0297ea17bf7879fb5846fafd1be4c0471e72848d Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 27 Jan 2014 11:07:42 +0200
Subject: mac80211: release the channel in error path in start_ap

When the driver cannot start the AP or when the assignement
of the beacon goes wrong, we need to unassign the vif.

Cc: stable@vger.kernel.org
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 33acdca4a1df..453e974287d1 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1021,8 +1021,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 					IEEE80211_P2P_OPPPS_ENABLE_BIT;
 
 	err = ieee80211_assign_beacon(sdata, &params->beacon);
-	if (err < 0)
+	if (err < 0) {
+		ieee80211_vif_release_channel(sdata);
 		return err;
+	}
 	changed |= err;
 
 	err = drv_start_ap(sdata->local, sdata);
@@ -1032,6 +1034,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 		if (old)
 			kfree_rcu(old, rcu_head);
 		RCU_INIT_POINTER(sdata->u.ap.beacon, NULL);
+		ieee80211_vif_release_channel(sdata);
 		return err;
 	}
 
-- 
cgit v1.2.3


From d4c80d9df6d1e4473b1409e4d220ca3d1612125c Mon Sep 17 00:00:00 2001
From: Sujith Manoharan <c_manoha@qca.qualcomm.com>
Date: Thu, 30 Jan 2014 14:17:28 +0530
Subject: mac80211: Fix IBSS disconnect

Currently, when a station leaves an IBSS network, the
corresponding BSS is not dropped from cfg80211 if there are
other active stations in the network. But, the small
window that is present when trying to determine a station's
status based on IEEE80211_IBSS_MERGE_INTERVAL introduces
a race.

Instead of trying to keep the BSS, always remove it when
leaving an IBSS network. There is not much benefit to retain
the BSS entry since it will be added with a subsequent join
operation.

This fixes an issue where a dangling BSS entry causes ath9k
to wait for a beacon indefinitely.

Cc: <stable@vger.kernel.org>
Reported-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Sujith Manoharan <c_manoha@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ibss.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 771080ec7212..2796a198728f 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -695,12 +695,9 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
 	struct cfg80211_bss *cbss;
 	struct beacon_data *presp;
 	struct sta_info *sta;
-	int active_ibss;
 	u16 capability;
 
-	active_ibss = ieee80211_sta_active_ibss(sdata);
-
-	if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) {
+	if (!is_zero_ether_addr(ifibss->bssid)) {
 		capability = WLAN_CAPABILITY_IBSS;
 
 		if (ifibss->privacy)
-- 
cgit v1.2.3


From 338f977f4eb441e69bb9a46eaa0ac715c931a67f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 1 Feb 2014 00:16:23 +0100
Subject: mac80211: fix fragmentation code, particularly for encryption

The "new" fragmentation code (since my rewrite almost 5 years ago)
erroneously sets skb->len rather than using skb_trim() to adjust
the length of the first fragment after copying out all the others.
This leaves the skb tail pointer pointing to after where the data
originally ended, and thus causes the encryption MIC to be written
at that point, rather than where it belongs: immediately after the
data.

The impact of this is that if software encryption is done, then
 a) encryption doesn't work for the first fragment, the connection
    becomes unusable as the first fragment will never be properly
    verified at the receiver, the MIC is practically guaranteed to
    be wrong
 b) we leak up to 8 bytes of plaintext (!) of the packet out into
    the air

This is only mitigated by the fact that many devices are capable
of doing encryption in hardware, in which case this can't happen
as the tail pointer is irrelevant in that case. Additionally,
fragmentation is not used very frequently and would normally have
to be configured manually.

Fix this by using skb_trim() properly.

Cc: stable@vger.kernel.org
Fixes: 2de8e0d999b8 ("mac80211: rewrite fragmentation")
Reported-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 27c990bf2320..97a02d3f7d87 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -878,7 +878,7 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx,
 	}
 
 	/* adjust first fragment's length */
-	skb->len = hdrlen + per_fragm;
+	skb_trim(skb, hdrlen + per_fragm);
 	return 0;
 }
 
-- 
cgit v1.2.3


From fab57a6cc227468ca9e6a4c7ff8d3b10727785ee Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 29 Jan 2014 13:28:02 +0100
Subject: mac80211: fix virtual monitor interface iteration

During channel context assignment, the interface should
be found by interface iteration, so we need to assign the
pointer before the channel context.

Reported-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Tested-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/iface.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ae2eb148a028..d6d1f1df9119 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -418,20 +418,24 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
 		return ret;
 	}
 
+	mutex_lock(&local->iflist_mtx);
+	rcu_assign_pointer(local->monitor_sdata, sdata);
+	mutex_unlock(&local->iflist_mtx);
+
 	mutex_lock(&local->mtx);
 	ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef,
 					IEEE80211_CHANCTX_EXCLUSIVE);
 	mutex_unlock(&local->mtx);
 	if (ret) {
+		mutex_lock(&local->iflist_mtx);
+		rcu_assign_pointer(local->monitor_sdata, NULL);
+		mutex_unlock(&local->iflist_mtx);
+		synchronize_net();
 		drv_remove_interface(local, sdata);
 		kfree(sdata);
 		return ret;
 	}
 
-	mutex_lock(&local->iflist_mtx);
-	rcu_assign_pointer(local->monitor_sdata, sdata);
-	mutex_unlock(&local->iflist_mtx);
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From b8ecbee67c732ef9fc47fcf50aed6b7bb6231d98 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 6 Feb 2014 09:17:41 +0000
Subject: netfilter: nf_tables: fix log/queue expressions for NFPROTO_INET

The log and queue expressions both store the family during ->init() and
use it to deliver packets. This is wrong when used in NFPROTO_INET since
they should both deliver to the actual AF of the packet, not the dummy
NFPROTO_INET.

Use the family from the hook ops to fix this.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_log.c   | 5 +----
 net/netfilter/nft_queue.c | 4 +---
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 5af790123ad8..26c5154e05f3 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -23,7 +23,6 @@ static const char *nft_log_null_prefix = "";
 struct nft_log {
 	struct nf_loginfo	loginfo;
 	char			*prefix;
-	int			family;
 };
 
 static void nft_log_eval(const struct nft_expr *expr,
@@ -33,7 +32,7 @@ static void nft_log_eval(const struct nft_expr *expr,
 	const struct nft_log *priv = nft_expr_priv(expr);
 	struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
 
-	nf_log_packet(net, priv->family, pkt->ops->hooknum, pkt->skb, pkt->in,
+	nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in,
 		      pkt->out, &priv->loginfo, "%s", priv->prefix);
 }
 
@@ -52,8 +51,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
 	struct nf_loginfo *li = &priv->loginfo;
 	const struct nlattr *nla;
 
-	priv->family = ctx->afi->family;
-
 	nla = tb[NFTA_LOG_PREFIX];
 	if (nla != NULL) {
 		priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index cbea473d69e9..e8ae2f6bf232 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -25,7 +25,6 @@ struct nft_queue {
 	u16	queuenum;
 	u16	queues_total;
 	u16	flags;
-	u8	family;
 };
 
 static void nft_queue_eval(const struct nft_expr *expr,
@@ -43,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
 			queue = priv->queuenum + cpu % priv->queues_total;
 		} else {
 			queue = nfqueue_hash(pkt->skb, queue,
-					     priv->queues_total, priv->family,
+					     priv->queues_total, pkt->ops->pf,
 					     jhash_initval);
 		}
 	}
@@ -71,7 +70,6 @@ static int nft_queue_init(const struct nft_ctx *ctx,
 		return -EINVAL;
 
 	init_hashrandom(&jhash_initval);
-	priv->family = ctx->afi->family;
 	priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
 
 	if (tb[NFTA_QUEUE_TOTAL] != NULL)
-- 
cgit v1.2.3


From 0165d9325d6a3cf856e2cbbe64a0f4635ac75893 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 25 Jan 2014 14:03:51 +0100
Subject: netfilter: nf_tables: fix racy rule deletion

We may lost race if we flush the rule-set (which happens asynchronously
via call_rcu) and we try to remove the table (that userspace assumes
to be empty).

Fix this by recovering synchronous rule and chain deletion. This was
introduced time ago before we had no batch support, and synchronous
rule deletion performance was not good. Now that we have the batch
support, we can just postpone the purge of old rule in a second step
in the commit phase. All object deletions are synchronous after this
patch.

As a side effect, we save memory as we don't need rcu_head per rule
anymore.

Cc: Patrick McHardy <kaber@trash.net>
Reported-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  4 ----
 net/netfilter/nf_tables_api.c     | 40 ++++++++++++++++++++++-----------------
 2 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 0f68e47d3e5e..e7e14ffe0f6a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -322,7 +322,6 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
  *	struct nft_rule - nf_tables rule
  *
  *	@list: used internally
- *	@rcu_head: used internally for rcu
  *	@handle: rule handle
  *	@genmask: generation mask
  *	@dlen: length of expression data
@@ -330,7 +329,6 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
  */
 struct nft_rule {
 	struct list_head		list;
-	struct rcu_head			rcu_head;
 	u64				handle:46,
 					genmask:2,
 					dlen:16;
@@ -391,7 +389,6 @@ enum nft_chain_flags {
  *
  *	@rules: list of rules in the chain
  *	@list: used internally
- *	@rcu_head: used internally
  *	@net: net namespace that this chain belongs to
  *	@table: table that this chain belongs to
  *	@handle: chain handle
@@ -403,7 +400,6 @@ enum nft_chain_flags {
 struct nft_chain {
 	struct list_head		rules;
 	struct list_head		list;
-	struct rcu_head			rcu_head;
 	struct net			*net;
 	struct nft_table		*table;
 	u64				handle;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 113c469c7579..3a2e4800b415 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1008,10 +1008,8 @@ notify:
 	return 0;
 }
 
-static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+static void nf_tables_chain_destroy(struct nft_chain *chain)
 {
-	struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
-
 	BUG_ON(chain->use > 0);
 
 	if (chain->flags & NFT_BASE_CHAIN) {
@@ -1059,7 +1057,9 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
 			       family);
 
 	/* Make sure all rule references are gone before this is released */
-	call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+	synchronize_rcu();
+
+	nf_tables_chain_destroy(chain);
 	return 0;
 }
 
@@ -1531,9 +1531,8 @@ err:
 	return err;
 }
 
-static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
+static void nf_tables_rule_destroy(struct nft_rule *rule)
 {
-	struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
 	struct nft_expr *expr;
 
 	/*
@@ -1548,11 +1547,6 @@ static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
 	kfree(rule);
 }
 
-static void nf_tables_rule_destroy(struct nft_rule *rule)
-{
-	call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy);
-}
-
 #define NFT_RULE_MAXEXPRS	128
 
 static struct nft_expr_info *info;
@@ -1819,9 +1813,6 @@ static int nf_tables_commit(struct sk_buff *skb)
 	synchronize_rcu();
 
 	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
-		/* Delete this rule from the dirty list */
-		list_del(&rupd->list);
-
 		/* This rule was inactive in the past and just became active.
 		 * Clear the next bit of the genmask since its meaning has
 		 * changed, now it is the future.
@@ -1832,6 +1823,7 @@ static int nf_tables_commit(struct sk_buff *skb)
 					      rupd->chain, rupd->rule,
 					      NFT_MSG_NEWRULE, 0,
 					      rupd->family);
+			list_del(&rupd->list);
 			kfree(rupd);
 			continue;
 		}
@@ -1841,7 +1833,15 @@ static int nf_tables_commit(struct sk_buff *skb)
 		nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
 				      rupd->rule, NFT_MSG_DELRULE, 0,
 				      rupd->family);
+	}
+
+	/* Make sure we don't see any packet traversing old rules */
+	synchronize_rcu();
+
+	/* Now we can safely release unused old rules */
+	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
 		nf_tables_rule_destroy(rupd->rule);
+		list_del(&rupd->list);
 		kfree(rupd);
 	}
 
@@ -1854,20 +1854,26 @@ static int nf_tables_abort(struct sk_buff *skb)
 	struct nft_rule_trans *rupd, *tmp;
 
 	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
-		/* Delete all rules from the dirty list */
-		list_del(&rupd->list);
-
 		if (!nft_rule_is_active_next(net, rupd->rule)) {
 			nft_rule_clear(net, rupd->rule);
+			list_del(&rupd->list);
 			kfree(rupd);
 			continue;
 		}
 
 		/* This rule is inactive, get rid of it */
 		list_del_rcu(&rupd->rule->list);
+	}
+
+	/* Make sure we don't see any packet accessing aborted rules */
+	synchronize_rcu();
+
+	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
 		nf_tables_rule_destroy(rupd->rule);
+		list_del(&rupd->list);
 		kfree(rupd);
 	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 63b5f152eb4a5bb79b9caf7ec37b4201d12f6e66 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 5 Feb 2014 08:38:25 +0100
Subject: ipv4: Fix runtime WARNING in rtmsg_ifa()

On m68k/ARAnyM:

WARNING: CPU: 0 PID: 407 at net/ipv4/devinet.c:1599 0x316a99()
Modules linked in:
CPU: 0 PID: 407 Comm: ifconfig Not tainted
3.13.0-atari-09263-g0c71d68014d1 #1378
Stack from 10c4fdf0:
        10c4fdf0 002ffabb 000243e8 00000000 008ced6c 00024416 00316a99 0000063f
        00316a99 00000009 00000000 002501b4 00316a99 0000063f c0a86117 00000080
        c0a86117 00ad0c90 00250a5a 00000014 00ad0c90 00000000 00000000 00000001
        00b02dd0 00356594 00000000 00356594 c0a86117 eff6c9e4 008ced6c 00000002
        008ced60 0024f9b4 00250b52 00ad0c90 00000000 00000000 00252390 00ad0c90
        eff6c9e4 0000004f 00000000 00000000 eff6c9e4 8000e25c eff6c9e4 80001020
Call Trace: [<000243e8>] warn_slowpath_common+0x52/0x6c
 [<00024416>] warn_slowpath_null+0x14/0x1a
 [<002501b4>] rtmsg_ifa+0xdc/0xf0
 [<00250a5a>] __inet_insert_ifa+0xd6/0x1c2
 [<0024f9b4>] inet_abc_len+0x0/0x42
 [<00250b52>] inet_insert_ifa+0xc/0x12
 [<00252390>] devinet_ioctl+0x2ae/0x5d6

Adding some debugging code reveals that net_fill_ifaddr() fails in

    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
                              preferred, valid))

nla_put complains:

    lib/nlattr.c:454: skb_tailroom(skb) = 12, nla_total_size(attrlen) = 20

Apparently commit 5c766d642bcaffd0c2a5b354db2068515b3846cf ("ipv4:
introduce address lifetime") forgot to take into account the addition of
struct ifa_cacheinfo in inet_nlmsg_size(). Hence add it, like is already
done for ipv6.

Suggested-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac2dff3c2c1c..bdbf68bb2e2d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1443,7 +1443,8 @@ static size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_LOCAL */
 	       + nla_total_size(4) /* IFA_BROADCAST */
 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
-	       + nla_total_size(4);  /* IFA_FLAGS */
+	       + nla_total_size(4)  /* IFA_FLAGS */
+	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
 }
 
 static inline u32 cstamp_delta(unsigned long cstamp)
-- 
cgit v1.2.3


From 661dbf34129cecd20879cb7b1cbe936911da0f02 Mon Sep 17 00:00:00 2001
From: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nsn.com>
Date: Thu, 6 Feb 2014 08:30:10 +0100
Subject: net: sctp: fix initialization of local source address on accepted
 ipv6 sockets

commit 	efe4208f47f907b86f528788da711e8ab9dea44d:
'ipv6: make lookups simpler and faster' broke initialization of local source
address on accepted ipv6 sockets. Before the mentioned commit receive address
was copied along with the contents of ipv6_pinfo in sctp_v6_create_accept_sk.
Now when it is moved, it has to be copied separately.

This also fixes lksctp's ipv6 regression in a sense that test_getname_v6, TC5 -
'getsockname on a connected server socket' now passes.

Signed-off-by: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nsn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0f6259a6a932..2b1738ef9394 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -662,6 +662,8 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
 	 */
 	sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk);
 
+	newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+
 	sk_refcnt_debug_inc(newsk);
 
 	if (newsk->sk_prot->init(newsk)) {
-- 
cgit v1.2.3


From 00fe11b3c67dc670fe6391d22f1fe64e7c99a8ec Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 6 Feb 2014 18:34:12 +0100
Subject: netpoll: fix netconsole IPv6 setup

Currently, to make netconsole start over IPv6, the source address
needs to be specified. Without a source address, netpoll_parse_options
assumes we're setting up over IPv4 and the destination IPv6 address is
rejected.

Check if the IP version has been forced by a source address before
checking for a version mismatch when parsing the destination address.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c03f3dec4763..a664f7829a6d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -948,6 +948,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 {
 	char *cur=opt, *delim;
 	int ipv6;
+	bool ipversion_set = false;
 
 	if (*cur != '@') {
 		if ((delim = strchr(cur, '@')) == NULL)
@@ -960,6 +961,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 	cur++;
 
 	if (*cur != '/') {
+		ipversion_set = true;
 		if ((delim = strchr(cur, '/')) == NULL)
 			goto parse_failed;
 		*delim = 0;
@@ -1002,7 +1004,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
 	if (ipv6 < 0)
 		goto parse_failed;
-	else if (np->ipv6 != (bool)ipv6)
+	else if (ipversion_set && np->ipv6 != (bool)ipv6)
 		goto parse_failed;
 	else
 		np->ipv6 = (bool)ipv6;
-- 
cgit v1.2.3


From ed98df3361f059db42786c830ea96e2d18b8d4db Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 6 Feb 2014 10:42:42 -0800
Subject: net: use __GFP_NORETRY for high order allocations

sock_alloc_send_pskb() & sk_page_frag_refill()
have a loop trying high order allocations to prepare
skb with low number of fragments as this increases performance.

Problem is that under memory pressure/fragmentation, this can
trigger OOM while the intent was only to try the high order
allocations, then fallback to order-0 allocations.

We had various reports from unexpected regressions.

According to David, setting __GFP_NORETRY should be fine,
as the asynchronous compaction is still enabled, and this
will prevent OOM from kicking as in :

CFSClientEventm invoked oom-killer: gfp_mask=0x42d0, order=3, oom_adj=0,
oom_score_adj=0, oom_score_badness=2 (enabled),memcg_scoring=disabled
CFSClientEventm

Call Trace:
 [<ffffffff8043766c>] dump_header+0xe1/0x23e
 [<ffffffff80437a02>] oom_kill_process+0x6a/0x323
 [<ffffffff80438443>] out_of_memory+0x4b3/0x50d
 [<ffffffff8043a4a6>] __alloc_pages_may_oom+0xa2/0xc7
 [<ffffffff80236f42>] __alloc_pages_nodemask+0x1002/0x17f0
 [<ffffffff8024bd23>] alloc_pages_current+0x103/0x2b0
 [<ffffffff8028567f>] sk_page_frag_refill+0x8f/0x160
 [<ffffffff80295fa0>] tcp_sendmsg+0x560/0xee0
 [<ffffffff802a5037>] inet_sendmsg+0x67/0x100
 [<ffffffff80283c9c>] __sock_sendmsg_nosec+0x6c/0x90
 [<ffffffff80283e85>] sock_sendmsg+0xc5/0xf0
 [<ffffffff802847b6>] __sys_sendmsg+0x136/0x430
 [<ffffffff80284ec8>] sys_sendmsg+0x88/0x110
 [<ffffffff80711472>] system_call_fastpath+0x16/0x1b
Out of Memory: Kill process 2856 (bash) score 9999 or sacrifice child

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 0c127dcdf6a8..5b6a9431b017 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1775,7 +1775,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
 			while (order) {
 				if (npages >= 1 << order) {
 					page = alloc_pages(sk->sk_allocation |
-							   __GFP_COMP | __GFP_NOWARN,
+							   __GFP_COMP |
+							   __GFP_NOWARN |
+							   __GFP_NORETRY,
 							   order);
 					if (page)
 						goto fill_page;
@@ -1845,7 +1847,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
 		gfp_t gfp = prio;
 
 		if (order)
-			gfp |= __GFP_COMP | __GFP_NOWARN;
+			gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
 		pfrag->page = alloc_pages(gfp, order);
 		if (likely(pfrag->page)) {
 			pfrag->offset = 0;
-- 
cgit v1.2.3


From dbe173079ab58a444e12dbebe96f5aec1e0bed1a Mon Sep 17 00:00:00 2001
From: Cong Wang <cwang@twopensource.com>
Date: Thu, 6 Feb 2014 15:00:52 -0800
Subject: bridge: fix netconsole setup over bridge

Commit 93d8bf9fb8f3 ("bridge: cleanup netpoll code") introduced
a check in br_netpoll_enable(), but this check is incorrect for
br_netpoll_setup(). This patch moves the code after the check
into __br_netpoll_enable() and calls it in br_netpoll_setup().
For br_add_if(), the check is still needed.

Fixes: 93d8bf9fb8f3 ("bridge: cleanup netpoll code")
Cc: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Tested-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 51 +++++++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e4401a531afb..d9a9b0fc1795 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -226,6 +226,33 @@ static void br_netpoll_cleanup(struct net_device *dev)
 		br_netpoll_disable(p);
 }
 
+static int __br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+{
+	struct netpoll *np;
+	int err;
+
+	np = kzalloc(sizeof(*p->np), gfp);
+	if (!np)
+		return -ENOMEM;
+
+	err = __netpoll_setup(np, p->dev, gfp);
+	if (err) {
+		kfree(np);
+		return err;
+	}
+
+	p->np = np;
+	return err;
+}
+
+int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+{
+	if (!p->br->dev->npinfo)
+		return 0;
+
+	return __br_netpoll_enable(p, gfp);
+}
+
 static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
 			    gfp_t gfp)
 {
@@ -236,7 +263,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
 	list_for_each_entry(p, &br->port_list, list) {
 		if (!p->dev)
 			continue;
-		err = br_netpoll_enable(p, gfp);
+		err = __br_netpoll_enable(p, gfp);
 		if (err)
 			goto fail;
 	}
@@ -249,28 +276,6 @@ fail:
 	goto out;
 }
 
-int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
-{
-	struct netpoll *np;
-	int err;
-
-	if (!p->br->dev->npinfo)
-		return 0;
-
-	np = kzalloc(sizeof(*p->np), gfp);
-	if (!np)
-		return -ENOMEM;
-
-	err = __netpoll_setup(np, p->dev, gfp);
-	if (err) {
-		kfree(np);
-		return err;
-	}
-
-	p->np = np;
-	return err;
-}
-
 void br_netpoll_disable(struct net_bridge_port *p)
 {
 	struct netpoll *np = p->np;
-- 
cgit v1.2.3


From 4a5ab4e224288403b0b4b6b8c4d339323150c312 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 6 Feb 2014 15:57:10 -0800
Subject: tcp: remove 1ms offset in srtt computation

TCP pacing depends on an accurate srtt estimation.

Current srtt estimation is using jiffie resolution,
and has an artificial offset of at least 1 ms, which can produce
slowdowns when FQ/pacing is used, especially in DC world,
where typical rtt is below 1 ms.

We are planning a switch to usec resolution for linux-3.15,
but in the meantime, this patch removes the 1 ms offset.

All we need is to have tp->srtt minimal value of 1 to differentiate
the case of srtt being initialized or not, not 8.

The problematic behavior was observed on a 40Gbit testbed,
where 32 concurrent netperf were reaching 12Gbps of aggregate
speed, instead of line speed.

This patch also has the effect of reporting more accurate srtt and send
rates to iproute2 ss command as in :

$ ss -i dst cca2
Netid  State      Recv-Q Send-Q          Local Address:Port
Peer Address:Port
tcp    ESTAB      0      0                10.244.129.1:56984
10.244.129.2:12865
	 cubic wscale:6,6 rto:200 rtt:0.25/0.25 ato:40 mss:1448 cwnd:10 send
463.4Mbps rcv_rtt:1 rcv_space:29200
tcp    ESTAB      0      390960           10.244.129.1:60247
10.244.129.2:50204
	 cubic wscale:6,6 rto:200 rtt:0.875/0.75 mss:1448 cwnd:73 ssthresh:51
send 966.4Mbps unacked:73 retrans:0/121 rcv_space:29200

Reported-by: Vytautas Valancius <valas@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  | 18 ++++++++++--------
 net/ipv4/tcp_output.c |  2 +-
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65cf90e063d5..227cba79fa6b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -671,6 +671,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	long m = mrtt; /* RTT */
+	u32 srtt = tp->srtt;
 
 	/*	The following amusing code comes from Jacobson's
 	 *	article in SIGCOMM '88.  Note that rtt and mdev
@@ -688,11 +689,9 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
 	 */
-	if (m == 0)
-		m = 1;
-	if (tp->srtt != 0) {
-		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
-		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
+	if (srtt != 0) {
+		m -= (srtt >> 3);	/* m is now error in rtt est */
+		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
 		if (m < 0) {
 			m = -m;		/* m is now abs(error) */
 			m -= (tp->mdev >> 2);   /* similar update on mdev */
@@ -723,11 +722,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		}
 	} else {
 		/* no previous measure. */
-		tp->srtt = m << 3;	/* take the measured time to be rtt */
+		srtt = m << 3;		/* take the measured time to be rtt */
 		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 		tp->rtt_seq = tp->snd_nxt;
 	}
+	tp->srtt = max(1U, srtt);
 }
 
 /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
@@ -746,8 +746,10 @@ static void tcp_update_pacing_rate(struct sock *sk)
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
-	/* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
-	 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+	/* Correction for small srtt and scheduling constraints.
+	 * For small rtt, consider noise is too high, and use
+	 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
+	 *
 	 * We probably need usec resolution in the future.
 	 * Note: This also takes care of possible srtt=0 case,
 	 * when tcp_rtt_estimator() was not yet called.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 03d26b85eab8..10435b3b9d0f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1977,7 +1977,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+	if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
 	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 		return false;
 
-- 
cgit v1.2.3


From bd7fc645dabab423ef362186db2917f3919321d3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 7 Feb 2014 12:53:07 +0100
Subject: netfilter: nf_tables: do not allow NFT_SET_ELEM_INTERVAL_END flag and
 data

This combination is not allowed since end interval elements cannot
contain data.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_tables_api.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3a2e4800b415..d0c790e3e495 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2741,6 +2741,9 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
 		if (nla[NFTA_SET_ELEM_DATA] == NULL &&
 		    !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
 			return -EINVAL;
+		if (nla[NFTA_SET_ELEM_DATA] != NULL &&
+		    elem.flags & NFT_SET_ELEM_INTERVAL_END)
+			return -EINVAL;
 	} else {
 		if (nla[NFTA_SET_ELEM_DATA] != NULL)
 			return -EINVAL;
-- 
cgit v1.2.3


From 2fb91ddbf8e1dcb207e1e2085473aeaeff975102 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 6 Feb 2014 16:15:39 +0100
Subject: netfilter: nft_rbtree: fix data handling of end interval elements

This patch fixes several things which related to the handling of
end interval elements:

* Chain use underflow with intervals and map: If you add a rule
  using intervals+map that introduces a loop, the error path of the
  rbtree set decrements the chain refcount for each side of the
  interval, leading to a chain use counter underflow.

* Don't copy the data part of the end interval element since, this
  area is uninitialized and this confuses the loop detection code.

* Don't allocate room for the data part of end interval elements
  since this is unused.

So, after this patch the idea is that end interval elements don't
have a data part.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nft_rbtree.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index ca0c1b231bfe..e21d69d13506 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -69,8 +69,10 @@ static void nft_rbtree_elem_destroy(const struct nft_set *set,
 				    struct nft_rbtree_elem *rbe)
 {
 	nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
-	if (set->flags & NFT_SET_MAP)
+	if (set->flags & NFT_SET_MAP &&
+	    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
 		nft_data_uninit(rbe->data, set->dtype);
+
 	kfree(rbe);
 }
 
@@ -108,7 +110,8 @@ static int nft_rbtree_insert(const struct nft_set *set,
 	int err;
 
 	size = sizeof(*rbe);
-	if (set->flags & NFT_SET_MAP)
+	if (set->flags & NFT_SET_MAP &&
+	    !(elem->flags & NFT_SET_ELEM_INTERVAL_END))
 		size += sizeof(rbe->data[0]);
 
 	rbe = kzalloc(size, GFP_KERNEL);
@@ -117,7 +120,8 @@ static int nft_rbtree_insert(const struct nft_set *set,
 
 	rbe->flags = elem->flags;
 	nft_data_copy(&rbe->key, &elem->key);
-	if (set->flags & NFT_SET_MAP)
+	if (set->flags & NFT_SET_MAP &&
+	    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
 		nft_data_copy(rbe->data, &elem->data);
 
 	err = __nft_rbtree_insert(set, rbe);
@@ -153,7 +157,8 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
 			parent = parent->rb_right;
 		else {
 			elem->cookie = rbe;
-			if (set->flags & NFT_SET_MAP)
+			if (set->flags & NFT_SET_MAP &&
+			    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
 				nft_data_copy(&elem->data, rbe->data);
 			elem->flags = rbe->flags;
 			return 0;
@@ -177,7 +182,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
 		rbe = rb_entry(node, struct nft_rbtree_elem, node);
 		nft_data_copy(&elem.key, &rbe->key);
-		if (set->flags & NFT_SET_MAP)
+		if (set->flags & NFT_SET_MAP &&
+		    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
 			nft_data_copy(&elem.data, rbe->data);
 		elem.flags = rbe->flags;
 
-- 
cgit v1.2.3


From 62f9c8b40d2db915f89a6aa47395412fb29f1cfc Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 7 Feb 2014 14:45:01 +0100
Subject: netfilter: nf_tables: fix loop checking with end interval elements

Fix access to uninitialized data for end interval elements. The
element data part is uninitialized in interval end elements.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d0c790e3e495..adce01e8bb57 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2998,6 +2998,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
 					const struct nft_set_iter *iter,
 					const struct nft_set_elem *elem)
 {
+	if (elem->flags & NFT_SET_ELEM_INTERVAL_END)
+		return 0;
+
 	switch (elem->data.verdict) {
 	case NFT_JUMP:
 	case NFT_GOTO:
-- 
cgit v1.2.3


From 6d8c00d58e9e484fdc41aaaf62e5d8364efe375a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 9 Jan 2014 18:42:42 +0000
Subject: netfilter: nf_tables: unininline nft_trace_packet()

It makes no sense to inline a rarely used function meant for debugging
only that is called a total of five times in the main evaluation loop.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 0d879fcb8763..90998a6ff8b9 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -103,9 +103,9 @@ static struct nf_loginfo trace_loginfo = {
 	},
 };
 
-static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
-				    const struct nft_chain *chain,
-				    int rulenum, enum nft_trace type)
+static void nft_trace_packet(const struct nft_pktinfo *pkt,
+			     const struct nft_chain *chain,
+			     int rulenum, enum nft_trace type)
 {
 	struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
 
-- 
cgit v1.2.3


From fd944bded5ce32247941ce54eb8f65646549886b Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 19:57:33 +0530
Subject: net: Mark function as static in 9p/client.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mark function as static in net/9p/client.c because it is not used
outside this file.

This eliminates the following warning in net/9p/client.c:
net/9p/client.c:207:18: warning: no previous prototype for ‘p9_fcall_alloc’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index a5e4d2dcb03e..9186550d77a6 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -204,7 +204,7 @@ free_and_return:
 	return ret;
 }
 
-struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
 {
 	struct p9_fcall *fc;
 	fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
-- 
cgit v1.2.3


From 8203274e1541392e8a85d4bcbcda55d62fe62469 Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 19:59:04 +0530
Subject: net: Include appropriate header file in caif/caif_dev.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include appropriate header file net/caif/caif_dev.h in caif/caif_dev.c
because it has prototype declarations of function defined in
caif/caif_dev.c.

This eliminates the following file in caif/caif_dev.c:
net/caif/caif_dev.c:303:6: warning: no previous prototype for ‘caif_enroll_dev’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 4dca159435cf..edbca468fa73 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -22,6 +22,7 @@
 #include <net/pkt_sched.h>
 #include <net/caif/caif_device.h>
 #include <net/caif/caif_layer.h>
+#include <net/caif/caif_dev.h>
 #include <net/caif/cfpkt.h>
 #include <net/caif/cfcnfg.h>
 #include <net/caif/cfserl.h>
-- 
cgit v1.2.3


From 02fe72c9edc58d6b5d85c08327d7ef05cd3e97dc Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 20:02:16 +0530
Subject: net: Include appropriate header file in caif/cfsrvl.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include appropriate header file net/caif/caif_dev.h in caif/cfsrvl.c
because it has prototype declaration of functions defined in
caif/cfsrvl.c.

This eliminates the following warning in caif/cfsrvl.c:
net/caif/cfsrvl.c:198:6: warning: no previous prototype for ‘caif_free_client’ [-Wmissing-prototypes]
net/caif/cfsrvl.c:208:6: warning: no previous prototype for ‘caif_client_register_refcnt’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfsrvl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 353f793d1b3b..a6e115463052 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -15,6 +15,7 @@
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfsrvl.h>
 #include <net/caif/cfpkt.h>
+#include <net/caif/caif_dev.h>
 
 #define SRVL_CTRL_PKT_SIZE 1
 #define SRVL_FLOW_OFF 0x81
-- 
cgit v1.2.3


From 0a59f3a9fd7e2801a445682465ea0522ea497183 Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 20:26:25 +0530
Subject: net: Mark functions as static in core/dev.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mark functions as static in core/dev.c because they are not used outside
this file.

This eliminates the following warning in core/dev.c:
net/core/dev.c:2806:5: warning: no previous prototype for ‘__dev_queue_xmit’ [-Wmissing-prototypes]
net/core/dev.c:4640:5: warning: no previous prototype for ‘netdev_adjacent_sysfs_add’ [-Wmissing-prototypes]
net/core/dev.c:4650:6: warning: no previous prototype for ‘netdev_adjacent_sysfs_del’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Reviewed-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 3721db716350..4ad1b78c9c77 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2803,7 +2803,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 {
 	struct net_device *dev = skb->dev;
 	struct netdev_queue *txq;
@@ -4637,7 +4637,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
 
-int netdev_adjacent_sysfs_add(struct net_device *dev,
+static int netdev_adjacent_sysfs_add(struct net_device *dev,
 			      struct net_device *adj_dev,
 			      struct list_head *dev_list)
 {
@@ -4647,7 +4647,7 @@ int netdev_adjacent_sysfs_add(struct net_device *dev,
 	return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
 				 linkname);
 }
-void netdev_adjacent_sysfs_del(struct net_device *dev,
+static void netdev_adjacent_sysfs_del(struct net_device *dev,
 			       char *name,
 			       struct list_head *dev_list)
 {
-- 
cgit v1.2.3


From f56b8bf6e445b22bb6207dbcf30b2c7f5ddfdf96 Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 22:20:09 +0530
Subject: net: Move prototype declaration to appropriate header file from
 decnet/af_decnet.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move prototype declaration of functions to header file include/net/dn_route.h
from net/decnet/af_decnet.c because it is used by more than one file.

This eliminates the following warning in net/decnet/dn_route.c:
net/decnet/dn_route.c:629:5: warning: no previous prototype for ‘dn_route_rcv’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn_route.h | 2 ++
 net/decnet/af_decnet.c | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index b409ad6b8d7a..55df9939bca2 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -20,6 +20,8 @@ int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *,
 			 struct sock *sk, int flags);
 int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
 void dn_rt_cache_flush(int delay);
+int dn_route_rcv(struct sk_buff *skb, struct net_device *dev,
+		 struct packet_type *pt, struct net_device *orig_dev);
 
 /* Masks for flags field */
 #define DN_RT_F_PID 0x07 /* Mask for packet type                      */
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2954dcbca832..24d9193240e3 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2104,8 +2104,6 @@ static struct notifier_block dn_dev_notifier = {
 	.notifier_call = dn_device_event,
 };
 
-extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
-
 static struct packet_type dn_dix_packet_type __read_mostly = {
 	.type =		cpu_to_be16(ETH_P_DNA_RT),
 	.func =		dn_route_rcv,
-- 
cgit v1.2.3


From ab3301bd96c024ec9c2e1c35c90327dc5c8012a4 Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 22:22:53 +0530
Subject: net: Move prototype declaration to header file include/net/dn.h from
 net/decnet/af_decnet.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move prototype declaration of functions to header file include/net/dn.h
from net/decnet/af_decnet.c because they are used by more than one file.

This eliminates the following warning in net/decnet/af_decnet.c:
net/decnet/sysctl_net_decnet.c:354:6: warning: no previous prototype for ‘dn_register_sysctl’ [-Wmissing-prototypes]
net/decnet/sysctl_net_decnet.c:359:6: warning: no previous prototype for ‘dn_unregister_sysctl’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn.h       | 2 ++
 net/decnet/af_decnet.c | 3 ---
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/dn.h b/include/net/dn.h
index ccc15588d108..913b73d239f5 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -200,6 +200,8 @@ static inline void dn_sk_ports_copy(struct flowidn *fld, struct dn_scp *scp)
 }
 
 unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu);
+void dn_register_sysctl(void);
+void dn_unregister_sysctl(void);
 
 #define DN_MENUVER_ACC 0x01
 #define DN_MENUVER_USR 0x02
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 24d9193240e3..4c04848953bd 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2351,9 +2351,6 @@ static const struct proto_ops dn_proto_ops = {
 	.sendpage =	sock_no_sendpage,
 };
 
-void dn_register_sysctl(void);
-void dn_unregister_sysctl(void);
-
 MODULE_DESCRIPTION("The Linux DECnet Network Protocol");
 MODULE_AUTHOR("Linux DECnet Project Team");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 493cc5e5ba450356560cc12a8f71ff6a8574c91c Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 22:24:33 +0530
Subject: net: Move prototype declaration to include/net/ipx.h from
 net/ipx/ipx_route.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move prototype definition of function to header file include/net/ipx.h
from net/ipx/ipx_route.c because they are used by more than one file.

This eliminates the following warning from net/ipx/af_ipx.c:
net/ipx/af_ipx.c:193:23: warning: no previous prototype for ‘ipxitf_find_using_net’ [-Wmissing-prototypes]
net/ipx/af_ipx.c:577:5: warning: no previous prototype for ‘ipxitf_send’ [-Wmissing-prototypes]
net/ipx/af_ipx.c:1219:8: warning: no previous prototype for ‘ipx_cksum’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipx.h   | 3 +++
 net/ipx/ipx_route.c | 4 ----
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/ipx.h b/include/net/ipx.h
index 9e9e35465baf..75466acdce21 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -140,6 +140,9 @@ static __inline__ void ipxitf_hold(struct ipx_interface *intrfc)
 }
 
 void ipxitf_down(struct ipx_interface *intrfc);
+struct ipx_interface *ipxitf_find_using_net(__be32 net);
+int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node);
+__be16 ipx_cksum(struct ipxhdr *packet, int length);
 
 static __inline__ void ipxitf_put(struct ipx_interface *intrfc)
 {
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index 30f4519b092f..c1f03185c5e1 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -20,15 +20,11 @@ DEFINE_RWLOCK(ipx_routes_lock);
 
 extern struct ipx_interface *ipx_internal_net;
 
-extern __be16 ipx_cksum(struct ipxhdr *packet, int length);
 extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
 extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
 			       struct sk_buff *skb, int copy);
 extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
 			       struct sk_buff *skb, int copy);
-extern int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb,
-		       char *node);
-extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
 
 struct ipx_route *ipxrtr_lookup(__be32 net)
 {
-- 
cgit v1.2.3


From 578efbc19f468686ad7eb1e505bab56d04e92784 Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 22:26:32 +0530
Subject: net: Move prototype declaration to header file include/net/ipx.h from
 net/ipx/af_ipx.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move prototype declaration of functions to header file include/net/ipx.h
from net/ipx/af_ipx.c because they are used by more than one file.

This eliminates the following warning in
net/ipx/ipx_route.c:33:19: warning: no previous prototype for ‘ipxrtr_lookup’ [-Wmissing-prototypes]
net/ipx/ipx_route.c:52:5: warning: no previous prototype for ‘ipxrtr_add_route’ [-Wmissing-prototypes]
net/ipx/ipx_route.c:94:6: warning: no previous prototype for ‘ipxrtr_del_routes’ [-Wmissing-prototypes]
net/ipx/ipx_route.c:149:5: warning: no previous prototype for ‘ipxrtr_route_skb’ [-Wmissing-prototypes]
net/ipx/ipx_route.c:171:5: warning: no previous prototype for ‘ipxrtr_route_packet’ [-Wmissing-prototypes]
net/ipx/ipx_route.c:261:5: warning: no previous prototype for ‘ipxrtr_ioctl’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipx.h | 8 ++++++++
 net/ipx/af_ipx.c  | 9 ---------
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/ipx.h b/include/net/ipx.h
index 75466acdce21..0143180fecc9 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -143,6 +143,14 @@ void ipxitf_down(struct ipx_interface *intrfc);
 struct ipx_interface *ipxitf_find_using_net(__be32 net);
 int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node);
 __be16 ipx_cksum(struct ipxhdr *packet, int length);
+int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
+		     unsigned char *node);
+void ipxrtr_del_routes(struct ipx_interface *intrfc);
+int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
+			struct iovec *iov, size_t len, int noblock);
+int ipxrtr_route_skb(struct sk_buff *skb);
+struct ipx_route *ipxrtr_lookup(__be32 net);
+int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
 
 static __inline__ void ipxitf_put(struct ipx_interface *intrfc)
 {
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 994e28bfb32e..e5a00a9b52f3 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -84,15 +84,6 @@ DEFINE_SPINLOCK(ipx_interfaces_lock);
 struct ipx_interface *ipx_primary_net;
 struct ipx_interface *ipx_internal_net;
 
-extern int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
-			    unsigned char *node);
-extern void ipxrtr_del_routes(struct ipx_interface *intrfc);
-extern int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
-			       struct iovec *iov, size_t len, int noblock);
-extern int ipxrtr_route_skb(struct sk_buff *skb);
-extern struct ipx_route *ipxrtr_lookup(__be32 net);
-extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
-
 struct ipx_interface *ipx_interfaces_head(void)
 {
 	struct ipx_interface *rc = NULL;
-- 
cgit v1.2.3


From 7780d8ae4ae1960ef7c0570de0a1ecd7b60c8152 Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 22:27:41 +0530
Subject: net: Move prototype declaration to header file include/net/datalink.h
 from net/ipx/af_ipx.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move prototype declarations of function to header file
include/net/datalink.h from net/ipx/af_ipx.c because they are used by
more than one file.

This eliminates the following warning in net/ipx/pe2.c:
net/ipx/pe2.c:20:24: warning: no previous prototype for ‘make_EII_client’ [-Wmissing-prototypes]
net/ipx/pe2.c:32:6: warning: no previous prototype for ‘destroy_EII_client’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/datalink.h | 2 ++
 net/ipx/af_ipx.c       | 4 +---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/datalink.h b/include/net/datalink.h
index deb7ca75db48..93cb18f729b5 100644
--- a/include/net/datalink.h
+++ b/include/net/datalink.h
@@ -15,4 +15,6 @@ struct datalink_proto {
 	struct list_head node;
 };
 
+struct datalink_proto *make_EII_client(void);
+void destroy_EII_client(struct datalink_proto *dl);
 #endif
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index e5a00a9b52f3..224d05856b85 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -52,6 +52,7 @@
 #include <net/p8022.h>
 #include <net/psnap.h>
 #include <net/sock.h>
+#include <net/datalink.h>
 #include <net/tcp_states.h>
 
 #include <asm/uaccess.h>
@@ -1977,9 +1978,6 @@ static struct notifier_block ipx_dev_notifier = {
 	.notifier_call	= ipxitf_device_event,
 };
 
-extern struct datalink_proto *make_EII_client(void);
-extern void destroy_EII_client(struct datalink_proto *);
-
 static const unsigned char ipx_8022_type = 0xE0;
 static const unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 };
 static const char ipx_EII_err_msg[] __initconst =
-- 
cgit v1.2.3


From 535d3ae9c808e6a5248f0524dbc7a9e997cf3288 Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 22:29:14 +0530
Subject: net: Move prototype declaration to header file
 include/net/net_namespace.h from net/ipx/af_ipx.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move prototype declaration of function to header file
include/net/net_namespace.h from net/ipx/af_ipx.c because they are used
by more than one file.

This eliminates the following warning in net/ipx/sysctl_net_ipx.c:
net/ipx/sysctl_net_ipx.c:33:6: warning: no previous prototype for ‘ipx_register_sysctl’ [-Wmissing-prototypes]
net/ipx/sysctl_net_ipx.c:38:6: warning: no previous prototype for ‘ipx_unregister_sysctl’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 8 ++++++++
 net/ipx/af_ipx.c            | 9 +--------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index da68c9a90ac5..991dcd94cbbf 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -162,6 +162,14 @@ extern struct list_head net_namespace_list;
 struct net *get_net_ns_by_pid(pid_t pid);
 struct net *get_net_ns_by_fd(int pid);
 
+#ifdef CONFIG_SYSCTL
+void ipx_register_sysctl(void);
+void ipx_unregister_sysctl(void);
+#else
+#define ipx_register_sysctl()
+#define ipx_unregister_sysctl()
+#endif
+
 #ifdef CONFIG_NET_NS
 void __put_net(struct net *net);
 
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 224d05856b85..00b2a6d1c009 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -54,17 +54,10 @@
 #include <net/sock.h>
 #include <net/datalink.h>
 #include <net/tcp_states.h>
+#include <net/net_namespace.h>
 
 #include <asm/uaccess.h>
 
-#ifdef CONFIG_SYSCTL
-extern void ipx_register_sysctl(void);
-extern void ipx_unregister_sysctl(void);
-#else
-#define ipx_register_sysctl()
-#define ipx_unregister_sysctl()
-#endif
-
 /* Configuration Variables */
 static unsigned char ipxcfg_max_hops = 16;
 static char ipxcfg_auto_select_primary;
-- 
cgit v1.2.3


From bd76ed36bac50b5402fd09c3fc2f368ef324ffe2 Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 22:31:42 +0530
Subject: net: Include appropriate header file in netfilter/nft_lookup.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include appropriate header file net/netfilter/nf_tables_core.h in
net/netfilter/nft_lookup.c because it has prototype declaration of
functions defined in net/netfilter/nft_lookup.c.

This eliminates the following warning in net/netfilter/nft_lookup.c:
net/netfilter/nft_lookup.c:133:12: warning: no previous prototype for ‘nft_lookup_module_init’ [-Wmissing-prototypes]
net/netfilter/nft_lookup.c:138:6: warning: no previous prototype for ‘nft_lookup_module_exit’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nft_lookup.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 8a6116b75b5a..bb4ef4cccb6e 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -16,6 +16,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
 
 struct nft_lookup {
 	struct nft_set			*set;
-- 
cgit v1.2.3


From e1d83ee673eff5407255ba9e884312219f6832d7 Mon Sep 17 00:00:00 2001
From: Rashika Kheria <rashika.kheria@gmail.com>
Date: Sun, 9 Feb 2014 22:33:03 +0530
Subject: net: Mark functions as static in net/sunrpc/svc_xprt.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mark functions as static in net/sunrpc/svc_xprt.c because they are not
used outside this file.

This eliminates the following warning in net/sunrpc/svc_xprt.c:
net/sunrpc/svc_xprt.c:574:5: warning: no previous prototype for ‘svc_alloc_arg’ [-Wmissing-prototypes]
net/sunrpc/svc_xprt.c:615:18: warning: no previous prototype for ‘svc_get_next_xprt’ [-Wmissing-prototypes]
net/sunrpc/svc_xprt.c:694:6: warning: no previous prototype for ‘svc_add_new_temp_xprt’ [-Wmissing-prototypes]

Signed-off-by: Rashika Kheria <rashika.kheria@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/svc_xprt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 80a6640f329b..06c6ff0cb911 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -571,7 +571,7 @@ static void svc_check_conn_limits(struct svc_serv *serv)
 	}
 }
 
-int svc_alloc_arg(struct svc_rqst *rqstp)
+static int svc_alloc_arg(struct svc_rqst *rqstp)
 {
 	struct svc_serv *serv = rqstp->rq_server;
 	struct xdr_buf *arg;
@@ -612,7 +612,7 @@ int svc_alloc_arg(struct svc_rqst *rqstp)
 	return 0;
 }
 
-struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 {
 	struct svc_xprt *xprt;
 	struct svc_pool		*pool = rqstp->rq_pool;
@@ -691,7 +691,7 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 	return xprt;
 }
 
-void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
+static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
 {
 	spin_lock_bh(&serv->sv_lock);
 	set_bit(XPT_TEMP, &newxpt->xpt_flags);
-- 
cgit v1.2.3


From d94c1f92bbf2c42c5febd68bbea51fffeac90834 Mon Sep 17 00:00:00 2001
From: FX Le Bail <fx.lebail@yahoo.com>
Date: Fri, 7 Feb 2014 11:22:37 +0100
Subject: ipv6: icmp6_send: fix Oops when pinging a not set up IPv6 peer on a
 sit tunnel

The patch 446fab59333dea91e54688f033dd8d788d0486fb ("ipv6: enable anycast addresses
as source addresses in ICMPv6 error messages") causes an Oops when pinging a not
set up IPv6 peer on a sit tunnel.

The problem is that ipv6_anycast_destination() uses unconditionally skb_dst(skb),
which is NULL in this case.

The solution is to use instead the ipv6_chk_acast_addr_src() function.

Here are the steps to reproduce it:
modprobe sit
ip link add sit1 type sit remote 10.16.0.121 local 10.16.0.249
ip l s sit1 up
ip -6 a a dev sit1 2001:1234::123 remote 2001:1234::121
ping6 2001:1234::121

Reported-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tested-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Francois-Xavier Le Bail <fx.lebail@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f81f59686f21..f2610e157660 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -414,7 +414,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	addr_type = ipv6_addr_type(&hdr->daddr);
 
 	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
-	    ipv6_anycast_destination(skb))
+	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
 		saddr = &hdr->daddr;
 
 	/*
-- 
cgit v1.2.3


From 310f6fd0ca9de8c1fc6399af494855e15157aa35 Mon Sep 17 00:00:00 2001
From: Christian Engelmayer <cengelma@gmx.at>
Date: Fri, 7 Feb 2014 22:58:38 +0100
Subject: 6lowpan: Remove unused pointer in lowpan_header_create()

Commit 8df8c56a (6lowpan: Moving generic compression code into 6lowpan_iphc.c)
left pointer 'hdr' unused - remove it.

Detected by Coverity: CID 1164868.

Signed-off-by: Christian Engelmayer <cengelma@gmx.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 48b25c0af4d0..8bfb40153fe7 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -106,7 +106,6 @@ static int lowpan_header_create(struct sk_buff *skb,
 			   unsigned short type, const void *_daddr,
 			   const void *_saddr, unsigned int len)
 {
-	struct ipv6hdr *hdr;
 	const u8 *saddr = _saddr;
 	const u8 *daddr = _daddr;
 	struct ieee802154_addr sa, da;
@@ -117,8 +116,6 @@ static int lowpan_header_create(struct sk_buff *skb,
 	if (type != ETH_P_IPV6)
 		return 0;
 
-	hdr = ipv6_hdr(skb);
-
 	if (!saddr)
 		saddr = dev->dev_addr;
 
-- 
cgit v1.2.3


From 946c032e5a53992ea45e062ecb08670ba39b99e3 Mon Sep 17 00:00:00 2001
From: Maciej Żenczykowski <maze@google.com>
Date: Fri, 7 Feb 2014 16:23:48 -0800
Subject: net: fix 'ip rule' iif/oif device rename
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ip rules with iif/oif references do not update:
(detach/attach) across interface renames.

Signed-off-by: Maciej Żenczykowski <maze@google.com>
CC: Willem de Bruijn <willemb@google.com>
CC: Eric Dumazet <edumazet@google.com>
CC: Chris Davis <chrismd@google.com>
CC: Carlo Contavalli <ccontavalli@google.com>

Google-Bug-Id: 12936021
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index f409e0bd35c0..185c341fafbd 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -745,6 +745,13 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 			attach_rules(&ops->rules_list, dev);
 		break;
 
+	case NETDEV_CHANGENAME:
+		list_for_each_entry(ops, &net->rules_ops, list) {
+			detach_rules(&ops->rules_list, dev);
+			attach_rules(&ops->rules_list, dev);
+		}
+		break;
+
 	case NETDEV_UNREGISTER:
 		list_for_each_entry(ops, &net->rules_ops, list)
 			detach_rules(&ops->rules_list, dev);
-- 
cgit v1.2.3


From b10bd54c05a6c3c96549f4642d7de23c99b9853b Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Sun, 9 Feb 2014 23:30:32 +0100
Subject: tcp: correct code comment stating 3 min timeout for FIN_WAIT2, we
 only do 1 min

As far as I can tell we have used a default of 60 seconds for
FIN_WAIT2 timeout for ages (since 2.x times??).

In any case, the timeout these days is 60 seconds, so the 3 min
comment is wrong (and cost me a few minutes of my life when I was
debugging a FIN_WAIT2 related problem in a userspace application and
checked the kernel source for details).

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4475b3bb494d..9f3a2db9109e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2229,7 +2229,7 @@ adjudge_to_death:
 	/*	This is a (useful) BSD violating of the RFC. There is a
 	 *	problem with TCP as specified in that the other end could
 	 *	keep a socket open forever with no application left this end.
-	 *	We use a 3 minute timeout (about the same as BSD) then kill
+	 *	We use a 1 minute timeout (about the same as BSD) then kill
 	 *	our end. If they send after that then tough - BUT: long enough
 	 *	that we won't make the old 4*rto = almost no time - whoops
 	 *	reset mistake.
-- 
cgit v1.2.3


From a5642ab4744bc8c5a8c7ce7c6e30c01bd6bbc691 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 7 Feb 2014 16:48:18 +0900
Subject: bridge: Fix the way to find old local fdb entries in
 br_fdb_changeaddr

br_fdb_changeaddr() assumes that there is at most one local entry per port
per vlan. It used to be true, but since commit 36fd2b63e3b4 ("bridge: allow
creating/deleting fdb entries via netlink"), it has not been so.
Therefore, the function might fail to search a correct previous address
to be deleted and delete an arbitrary local entry if user has added local
entries manually.

Example of problematic case:
  ip link set eth0 address ee:ff:12:34:56:78
  brctl addif br0 eth0
  bridge fdb add 12:34:56:78:90:ab dev eth0 master
  ip link set eth0 address aa:bb:cc:dd:ee:ff
Then, the address 12:34:56:78:90:ab might be deleted instead of
ee:ff:12:34:56:78, the original mac address of eth0.

Address this issue by introducing a new flag, added_by_user, to struct
net_bridge_fdb_entry.

Note that br_fdb_delete_by_port() has to set added_by_user to 0 in cases
like:
  ip link set eth0 address 12:34:56:78:90:ab
  ip link set eth1 address aa:bb:cc:dd:ee:ff
  brctl addif br0 eth0
  bridge fdb add aa:bb:cc:dd:ee:ff dev eth0 master
  brctl addif br0 eth1
  brctl delif br0 eth0
In this case, kernel should delete the user-added entry aa:bb:cc:dd:ee:ff,
but it also should have been added by "brctl addif br0 eth1" originally,
so we don't delete it and treat it a new kernel-created entry.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c     | 16 ++++++++++++----
 net/bridge/br_input.c   |  4 ++--
 net/bridge/br_private.h |  3 ++-
 3 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c5f5a4a933f4..ce5411995a63 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -104,7 +104,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 			struct net_bridge_fdb_entry *f;
 
 			f = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
-			if (f->dst == p && f->is_local) {
+			if (f->dst == p && f->is_local && !f->added_by_user) {
 				/* maybe another port has same hw addr? */
 				struct net_bridge_port *op;
 				u16 vid = f->vlan_id;
@@ -247,6 +247,7 @@ void br_fdb_delete_by_port(struct net_bridge *br,
 					    ether_addr_equal(op->dev->dev_addr,
 							     f->addr.addr)) {
 						f->dst = op;
+						f->added_by_user = 0;
 						goto skip_delete;
 					}
 				}
@@ -397,6 +398,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
 		fdb->vlan_id = vid;
 		fdb->is_local = 0;
 		fdb->is_static = 0;
+		fdb->added_by_user = 0;
 		fdb->updated = fdb->used = jiffies;
 		hlist_add_head_rcu(&fdb->hlist, head);
 	}
@@ -447,7 +449,7 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 }
 
 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
-		   const unsigned char *addr, u16 vid)
+		   const unsigned char *addr, u16 vid, bool added_by_user)
 {
 	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *fdb;
@@ -473,13 +475,18 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			/* fastpath: update of existing entry */
 			fdb->dst = source;
 			fdb->updated = jiffies;
+			if (unlikely(added_by_user))
+				fdb->added_by_user = 1;
 		}
 	} else {
 		spin_lock(&br->hash_lock);
 		if (likely(!fdb_find(head, addr, vid))) {
 			fdb = fdb_create(head, source, addr, vid);
-			if (fdb)
+			if (fdb) {
+				if (unlikely(added_by_user))
+					fdb->added_by_user = 1;
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
+			}
 		}
 		/* else  we lose race and someone else inserts
 		 * it first, don't bother updating
@@ -647,6 +654,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
 
 		modified = true;
 	}
+	fdb->added_by_user = 1;
 
 	fdb->used = jiffies;
 	if (modified) {
@@ -664,7 +672,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
 
 	if (ndm->ndm_flags & NTF_USE) {
 		rcu_read_lock();
-		br_fdb_update(p->br, p, addr, vid);
+		br_fdb_update(p->br, p, addr, vid, true);
 		rcu_read_unlock();
 	} else {
 		spin_lock_bh(&p->br->hash_lock);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index bf8dc7d308d6..28d544627422 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -77,7 +77,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
 	/* insert into forwarding database after filtering to avoid spoofing */
 	br = p->br;
 	if (p->flags & BR_LEARNING)
-		br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
+		br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
 
 	if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
 	    br_multicast_rcv(br, p, skb, vid))
@@ -148,7 +148,7 @@ static int br_handle_local_finish(struct sk_buff *skb)
 
 	br_vlan_get_tag(skb, &vid);
 	if (p->flags & BR_LEARNING)
-		br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid);
+		br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
 	return 0;	 /* process further */
 }
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index fcd12333c59b..939a59e15036 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -104,6 +104,7 @@ struct net_bridge_fdb_entry
 	mac_addr			addr;
 	unsigned char			is_local;
 	unsigned char			is_static;
+	unsigned char			added_by_user;
 	__u16				vlan_id;
 };
 
@@ -383,7 +384,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
 int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		  const unsigned char *addr, u16 vid);
 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
-		   const unsigned char *addr, u16 vid);
+		   const unsigned char *addr, u16 vid, bool added_by_user);
 int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid);
 
 int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
-- 
cgit v1.2.3


From 2836882fe07718fe3263745b1aa07284ec71871c Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 7 Feb 2014 16:48:19 +0900
Subject: bridge: Fix the way to insert new local fdb entries in
 br_fdb_changeaddr

Since commit bc9a25d21ef8 ("bridge: Add vlan support for local fdb entries"),
br_fdb_changeaddr() has inserted a new local fdb entry only if it can
find old one. But if we have two ports where they have the same address
or user has deleted a local entry, there will be no entry for one of the
ports.

Example of problematic case:
  ip link set eth0 address aa:bb:cc:dd:ee:ff
  ip link set eth1 address aa:bb:cc:dd:ee:ff
  brctl addif br0 eth0
  brctl addif br0 eth1 # eth1 will not have a local entry due to dup.
  ip link set eth1 address 12:34:56:78:90:ab
Then, the new entry for the address 12:34:56:78:90:ab will not be
created, and the bridge device will not be able to communicate.

Insert new entries regardless of whether we can find old entries or not.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ce5411995a63..96ab1d1748d0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -92,8 +92,10 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 {
 	struct net_bridge *br = p->br;
-	bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false;
+	struct net_port_vlans *pv = nbp_get_vlan_info(p);
+	bool no_vlan = !pv;
 	int i;
+	u16 vid;
 
 	spin_lock_bh(&br->hash_lock);
 
@@ -114,28 +116,37 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 							     f->addr.addr) &&
 					    nbp_vlan_find(op, vid)) {
 						f->dst = op;
-						goto insert;
+						goto skip_delete;
 					}
 				}
 
 				/* delete old one */
 				fdb_delete(br, f);
-insert:
-				/* insert new address,  may fail if invalid
-				 * address or dup.
-				 */
-				fdb_insert(br, p, newaddr, vid);
-
+skip_delete:
 				/* if this port has no vlan information
 				 * configured, we can safely be done at
 				 * this point.
 				 */
 				if (no_vlan)
-					goto done;
+					goto insert;
 			}
 		}
 	}
 
+insert:
+	/* insert new address,  may fail if invalid address or dup. */
+	fdb_insert(br, p, newaddr, 0);
+
+	if (no_vlan)
+		goto done;
+
+	/* Now add entries for every VLAN configured on the port.
+	 * This function runs under RTNL so the bitmap will not change
+	 * from under us.
+	 */
+	for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
+		fdb_insert(br, p, newaddr, vid);
+
 done:
 	spin_unlock_bh(&br->hash_lock);
 }
-- 
cgit v1.2.3


From a3ebb7efe7033d903d86c9d664f07a9cc21747b3 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 7 Feb 2014 16:48:20 +0900
Subject: bridge: Fix the way to find old local fdb entries in
 br_fdb_change_mac_address

We have been always failed to delete the old entry at
br_fdb_change_mac_address() because br_set_mac_address() updates
dev->dev_addr before calling br_fdb_change_mac_address() and
br_fdb_change_mac_address() uses dev->dev_addr to find the old entry.

That update of dev_addr is completely unnecessary because the same work
is done in br_stp_change_bridge_id() which is called right away after
calling br_fdb_change_mac_address().

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index d9a9b0fc1795..6f5cbd1a2f38 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -187,8 +187,8 @@ static int br_set_mac_address(struct net_device *dev, void *p)
 
 	spin_lock_bh(&br->lock);
 	if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
-		memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
 		br_fdb_change_mac_address(br, addr->sa_data);
+		/* Mac address will be changed in br_stp_change_bridge_id(). */
 		br_stp_change_bridge_id(br, addr->sa_data);
 	}
 	spin_unlock_bh(&br->lock);
-- 
cgit v1.2.3


From a4b816d8ba1c1917842dc3de97cbf8ef116e043e Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 7 Feb 2014 16:48:21 +0900
Subject: bridge: Change local fdb entries whenever mac address of bridge
 device changes

Vlan code may need fdb change when changing mac address of bridge device
even if it is caused by the mac address changing of a bridge port.

Example configuration:
  ip link set eth0 address 12:34:56:78:90:ab
  ip link set eth1 address aa:bb:cc:dd:ee:ff
  brctl addif br0 eth0
  brctl addif br0 eth1 # br0 will have mac address 12:34:56:78:90:ab
  bridge vlan add dev br0 vid 10 self
  bridge vlan add dev eth0 vid 10
We will have fdb entry such that f->dst == NULL, f->vlan_id == 10 and
f->addr == 12:34:56:78:90:ab at this time.
Next, change the mac address of eth0 to greater value.
  ip link set eth0 address ee:ff:12:34:56:78
Then, mac address of br0 will be recalculated and set to aa:bb:cc:dd:ee:ff.
However, an entry aa:bb:cc:dd:ee:ff will not be created and we will be not
able to communicate using br0 on vlan 10.

Address this issue by deleting and adding local entries whenever
changing the mac address of the bridge device.

If there already exists an entry that has the same address, for example,
in case that br_fdb_changeaddr() has already inserted it,
br_fdb_change_mac_address() will simply fail to insert it and no
duplicated entry will be made, as it was.

This approach also needs br_add_if() to call br_fdb_insert() before
br_stp_recalculate_bridge_id() so that we don't create an entry whose
dst == NULL in this function to preserve previous behavior.

Note that this is a slight change in behavior where the bridge device can
receive the traffic to the new address before calling
br_stp_recalculate_bridge_id() in br_add_if().
However, it is not a problem because we have already the address on the
new port and such a way to insert new one before recalculating bridge id
is taken in br_device_event() as well.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 1 -
 net/bridge/br_if.c     | 6 +++---
 net/bridge/br_stp_if.c | 2 ++
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 6f5cbd1a2f38..63f0455c0bc3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -187,7 +187,6 @@ static int br_set_mac_address(struct net_device *dev, void *p)
 
 	spin_lock_bh(&br->lock);
 	if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
-		br_fdb_change_mac_address(br, addr->sa_data);
 		/* Mac address will be changed in br_stp_change_bridge_id(). */
 		br_stp_change_bridge_id(br, addr->sa_data);
 	}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index cffe1d666ba1..54d207d3a31c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -389,6 +389,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (br->dev->needed_headroom < dev->needed_headroom)
 		br->dev->needed_headroom = dev->needed_headroom;
 
+	if (br_fdb_insert(br, p, dev->dev_addr, 0))
+		netdev_err(dev, "failed insert local address bridge forwarding table\n");
+
 	spin_lock_bh(&br->lock);
 	changed_addr = br_stp_recalculate_bridge_id(br);
 
@@ -404,9 +407,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	dev_set_mtu(br->dev, br_min_mtu(br));
 
-	if (br_fdb_insert(br, p, dev->dev_addr, 0))
-		netdev_err(dev, "failed insert local address bridge forwarding table\n");
-
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
 	return 0;
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 656a6f3e40de..189ba1e7d851 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -194,6 +194,8 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
 
 	wasroot = br_is_root_bridge(br);
 
+	br_fdb_change_mac_address(br, addr);
+
 	memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN);
 	memcpy(br->bridge_id.addr, addr, ETH_ALEN);
 	memcpy(br->dev->dev_addr, addr, ETH_ALEN);
-- 
cgit v1.2.3


From 2b292fb4a57dc233e298a84196d33be0bc3828e4 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 7 Feb 2014 16:48:22 +0900
Subject: bridge: Fix the way to check if a local fdb entry can be deleted

We should take into account the followings when deleting a local fdb
entry.

- nbp_vlan_find() can be used only when vid != 0 to check if an entry is
  deletable, because a fdb entry with vid 0 can exist at any time while
  nbp_vlan_find() always return false with vid 0.

  Example of problematic case:
    ip link set eth0 address 12:34:56:78:90:ab
    ip link set eth1 address 12:34:56:78:90:ab
    brctl addif br0 eth0
    brctl addif br0 eth1
    ip link set eth0 address aa:bb:cc:dd:ee:ff
  Then, the fdb entry 12:34:56:78:90:ab will be deleted even though the
  bridge port eth1 still has that address.

- The port to which the bridge device is attached might needs a local entry
  if its mac address is set manually.

  Example of problematic case:
    ip link set eth0 address 12:34:56:78:90:ab
    brctl addif br0 eth0
    ip link set br0 address 12:34:56:78:90:ab
    ip link set eth0 address aa:bb:cc:dd:ee:ff
  Then, the fdb still must have the entry 12:34:56:78:90:ab, but it will be
  deleted.

We can use br->dev->addr_assign_type to check if the address is manually
set or not, but I propose another approach.

Since we delete and insert local entries whenever changing mac address
of the bridge device, we can change dst of the entry to NULL regardless of
addr_assign_type when deleting an entry associated with a certain port,
and if it is found to be unnecessary later, then delete it.
That is, if changing mac address of a port, the entry might be changed
to its dst being NULL first, but is eventually deleted when recalculating
and changing bridge id.

This approach is especially useful when we want to share the code with
deleting vlan in which the bridge device might want such an entry regardless
of addr_assign_type, and makes things easy because we don't have to consider
if mac address of the bridge device will be changed or not at the time we
delete a local entry of a port, which means fdb code will not be bothered
even if the bridge id calculating logic is changed in the future.

Also, this change reduces inconsistent state, where frames whose dst is the
mac address of the bridge, can't reach the bridge because of premature fdb
entry deletion. This change reduces the possibility that the bridge device
replies unreachable mac address to arp requests, which could occur during
the short window between calling del_nbp() and br_stp_recalculate_bridge_id()
in br_del_if(). This will effective after br_fdb_delete_by_port() starts to
use the same code by following patch.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c     | 10 +++++++++-
 net/bridge/br_private.h |  6 ++++++
 net/bridge/br_vlan.c    | 19 +++++++++++++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 96ab1d1748d0..b4005f5b28f4 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -114,12 +114,20 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 					if (op != p &&
 					    ether_addr_equal(op->dev->dev_addr,
 							     f->addr.addr) &&
-					    nbp_vlan_find(op, vid)) {
+					    (!vid || nbp_vlan_find(op, vid))) {
 						f->dst = op;
 						goto skip_delete;
 					}
 				}
 
+				/* maybe bridge device has same hw addr? */
+				if (ether_addr_equal(br->dev->dev_addr,
+						     f->addr.addr) &&
+				    (!vid || br_vlan_find(br, vid))) {
+					f->dst = NULL;
+					goto skip_delete;
+				}
+
 				/* delete old one */
 				fdb_delete(br, f);
 skip_delete:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 939a59e15036..f91e1d9c8e92 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -585,6 +585,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
 int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
 int br_vlan_delete(struct net_bridge *br, u16 vid);
 void br_vlan_flush(struct net_bridge *br);
+bool br_vlan_find(struct net_bridge *br, u16 vid);
 int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
 int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
 int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
@@ -666,6 +667,11 @@ static inline void br_vlan_flush(struct net_bridge *br)
 {
 }
 
+static inline bool br_vlan_find(struct net_bridge *br, u16 vid)
+{
+	return false;
+}
+
 static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
 {
 	return -EOPNOTSUPP;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 4ca4d0a0151c..233ec1c6e9db 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -295,6 +295,25 @@ void br_vlan_flush(struct net_bridge *br)
 	__vlan_flush(pv);
 }
 
+bool br_vlan_find(struct net_bridge *br, u16 vid)
+{
+	struct net_port_vlans *pv;
+	bool found = false;
+
+	rcu_read_lock();
+	pv = rcu_dereference(br->vlan_info);
+
+	if (!pv)
+		goto out;
+
+	if (test_bit(vid, pv->vlan_bitmap))
+		found = true;
+
+out:
+	rcu_read_unlock();
+	return found;
+}
+
 int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
 {
 	if (!rtnl_trylock())
-- 
cgit v1.2.3


From 960b589f86c74ce582922fcb996103271081f4de Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 7 Feb 2014 16:48:23 +0900
Subject: bridge: Properly check if local fdb entry can be deleted in
 br_fdb_change_mac_address

br_fdb_change_mac_address() doesn't check if the local entry has the
same address as any of bridge ports.
Although I'm not sure when it is beneficial, current implementation allow
the bridge device to receive any mac address of its ports.
To preserve this behavior, we have to check if the mac address of the
entry being deleted is identical to that of any port.

As this check is almost the same as that in br_fdb_changeaddr(), create
a common function fdb_delete_local() and call it from
br_fdb_changeadddr() and br_fdb_change_mac_address().

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 57 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b4005f5b28f4..15bf13d0b543 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -89,6 +89,34 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
 	call_rcu(&f->rcu, fdb_rcu_free);
 }
 
+/* Delete a local entry if no other port had the same address. */
+static void fdb_delete_local(struct net_bridge *br,
+			     const struct net_bridge_port *p,
+			     struct net_bridge_fdb_entry *f)
+{
+	const unsigned char *addr = f->addr.addr;
+	u16 vid = f->vlan_id;
+	struct net_bridge_port *op;
+
+	/* Maybe another port has same hw addr? */
+	list_for_each_entry(op, &br->port_list, list) {
+		if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
+		    (!vid || nbp_vlan_find(op, vid))) {
+			f->dst = op;
+			return;
+		}
+	}
+
+	/* Maybe bridge device has same hw addr? */
+	if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
+	    (!vid || br_vlan_find(br, vid))) {
+		f->dst = NULL;
+		return;
+	}
+
+	fdb_delete(br, f);
+}
+
 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 {
 	struct net_bridge *br = p->br;
@@ -107,30 +135,9 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 
 			f = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
 			if (f->dst == p && f->is_local && !f->added_by_user) {
-				/* maybe another port has same hw addr? */
-				struct net_bridge_port *op;
-				u16 vid = f->vlan_id;
-				list_for_each_entry(op, &br->port_list, list) {
-					if (op != p &&
-					    ether_addr_equal(op->dev->dev_addr,
-							     f->addr.addr) &&
-					    (!vid || nbp_vlan_find(op, vid))) {
-						f->dst = op;
-						goto skip_delete;
-					}
-				}
-
-				/* maybe bridge device has same hw addr? */
-				if (ether_addr_equal(br->dev->dev_addr,
-						     f->addr.addr) &&
-				    (!vid || br_vlan_find(br, vid))) {
-					f->dst = NULL;
-					goto skip_delete;
-				}
-
 				/* delete old one */
-				fdb_delete(br, f);
-skip_delete:
+				fdb_delete_local(br, p, f);
+
 				/* if this port has no vlan information
 				 * configured, we can safely be done at
 				 * this point.
@@ -168,7 +175,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 	/* If old entry was unassociated with any port, then delete it. */
 	f = __br_fdb_get(br, br->dev->dev_addr, 0);
 	if (f && f->is_local && !f->dst)
-		fdb_delete(br, f);
+		fdb_delete_local(br, NULL, f);
 
 	fdb_insert(br, NULL, newaddr, 0);
 
@@ -183,7 +190,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 	for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
 		f = __br_fdb_get(br, br->dev->dev_addr, vid);
 		if (f && f->is_local && !f->dst)
-			fdb_delete(br, f);
+			fdb_delete_local(br, NULL, f);
 		fdb_insert(br, NULL, newaddr, vid);
 	}
 }
-- 
cgit v1.2.3


From a778e6d1a51faaafa6a3a3cef9bee11c3bd47f9f Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 7 Feb 2014 16:48:24 +0900
Subject: bridge: Properly check if local fdb entry can be deleted in
 br_fdb_delete_by_port

br_fdb_delete_by_port() doesn't care about vlan and mac address of the
bridge device.

As the check is almost the same as mac address changing, slightly modify
fdb_delete_local() and use it.

Note that we can always set added_by_user to 0 in fdb_delete_local() because
- br_fdb_delete_by_port() calls fdb_delete_local() for local entries
  regardless of its added_by_user. In this case, we have to check if another
  port has the same address and vlan, and if found, we have to create the
  entry (by changing dst). This is kernel-added entry, not user-added.
- br_fdb_changeaddr() doesn't call fdb_delete_local() for user-added entry.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 15bf13d0b543..43e54d1ae956 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -103,6 +103,7 @@ static void fdb_delete_local(struct net_bridge *br,
 		if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
 		    (!vid || nbp_vlan_find(op, vid))) {
 			f->dst = op;
+			f->added_by_user = 0;
 			return;
 		}
 	}
@@ -111,6 +112,7 @@ static void fdb_delete_local(struct net_bridge *br,
 	if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
 	    (!vid || br_vlan_find(br, vid))) {
 		f->dst = NULL;
+		f->added_by_user = 0;
 		return;
 	}
 
@@ -261,26 +263,11 @@ void br_fdb_delete_by_port(struct net_bridge *br,
 
 			if (f->is_static && !do_all)
 				continue;
-			/*
-			 * if multiple ports all have the same device address
-			 * then when one port is deleted, assign
-			 * the local entry to other port
-			 */
-			if (f->is_local) {
-				struct net_bridge_port *op;
-				list_for_each_entry(op, &br->port_list, list) {
-					if (op != p &&
-					    ether_addr_equal(op->dev->dev_addr,
-							     f->addr.addr)) {
-						f->dst = op;
-						f->added_by_user = 0;
-						goto skip_delete;
-					}
-				}
-			}
 
-			fdb_delete(br, f);
-		skip_delete: ;
+			if (f->is_local)
+				fdb_delete_local(br, p, f);
+			else
+				fdb_delete(br, f);
 		}
 	}
 	spin_unlock_bh(&br->hash_lock);
-- 
cgit v1.2.3


From 424bb9c97cc1992018950485ca7410779b8ea21d Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 7 Feb 2014 16:48:25 +0900
Subject: bridge: Properly check if local fdb entry can be deleted when
 deleting vlan

Vlan codes unconditionally delete local fdb entries.
We should consider the possibility that other ports have the same
address and vlan.

Example of problematic case:
  ip link set eth0 address 12:34:56:78:90:ab
  ip link set eth1 address aa:bb:cc:dd:ee:ff
  brctl addif br0 eth0
  brctl addif br0 eth1 # br0 will have mac address 12:34:56:78:90:ab
  bridge vlan add dev eth0 vid 10
  bridge vlan add dev eth1 vid 10
  bridge vlan add dev br0 vid 10 self
We will have fdb entry such that f->dst == eth0, f->vlan_id == 10 and
f->addr == 12:34:56:78:90:ab at this time.
Next, delete eth0 vlan 10.
  bridge vlan del dev eth0 vid 10
In this case, we still need the entry for br0, but it will be deleted.

Note that br0 needs the entry even though its mac address is not set
manually. To delete the entry with proper condition checking,
fdb_delete_local() is suitable to use.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c     | 20 ++++++++++++++++++--
 net/bridge/br_private.h |  4 +++-
 net/bridge/br_vlan.c    |  8 ++------
 3 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 43e54d1ae956..0426dff4b3fd 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -27,6 +27,9 @@
 #include "br_private.h"
 
 static struct kmem_cache *br_fdb_cache __read_mostly;
+static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
+					     const unsigned char *addr,
+					     __u16 vid);
 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		      const unsigned char *addr, u16 vid);
 static void fdb_notify(struct net_bridge *br,
@@ -119,6 +122,20 @@ static void fdb_delete_local(struct net_bridge *br,
 	fdb_delete(br, f);
 }
 
+void br_fdb_find_delete_local(struct net_bridge *br,
+			      const struct net_bridge_port *p,
+			      const unsigned char *addr, u16 vid)
+{
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
+	struct net_bridge_fdb_entry *f;
+
+	spin_lock_bh(&br->hash_lock);
+	f = fdb_find(head, addr, vid);
+	if (f && f->is_local && !f->added_by_user && f->dst == p)
+		fdb_delete_local(br, p, f);
+	spin_unlock_bh(&br->hash_lock);
+}
+
 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
 {
 	struct net_bridge *br = p->br;
@@ -770,8 +787,7 @@ out:
 	return err;
 }
 
-int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr,
-		       u16 vlan)
+static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vlan)
 {
 	struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)];
 	struct net_bridge_fdb_entry *fdb;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f91e1d9c8e92..3ba11bc99b65 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -371,6 +371,9 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
 int br_fdb_init(void);
 void br_fdb_fini(void);
 void br_fdb_flush(struct net_bridge *br);
+void br_fdb_find_delete_local(struct net_bridge *br,
+			      const struct net_bridge_port *p,
+			      const unsigned char *addr, u16 vid);
 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
 void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
 void br_fdb_cleanup(unsigned long arg);
@@ -385,7 +388,6 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		  const unsigned char *addr, u16 vid);
 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		   const unsigned char *addr, u16 vid, bool added_by_user);
-int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid);
 
 int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 		  struct net_device *dev, const unsigned char *addr);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 233ec1c6e9db..8249ca764c79 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -275,9 +275,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid)
 	if (!pv)
 		return -EINVAL;
 
-	spin_lock_bh(&br->hash_lock);
-	fdb_delete_by_addr(br, br->dev->dev_addr, vid);
-	spin_unlock_bh(&br->hash_lock);
+	br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid);
 
 	__vlan_del(pv, vid);
 	return 0;
@@ -378,9 +376,7 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
 	if (!pv)
 		return -EINVAL;
 
-	spin_lock_bh(&port->br->hash_lock);
-	fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
-	spin_unlock_bh(&port->br->hash_lock);
+	br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid);
 
 	return __vlan_del(pv, vid);
 }
-- 
cgit v1.2.3


From ac4c8868837a7c70ebb3eaf2298324a3b61b214e Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 7 Feb 2014 16:48:26 +0900
Subject: bridge: Prevent possible race condition in br_fdb_change_mac_address

br_fdb_change_mac_address() calls fdb_insert()/fdb_delete() without
br->hash_lock.

These hash list updates are racy with br_fdb_update()/br_fdb_cleanup().

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 0426dff4b3fd..9203d5a1943f 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -191,6 +191,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 	struct net_port_vlans *pv;
 	u16 vid = 0;
 
+	spin_lock_bh(&br->hash_lock);
+
 	/* If old entry was unassociated with any port, then delete it. */
 	f = __br_fdb_get(br, br->dev->dev_addr, 0);
 	if (f && f->is_local && !f->dst)
@@ -204,7 +206,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 	 */
 	pv = br_get_vlan_info(br);
 	if (!pv)
-		return;
+		goto out;
 
 	for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
 		f = __br_fdb_get(br, br->dev->dev_addr, vid);
@@ -212,6 +214,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
 			fdb_delete_local(br, NULL, f);
 		fdb_insert(br, NULL, newaddr, vid);
 	}
+out:
+	spin_unlock_bh(&br->hash_lock);
 }
 
 void br_fdb_cleanup(unsigned long _data)
-- 
cgit v1.2.3


From bf06200e732de613a1277984bf34d1a21c2de03d Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Sun, 9 Feb 2014 18:40:11 -0800
Subject: tcp: tsq: fix nonagle handling

Commit 46d3ceabd8d9 ("tcp: TCP Small Queues") introduced a possible
regression for applications using TCP_NODELAY.

If TCP session is throttled because of tsq, we should consult
tp->nonagle when TX completion is done and allow us to send additional
segment, especially if this segment is not a full MSS.
Otherwise this segment is sent after an RTO.

[edumazet] : Cooked the changelog, added another fix about testing
sk_wmem_alloc twice because TX completion can happen right before
setting TSQ_THROTTLED bit.

This problem is particularly visible with recent auto corking,
but might also be triggered with low tcp_limit_output_bytes
values or NIC drivers delaying TX completion by hundred of usec,
and very low rtt.

Thomas Glanzmann for example reported an iscsi regression, caused
by tcp auto corking making this bug quite visible.

Fixes: 46d3ceabd8d9 ("tcp: TCP Small Queues")
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Thomas Glanzmann <thomas@glanzmann.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 10435b3b9d0f..3be16727f058 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -698,7 +698,8 @@ static void tcp_tsq_handler(struct sock *sk)
 	if ((1 << sk->sk_state) &
 	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
 	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
-		tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+		tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+			       0, GFP_ATOMIC);
 }
 /*
  * One tasklet per cpu tries to send more skbs.
@@ -1904,7 +1905,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
-			break;
+			/* It is possible TX completion already happened
+			 * before we set TSQ_THROTTLED, so we must
+			 * test again the condition.
+			 * We abuse smp_mb__after_clear_bit() because
+			 * there is no smp_mb__after_set_bit() yet
+			 */
+			smp_mb__after_clear_bit();
+			if (atomic_read(&sk->sk_wmem_alloc) > limit)
+				break;
 		}
 
 		limit = mss_now;
-- 
cgit v1.2.3


From b6f52ae2f0d32387bde2b89883e3b64d88b9bfe8 Mon Sep 17 00:00:00 2001
From: Richard Yao <ryao@gentoo.org>
Date: Sat, 8 Feb 2014 19:32:01 -0500
Subject: 9p/trans_virtio.c: Fix broken zero-copy on vmalloc() buffers

The 9p-virtio transport does zero copy on things larger than 1024 bytes
in size. It accomplishes this by returning the physical addresses of
pages to the virtio-pci device. At present, the translation is usually a
bit shift.

That approach produces an invalid page address when we read/write to
vmalloc buffers, such as those used for Linux kernel modules. Any
attempt to load a Linux kernel module from 9p-virtio produces the
following stack.

[<ffffffff814878ce>] p9_virtio_zc_request+0x45e/0x510
[<ffffffff814814ed>] p9_client_zc_rpc.constprop.16+0xfd/0x4f0
[<ffffffff814839dd>] p9_client_read+0x15d/0x240
[<ffffffff811c8440>] v9fs_fid_readn+0x50/0xa0
[<ffffffff811c84a0>] v9fs_file_readn+0x10/0x20
[<ffffffff811c84e7>] v9fs_file_read+0x37/0x70
[<ffffffff8114e3fb>] vfs_read+0x9b/0x160
[<ffffffff81153571>] kernel_read+0x41/0x60
[<ffffffff810c83ab>] copy_module_from_fd.isra.34+0xfb/0x180

Subsequently, QEMU will die printing:

qemu-system-x86_64: virtio: trying to map MMIO memory

This patch enables 9p-virtio to correctly handle this case. This not
only enables us to load Linux kernel modules off virtfs, but also
enables ZFS file-based vdevs on virtfs to be used without killing QEMU.

Special thanks to both Avi Kivity and Alexander Graf for their
interpretation of QEMU backtraces. Without their guidence, tracking down
this bug would have taken much longer. Also, special thanks to Linus
Torvalds for his insightful explanation of why this should use
is_vmalloc_addr() instead of is_vmalloc_or_module_addr():

https://lkml.org/lkml/2014/2/8/272

Signed-off-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/trans_virtio.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index cd1e1ede73a4..ac2666c1d011 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -340,7 +340,10 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
 		int count = nr_pages;
 		while (nr_pages) {
 			s = rest_of_page(data);
-			pages[index++] = kmap_to_page(data);
+			if (is_vmalloc_addr(data))
+				pages[index++] = vmalloc_to_page(data);
+			else
+				pages[index++] = kmap_to_page(data);
 			data += s;
 			nr_pages--;
 		}
-- 
cgit v1.2.3


From 20e7c4e80dcd01dad5e6c8b32455228b8fe9c619 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 10 Feb 2014 11:42:35 -0800
Subject: 6lowpan: fix lockdep splats

When a device ndo_start_xmit() calls again dev_queue_xmit(),
lockdep can complain because dev_queue_xmit() is re-entered and the
spinlocks protecting tx queues share a common lockdep class.

Same issue was fixed for bonding/l2tp/ppp in commits

0daa2303028a6 ("[PATCH] bonding: lockdep annotation")
49ee49202b4ac ("bonding: set qdisc_tx_busylock to avoid LOCKDEP splat")
23d3b8bfb8eb2 ("net: qdisc busylock needs lockdep annotations ")
303c07db487be ("ppp: set qdisc_tx_busylock to avoid LOCKDEP splat ")

Reported-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 8bfb40153fe7..8edfea5da572 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -530,7 +530,27 @@ static struct header_ops lowpan_header_ops = {
 	.create	= lowpan_header_create,
 };
 
+static struct lock_class_key lowpan_tx_busylock;
+static struct lock_class_key lowpan_netdev_xmit_lock_key;
+
+static void lowpan_set_lockdep_class_one(struct net_device *dev,
+					 struct netdev_queue *txq,
+					 void *_unused)
+{
+	lockdep_set_class(&txq->_xmit_lock,
+			  &lowpan_netdev_xmit_lock_key);
+}
+
+
+static int lowpan_dev_init(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL);
+	dev->qdisc_tx_busylock = &lowpan_tx_busylock;
+	return 0;
+}
+
 static const struct net_device_ops lowpan_netdev_ops = {
+	.ndo_init		= lowpan_dev_init,
 	.ndo_start_xmit		= lowpan_xmit,
 	.ndo_set_mac_address	= lowpan_set_address,
 };
-- 
cgit v1.2.3