From 64380a04deeed4720de76b086a3a4eab8dd41671 Mon Sep 17 00:00:00 2001
From: Erik Hugne <erik.hugne@ericsson.com>
Date: Tue, 11 Feb 2014 11:38:26 +0100
Subject: tipc: fix message corruption bug for deferred packets

If a packet received on a link is out-of-sequence, it will be
placed on a deferred queue and later reinserted in the receive
path once the preceding packets have been processed. The problem
with this is that it will be subject to the buffer adjustment from
link_recv_buf_validate twice. The second adjustment for 20 bytes
header space will corrupt the packet.

We solve this by tagging the deferred packets and bail out from
receive buffer validation for packets that have already been
subjected to this.

Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.h | 1 +
 net/tipc/link.c | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/tipc/core.h b/net/tipc/core.h
index 1ff477b0450d..5569d96b4da3 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -192,6 +192,7 @@ static inline void k_term_timer(struct timer_list *timer)
 
 struct tipc_skb_cb {
 	void *handle;
+	bool deferred;
 };
 
 #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/tipc/link.c b/net/tipc/link.c
index d4b5de41b682..da6018beb6eb 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1391,6 +1391,12 @@ static int link_recv_buf_validate(struct sk_buff *buf)
 	u32 hdr_size;
 	u32 min_hdr_size;
 
+	/* If this packet comes from the defer queue, the skb has already
+	 * been validated
+	 */
+	if (unlikely(TIPC_SKB_CB(buf)->deferred))
+		return 1;
+
 	if (unlikely(buf->len < MIN_H_SIZE))
 		return 0;
 
@@ -1703,6 +1709,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
 				&l_ptr->newest_deferred_in, buf)) {
 		l_ptr->deferred_inqueue_sz++;
 		l_ptr->stats.deferred_recv++;
+		TIPC_SKB_CB(buf)->deferred = true;
 		if ((l_ptr->deferred_inqueue_sz % 16) == 1)
 			tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
 	} else
-- 
cgit v1.2.3


From 0e0eee2465df77bcec2e8ff75432b8e57897b143 Mon Sep 17 00:00:00 2001
From: Cong Wang <cwang@twopensource.com>
Date: Tue, 11 Feb 2014 15:51:30 -0800
Subject: net: correct error path in rtnl_newlink()

I saw the following BUG when ->newlink() fails in rtnl_newlink():

[   40.240058] kernel BUG at net/core/dev.c:6438!

this is due to free_netdev() is not supposed to be called before
netdev is completely unregistered, therefore it is not correct
to call free_netdev() here, at least for ops->newlink!=NULL case,
many drivers call it in ->destructor so that rtnl_unlock() will
take care of it, we probably don't need to do anything here.

Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 048dc8d183aa..1a0dac2ef9ad 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1963,16 +1963,21 @@ replay:
 
 		dev->ifindex = ifm->ifi_index;
 
-		if (ops->newlink)
+		if (ops->newlink) {
 			err = ops->newlink(net, dev, tb, data);
-		else
+			/* Drivers should call free_netdev() in ->destructor
+			 * and unregister it on failure so that device could be
+			 * finally freed in rtnl_unlock.
+			 */
+			if (err < 0)
+				goto out;
+		} else {
 			err = register_netdevice(dev);
-
-		if (err < 0) {
-			free_netdev(dev);
-			goto out;
+			if (err < 0) {
+				free_netdev(dev);
+				goto out;
+			}
 		}
-
 		err = rtnl_configure_link(dev, ifm);
 		if (err < 0)
 			unregister_netdevice(dev);
-- 
cgit v1.2.3


From 22a1f5140ed69baa5906ce9b2b9143478f5a4da0 Mon Sep 17 00:00:00 2001
From: wangweidong <wangweidong1@huawei.com>
Date: Wed, 12 Feb 2014 09:44:43 +0800
Subject: sctp: fix a missed .data initialization

As commit 3c68198e75111a90("sctp: Make hmac algorithm selection for
 cookie generation dynamic"), we miss the .data initialization.
If we don't use the net_namespace, the problem that parts of the
sysctl configuration won't be isolation and won't occur.

In sctp_sysctl_net_register(), we register the sysctl for each
net, in the for(), we use the 'table[i].data' as check condition, so
when the 'i' is the index of sctp_hmac_alg, the data is NULL, then
break. So add the .data initialization.

Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Wang Weidong <wangweidong1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sysctl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 7135e617ab0f..d354de5a6fe0 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -151,6 +151,7 @@ static struct ctl_table sctp_net_table[] = {
 	},
 	{
 		.procname	= "cookie_hmac_alg",
+		.data		= &init_net.sctp.sctp_hmac_alg,
 		.maxlen		= 8,
 		.mode		= 0644,
 		.proc_handler	= proc_sctp_do_hmac_alg,
-- 
cgit v1.2.3


From efb842c45e667332774196bd5a1d539d048159cc Mon Sep 17 00:00:00 2001
From: wangweidong <wangweidong1@huawei.com>
Date: Wed, 12 Feb 2014 09:44:44 +0800
Subject: sctp: optimize the sctp_sysctl_net_register

Here, when the net is init_net, we needn't to kmemdup the ctl_table
again. So add a check for net. Also we can save some memory.

Signed-off-by: Wang Weidong <wangweidong1@huawei.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sysctl.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index d354de5a6fe0..35c8923b5554 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -402,15 +402,18 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
 
 int sctp_sysctl_net_register(struct net *net)
 {
-	struct ctl_table *table;
-	int i;
+	struct ctl_table *table = sctp_net_table;
+
+	if (!net_eq(net, &init_net)) {
+		int i;
 
-	table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
-	if (!table)
-		return -ENOMEM;
+		table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
+		if (!table)
+			return -ENOMEM;
 
-	for (i = 0; table[i].data; i++)
-		table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+		for (i = 0; table[i].data; i++)
+			table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+	}
 
 	net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
 	return 0;
-- 
cgit v1.2.3


From d206940319c41df4299db75ed56142177bb2e5f6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 13 Feb 2014 23:09:11 +0100
Subject: net: core: introduce netif_skb_dev_features

Will be used by upcoming ipv4 forward path change that needs to
determine feature mask using skb->dst->dev instead of skb->dev.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 ++++++-
 net/core/dev.c            | 22 ++++++++++++----------
 2 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 440a02ee6f92..21d4e6be8949 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3068,7 +3068,12 @@ void netdev_change_features(struct net_device *dev);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
-netdev_features_t netif_skb_features(struct sk_buff *skb);
+netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
+					 const struct net_device *dev);
+static inline netdev_features_t netif_skb_features(struct sk_buff *skb)
+{
+	return netif_skb_dev_features(skb, skb->dev);
+}
 
 static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 4ad1b78c9c77..b1b0c8d4d7df 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2420,7 +2420,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  * 2. No high memory really exists on this machine.
  */
 
-static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
@@ -2495,34 +2495,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 }
 
 static netdev_features_t harmonize_features(struct sk_buff *skb,
-	netdev_features_t features)
+					    const struct net_device *dev,
+					    netdev_features_t features)
 {
 	if (skb->ip_summed != CHECKSUM_NONE &&
 	    !can_checksum_protocol(features, skb_network_protocol(skb))) {
 		features &= ~NETIF_F_ALL_CSUM;
-	} else if (illegal_highdma(skb->dev, skb)) {
+	} else if (illegal_highdma(dev, skb)) {
 		features &= ~NETIF_F_SG;
 	}
 
 	return features;
 }
 
-netdev_features_t netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
+					 const struct net_device *dev)
 {
 	__be16 protocol = skb->protocol;
-	netdev_features_t features = skb->dev->features;
+	netdev_features_t features = dev->features;
 
-	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+	if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
 	} else if (!vlan_tx_tag_present(skb)) {
-		return harmonize_features(skb, features);
+		return harmonize_features(skb, dev, features);
 	}
 
-	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+	features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
 					       NETIF_F_HW_VLAN_STAG_TX);
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2530,9 +2532,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_STAG_TX;
 
-	return harmonize_features(skb, features);
+	return harmonize_features(skb, dev, features);
 }
-EXPORT_SYMBOL(netif_skb_features);
+EXPORT_SYMBOL(netif_skb_dev_features);
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
-- 
cgit v1.2.3


From fe6cc55f3a9a053482a76f5a6b2257cee51b4663 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 13 Feb 2014 23:09:12 +0100
Subject: net: ip, ipv6: handle gso skbs in forwarding path

Marcelo Ricardo Leitner reported problems when the forwarding link path
has a lower mtu than the incoming one if the inbound interface supports GRO.

Given:
Host <mtu1500> R1 <mtu1200> R2

Host sends tcp stream which is routed via R1 and R2.  R1 performs GRO.

In this case, the kernel will fail to send ICMP fragmentation needed
messages (or pkt too big for ipv6), as GSO packets currently bypass dstmtu
checks in forward path. Instead, Linux tries to send out packets exceeding
the mtu.

When locking route MTU on Host (i.e., no ipv4 DF bit set), R1 does
not fragment the packets when forwarding, and again tries to send out
packets exceeding R1-R2 link mtu.

This alters the forwarding dstmtu checks to take the individual gso
segment lengths into account.

For ipv6, we send out pkt too big error for gso if the individual
segments are too big.

For ipv4, we either send icmp fragmentation needed, or, if the DF bit
is not set, perform software segmentation and let the output path
create fragments when the packet is leaving the machine.
It is not 100% correct as the error message will contain the headers of
the GRO skb instead of the original/segmented one, but it seems to
work fine in my (limited) tests.

Eric Dumazet suggested to simply shrink mss via ->gso_size to avoid
sofware segmentation.

However it turns out that skb_segment() assumes skb nr_frags is related
to mss size so we would BUG there.  I don't want to mess with it considering
Herbert and Eric disagree on what the correct behavior should be.

Hannes Frederic Sowa notes that when we would shrink gso_size
skb_segment would then also need to deal with the case where
SKB_MAX_FRAGS would be exceeded.

This uses sofware segmentation in the forward path when we hit ipv4
non-DF packets and the outgoing link mtu is too small.  Its not perfect,
but given the lack of bug reports wrt. GRO fwd being broken this is a
rare case anyway.  Also its not like this could not be improved later
once the dust settles.

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Reported-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 17 ++++++++++++
 net/ipv4/ip_forward.c  | 71 ++++++++++++++++++++++++++++++++++++++++++++++++--
 net/ipv6/ip6_output.c  | 17 ++++++++++--
 3 files changed, 101 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f589c9af8cbf..3ebbbe7b6d05 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2916,5 +2916,22 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb)
 {
 	return !skb->head_frag || skb_cloned(skb);
 }
+
+/**
+ * skb_gso_network_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_network_seglen is used to determine the real size of the
+ * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
+ *
+ * The MAC/L2 header is not accounted for.
+ */
+static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
+{
+	unsigned int hdr_len = skb_transport_header(skb) -
+			       skb_network_header(skb);
+	return hdr_len + skb_gso_transport_seglen(skb);
+}
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e9f1217a8afd..f3869c186d97 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,6 +39,71 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 
+static bool ip_may_fragment(const struct sk_buff *skb)
+{
+	return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
+	       !skb->local_df;
+}
+
+static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu || skb->local_df)
+		return false;
+
+	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+		return false;
+
+	return true;
+}
+
+static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
+{
+	unsigned int mtu;
+
+	if (skb->local_df || !skb_is_gso(skb))
+		return false;
+
+	mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
+
+	/* if seglen > mtu, do software segmentation for IP fragmentation on
+	 * output.  DF bit cannot be set since ip_forward would have sent
+	 * icmp error.
+	 */
+	return skb_gso_network_seglen(skb) > mtu;
+}
+
+/* called if GSO skb needs to be fragmented on forward */
+static int ip_forward_finish_gso(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	netdev_features_t features;
+	struct sk_buff *segs;
+	int ret = 0;
+
+	features = netif_skb_dev_features(skb, dst->dev);
+	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+	if (IS_ERR(segs)) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	consume_skb(skb);
+
+	do {
+		struct sk_buff *nskb = segs->next;
+		int err;
+
+		segs->next = NULL;
+		err = dst_output(segs);
+
+		if (err && ret == 0)
+			ret = err;
+		segs = nskb;
+	} while (segs);
+
+	return ret;
+}
+
 static int ip_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options *opt	= &(IPCB(skb)->opt);
@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb)
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
+	if (ip_gso_exceeds_dst_mtu(skb))
+		return ip_forward_finish_gso(skb);
+
 	return dst_output(skb);
 }
 
@@ -91,8 +159,7 @@ int ip_forward(struct sk_buff *skb)
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
 	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
-	if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
-		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+	if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) {
 		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(mtu));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ef02b26ccf81..070a2fae2375 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -342,6 +342,20 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 	return mtu;
 }
 
+static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu || skb->local_df)
+		return false;
+
+	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
+		return true;
+
+	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+		return false;
+
+	return true;
+}
+
 int ip6_forward(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -466,8 +480,7 @@ int ip6_forward(struct sk_buff *skb)
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
-	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
+	if (ip6_pkt_too_big(skb, mtu)) {
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-- 
cgit v1.2.3


From 219e288e8900fac65211e0a23e2a1037fd521af1 Mon Sep 17 00:00:00 2001
From: Vijay Subramanian <vijaynsu@cisco.com>
Date: Wed, 12 Feb 2014 18:58:21 -0800
Subject: net: sched: Cleanup PIE comments

Fix incorrect comment reported by Norbert Kiesel. Edit another comment to add
more details. Also add references to algorithm (IETF draft and paper) to top of
file.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
CC: Mythili Prabhu <mysuryan@cisco.com>
CC: Norbert Kiesel <nkiesel@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_pie.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index a255d0200a59..fefeeb73f15f 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -15,6 +15,11 @@
  *
  * ECN support is added by Naeem Khademi <naeemk@ifi.uio.no>
  * University of Oslo, Norway.
+ *
+ * References:
+ * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
+ * IEEE  Conference on High Performance Switching and Routing 2013 :
+ * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
  */
 
 #include <linux/module.h>
@@ -36,7 +41,7 @@ struct pie_params {
 	psched_time_t target;	/* user specified target delay in pschedtime */
 	u32 tupdate;		/* timer frequency (in jiffies) */
 	u32 limit;		/* number of packets that can be enqueued */
-	u32 alpha;		/* alpha and beta are between -4 and 4 */
+	u32 alpha;		/* alpha and beta are between 0 and 32 */
 	u32 beta;		/* and are used for shift relative to 1 */
 	bool ecn;		/* true if ecn is enabled */
 	bool bytemode;		/* to scale drop early prob based on pkt size */
@@ -326,10 +331,16 @@ static void calculate_probability(struct Qdisc *sch)
 	if (qdelay == 0 && qlen != 0)
 		update_prob = false;
 
-	/* Add ranges for alpha and beta, more aggressive for high dropping
-	 * mode and gentle steps for light dropping mode
-	 * In light dropping mode, take gentle steps; in medium dropping mode,
-	 * take medium steps; in high dropping mode, take big steps.
+	/* In the algorithm, alpha and beta are between 0 and 2 with typical
+	 * value for alpha as 0.125. In this implementation, we use values 0-32
+	 * passed from user space to represent this. Also, alpha and beta have
+	 * unit of HZ and need to be scaled before they can used to update
+	 * probability. alpha/beta are updated locally below by 1) scaling them
+	 * appropriately 2) scaling down by 16 to come to 0-2 range.
+	 * Please see paper for details.
+	 *
+	 * We scale alpha and beta differently depending on whether we are in
+	 * light, medium or high dropping mode.
 	 */
 	if (q->vars.prob < MAX_PROB / 100) {
 		alpha =
-- 
cgit v1.2.3


From 357137a4222b1984b3d867923c7e388669744ceb Mon Sep 17 00:00:00 2001
From: FX Le Bail <fx.lebail@yahoo.com>
Date: Mon, 10 Feb 2014 16:46:54 +0100
Subject: ipv4: ipconfig.c: add parentheses in an if statement

Even if the 'time_before' macro expand with parentheses, the look is bad.

Signed-off-by: Francois-Xavier Le Bail <fx.lebail@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index efa1138fa523..b3e86ea7b71b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -273,7 +273,7 @@ static int __init ic_open_devs(void)
 
 		msleep(1);
 
-		if time_before(jiffies, next_msg)
+		if (time_before(jiffies, next_msg))
 			continue;
 
 		elapsed = jiffies_to_msecs(jiffies - start);
-- 
cgit v1.2.3


From 09db30805300e9ed5ad43d4d339115cf1d9c84e1 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 13 Feb 2014 19:02:33 -0700
Subject: dccp: re-enable debug macro

dccp tfrc: revert

This reverts 6aee49c558de ("dccp: make local variable static") since
the variable tfrc_debug is referenced by the tfrc_pr_debug(fmt, ...)
macro when TFRC debugging is enabled. If it is enabled, use of the
macro produces a compilation error.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/tfrc.c | 2 +-
 net/dccp/ccids/lib/tfrc.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index c073b81a1f3e..62b5828acde0 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -8,7 +8,7 @@
 #include "tfrc.h"
 
 #ifdef CONFIG_IP_DCCP_TFRC_DEBUG
-static bool tfrc_debug;
+bool tfrc_debug;
 module_param(tfrc_debug, bool, 0644);
 MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages");
 #endif
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index a3d8f7c76ae0..40ee7d62b652 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -21,6 +21,7 @@
 #include "packet_history.h"
 
 #ifdef CONFIG_IP_DCCP_TFRC_DEBUG
+extern bool tfrc_debug;
 #define tfrc_pr_debug(format, a...)	DCCP_PR_DEBUG(tfrc_debug, format, ##a)
 #else
 #define tfrc_pr_debug(format, a...)
-- 
cgit v1.2.3


From cd0f0b95fd2cd2b716caf5f15db73ab76992789b Mon Sep 17 00:00:00 2001
From: Duan Jiong <duanj.fnst@cn.fujitsu.com>
Date: Fri, 14 Feb 2014 18:26:22 +0800
Subject: ipv4: distinguish EHOSTUNREACH from the ENETUNREACH

since commit 251da413("ipv4: Cache ip_error() routes even when not forwarding."),
the counter IPSTATS_MIB_INADDRERRORS can't work correctly, because the value of
err was always set to ENETUNREACH.

Signed-off-by: Duan Jiong <duanj.fnst@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 25071b48921c..6f6dd85bdffc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1695,8 +1695,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
 	err = fib_lookup(net, &fl4, &res);
-	if (err != 0)
+	if (err != 0) {
+		if (!IN_DEV_FORWARD(in_dev))
+			err = -EHOSTUNREACH;
 		goto no_route;
+	}
 
 	RT_CACHE_STAT_INC(in_slow_tot);
 
@@ -1712,8 +1715,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto local_input;
 	}
 
-	if (!IN_DEV_FORWARD(in_dev))
+	if (!IN_DEV_FORWARD(in_dev)) {
+		err = -EHOSTUNREACH;
 		goto no_route;
+	}
 	if (res.type != RTN_UNICAST)
 		goto martian_destination;
 
-- 
cgit v1.2.3


From ef2820a735f74ea60335f8ba3801b844f0cb184d Mon Sep 17 00:00:00 2001
From: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nsn.com>
Date: Fri, 14 Feb 2014 14:51:18 +0100
Subject: net: sctp: Fix a_rwnd/rwnd management to reflect real state of the
 receiver's buffer

Implementation of (a)rwnd calculation might lead to severe performance issues
and associations completely stalling. These problems are described and solution
is proposed which improves lksctp's robustness in congestion state.

1) Sudden drop of a_rwnd and incomplete window recovery afterwards

Data accounted in sctp_assoc_rwnd_decrease takes only payload size (sctp data),
but size of sk_buff, which is blamed against receiver buffer, is not accounted
in rwnd. Theoretically, this should not be the problem as actual size of buffer
is double the amount requested on the socket (SO_RECVBUF). Problem here is
that this will have bad scaling for data which is less then sizeof sk_buff.
E.g. in 4G (LTE) networks, link interfacing radio side will have a large portion
of traffic of this size (less then 100B).

An example of sudden drop and incomplete window recovery is given below. Node B
exhibits problematic behavior. Node A initiates association and B is configured
to advertise rwnd of 10000. A sends messages of size 43B (size of typical sctp
message in 4G (LTE) network). On B data is left in buffer by not reading socket
in userspace.

Lets examine when we will hit pressure state and declare rwnd to be 0 for
scenario with above stated parameters (rwnd == 10000, chunk size == 43, each
chunk is sent in separate sctp packet)

Logic is implemented in sctp_assoc_rwnd_decrease:

socket_buffer (see below) is maximum size which can be held in socket buffer
(sk_rcvbuf). current_alloced is amount of data currently allocated (rx_count)

A simple expression is given for which it will be examined after how many
packets for above stated parameters we enter pressure state:

We start by condition which has to be met in order to enter pressure state:

	socket_buffer < currently_alloced;

currently_alloced is represented as size of sctp packets received so far and not
yet delivered to userspace. x is the number of chunks/packets (since there is no
bundling, and each chunk is delivered in separate packet, we can observe each
chunk also as sctp packet, and what is important here, having its own sk_buff):

	socket_buffer < x*each_sctp_packet;

each_sctp_packet is sctp chunk size + sizeof(struct sk_buff). socket_buffer is
twice the amount of initially requested size of socket buffer, which is in case
of sctp, twice the a_rwnd requested:

	2*rwnd < x*(payload+sizeof(struc sk_buff));

sizeof(struct sk_buff) is 190 (3.13.0-rc4+). Above is stated that rwnd is 10000
and each payload size is 43

	20000 < x(43+190);

	x > 20000/233;

	x ~> 84;

After ~84 messages, pressure state is entered and 0 rwnd is advertised while
received 84*43B ~= 3612B sctp data. This is why external observer notices sudden
drop from 6474 to 0, as it will be now shown in example:

IP A.34340 > B.12345: sctp (1) [INIT] [init tag: 1875509148] [rwnd: 81920] [OS: 10] [MIS: 65535] [init TSN: 1096057017]
IP B.12345 > A.34340: sctp (1) [INIT ACK] [init tag: 3198966556] [rwnd: 10000] [OS: 10] [MIS: 10] [init TSN: 902132839]
IP A.34340 > B.12345: sctp (1) [COOKIE ECHO]
IP B.12345 > A.34340: sctp (1) [COOKIE ACK]
IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057017] [SID: 0] [SSEQ 0] [PPID 0x18]
IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057017] [a_rwnd 9957] [#gap acks 0] [#dup tsns 0]
IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057018] [SID: 0] [SSEQ 1] [PPID 0x18]
IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057018] [a_rwnd 9957] [#gap acks 0] [#dup tsns 0]
IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057019] [SID: 0] [SSEQ 2] [PPID 0x18]
IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057019] [a_rwnd 9914] [#gap acks 0] [#dup tsns 0]
<...>
IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057098] [SID: 0] [SSEQ 81] [PPID 0x18]
IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057098] [a_rwnd 6517] [#gap acks 0] [#dup tsns 0]
IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057099] [SID: 0] [SSEQ 82] [PPID 0x18]
IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057099] [a_rwnd 6474] [#gap acks 0] [#dup tsns 0]
IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057100] [SID: 0] [SSEQ 83] [PPID 0x18]

--> Sudden drop

IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057100] [a_rwnd 0] [#gap acks 0] [#dup tsns 0]

At this point, rwnd_press stores current rwnd value so it can be later restored
in sctp_assoc_rwnd_increase. This however doesn't happen as condition to start
slowly increasing rwnd until rwnd_press is returned to rwnd is never met. This
condition is not met since rwnd, after it hit 0, must first reach rwnd_press by
adding amount which is read from userspace. Let us observe values in above
example. Initial a_rwnd is 10000, pressure was hit when rwnd was ~6500 and the
amount of actual sctp data currently waiting to be delivered to userspace
is ~3500. When userspace starts to read, sctp_assoc_rwnd_increase will be blamed
only for sctp data, which is ~3500. Condition is never met, and when userspace
reads all data, rwnd stays on 3569.

IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057100] [a_rwnd 1505] [#gap acks 0] [#dup tsns 0]
IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057100] [a_rwnd 3010] [#gap acks 0] [#dup tsns 0]
IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057101] [SID: 0] [SSEQ 84] [PPID 0x18]
IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057101] [a_rwnd 3569] [#gap acks 0] [#dup tsns 0]

--> At this point userspace read everything, rwnd recovered only to 3569

IP A.34340 > B.12345: sctp (1) [DATA] (B)(E) [TSN: 1096057102] [SID: 0] [SSEQ 85] [PPID 0x18]
IP B.12345 > A.34340: sctp (1) [SACK] [cum ack 1096057102] [a_rwnd 3569] [#gap acks 0] [#dup tsns 0]

Reproduction is straight forward, it is enough for sender to send packets of
size less then sizeof(struct sk_buff) and receiver keeping them in its buffers.

2) Minute size window for associations sharing the same socket buffer

In case multiple associations share the same socket, and same socket buffer
(sctp.rcvbuf_policy == 0), different scenarios exist in which congestion on one
of the associations can permanently drop rwnd of other association(s).

Situation will be typically observed as one association suddenly having rwnd
dropped to size of last packet received and never recovering beyond that point.
Different scenarios will lead to it, but all have in common that one of the
associations (let it be association from 1)) nearly depleted socket buffer, and
the other association blames socket buffer just for the amount enough to start
the pressure. This association will enter pressure state, set rwnd_press and
announce 0 rwnd.
When data is read by userspace, similar situation as in 1) will occur, rwnd will
increase just for the size read by userspace but rwnd_press will be high enough
so that association doesn't have enough credit to reach rwnd_press and restore
to previous state. This case is special case of 1), being worse as there is, in
the worst case, only one packet in buffer for which size rwnd will be increased.
Consequence is association which has very low maximum rwnd ('minute size', in
our case down to 43B - size of packet which caused pressure) and as such
unusable.

Scenario happened in the field and labs frequently after congestion state (link
breaks, different probabilities of packet drop, packet reordering) and with
scenario 1) preceding. Here is given a deterministic scenario for reproduction:

>From node A establish two associations on the same socket, with rcvbuf_policy
being set to share one common buffer (sctp.rcvbuf_policy == 0). On association 1
repeat scenario from 1), that is, bring it down to 0 and restore up. Observe
scenario 1). Use small payload size (here we use 43). Once rwnd is 'recovered',
bring it down close to 0, as in just one more packet would close it. This has as
a consequence that association number 2 is able to receive (at least) one more
packet which will bring it in pressure state. E.g. if association 2 had rwnd of
10000, packet received was 43, and we enter at this point into pressure,
rwnd_press will have 9957. Once payload is delivered to userspace, rwnd will
increase for 43, but conditions to restore rwnd to original state, just as in
1), will never be satisfied.

--> Association 1, between A.y and B.12345

IP A.55915 > B.12345: sctp (1) [INIT] [init tag: 836880897] [rwnd: 10000] [OS: 10] [MIS: 65535] [init TSN: 4032536569]
IP B.12345 > A.55915: sctp (1) [INIT ACK] [init tag: 2873310749] [rwnd: 81920] [OS: 10] [MIS: 10] [init TSN: 3799315613]
IP A.55915 > B.12345: sctp (1) [COOKIE ECHO]
IP B.12345 > A.55915: sctp (1) [COOKIE ACK]

--> Association 2, between A.z and B.12346

IP A.55915 > B.12346: sctp (1) [INIT] [init tag: 534798321] [rwnd: 10000] [OS: 10] [MIS: 65535] [init TSN: 2099285173]
IP B.12346 > A.55915: sctp (1) [INIT ACK] [init tag: 516668823] [rwnd: 81920] [OS: 10] [MIS: 10] [init TSN: 3676403240]
IP A.55915 > B.12346: sctp (1) [COOKIE ECHO]
IP B.12346 > A.55915: sctp (1) [COOKIE ACK]

--> Deplete socket buffer by sending messages of size 43B over association 1

IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315613] [SID: 0] [SSEQ 0] [PPID 0x18]
IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315613] [a_rwnd 9957] [#gap acks 0] [#dup tsns 0]

<...>

IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315696] [a_rwnd 6388] [#gap acks 0] [#dup tsns 0]
IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315697] [SID: 0] [SSEQ 84] [PPID 0x18]
IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315697] [a_rwnd 6345] [#gap acks 0] [#dup tsns 0]

--> Sudden drop on 1

IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315698] [SID: 0] [SSEQ 85] [PPID 0x18]
IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315698] [a_rwnd 0] [#gap acks 0] [#dup tsns 0]

--> Here userspace read, rwnd 'recovered' to 3698, now deplete again using
    association 1 so there is place in buffer for only one more packet

IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315799] [SID: 0] [SSEQ 186] [PPID 0x18]
IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315799] [a_rwnd 86] [#gap acks 0] [#dup tsns 0]
IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315800] [SID: 0] [SSEQ 187] [PPID 0x18]
IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315800] [a_rwnd 43] [#gap acks 0] [#dup tsns 0]

--> Socket buffer is almost depleted, but there is space for one more packet,
    send them over association 2, size 43B

IP B.12346 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3676403240] [SID: 0] [SSEQ 0] [PPID 0x18]
IP A.55915 > B.12346: sctp (1) [SACK] [cum ack 3676403240] [a_rwnd 0] [#gap acks 0] [#dup tsns 0]

--> Immediate drop

IP A.60995 > B.12346: sctp (1) [SACK] [cum ack 387491510] [a_rwnd 0] [#gap acks 0] [#dup tsns 0]

--> Read everything from the socket, both association recover up to maximum rwnd
    they are capable of reaching, note that association 1 recovered up to 3698,
    and association 2 recovered only to 43

IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315800] [a_rwnd 1548] [#gap acks 0] [#dup tsns 0]
IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315800] [a_rwnd 3053] [#gap acks 0] [#dup tsns 0]
IP B.12345 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3799315801] [SID: 0] [SSEQ 188] [PPID 0x18]
IP A.55915 > B.12345: sctp (1) [SACK] [cum ack 3799315801] [a_rwnd 3698] [#gap acks 0] [#dup tsns 0]
IP B.12346 > A.55915: sctp (1) [DATA] (B)(E) [TSN: 3676403241] [SID: 0] [SSEQ 1] [PPID 0x18]
IP A.55915 > B.12346: sctp (1) [SACK] [cum ack 3676403241] [a_rwnd 43] [#gap acks 0] [#dup tsns 0]

A careful reader might wonder why it is necessary to reproduce 1) prior
reproduction of 2). It is simply easier to observe when to send packet over
association 2 which will push association into the pressure state.

Proposed solution:

Both problems share the same root cause, and that is improper scaling of socket
buffer with rwnd. Solution in which sizeof(sk_buff) is taken into concern while
calculating rwnd is not possible due to fact that there is no linear
relationship between amount of data blamed in increase/decrease with IP packet
in which payload arrived. Even in case such solution would be followed,
complexity of the code would increase. Due to nature of current rwnd handling,
slow increase (in sctp_assoc_rwnd_increase) of rwnd after pressure state is
entered is rationale, but it gives false representation to the sender of current
buffer space. Furthermore, it implements additional congestion control mechanism
which is defined on implementation, and not on standard basis.

Proposed solution simplifies whole algorithm having on mind definition from rfc:

o  Receiver Window (rwnd): This gives the sender an indication of the space
   available in the receiver's inbound buffer.

Core of the proposed solution is given with these lines:

sctp_assoc_rwnd_update:
	if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0)
		asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1;
	else
		asoc->rwnd = 0;

We advertise to sender (half of) actual space we have. Half is in the braces
depending whether you would like to observe size of socket buffer as SO_RECVBUF
or twice the amount, i.e. size is the one visible from userspace, that is,
from kernelspace.
In this way sender is given with good approximation of our buffer space,
regardless of the buffer policy - we always advertise what we have. Proposed
solution fixes described problems and removes necessity for rwnd restoration
algorithm. Finally, as proposed solution is simplification, some lines of code,
along with some bytes in struct sctp_association are saved.

Version 2 of the patch addressed comments from Vlad. Name of the function is set
to be more descriptive, and two parts of code are changed, in one removing the
superfluous call to sctp_assoc_rwnd_update since call would not result in update
of rwnd, and the other being reordering of the code in a way that call to
sctp_assoc_rwnd_update updates rwnd. Version 3 corrected change introduced in v2
in a way that existing function is not reordered/copied in line, but it is
correctly called. Thanks Vlad for suggesting.

Signed-off-by: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nsn.com>
Reviewed-by: Alexander Sverdlin <alexander.sverdlin@nsn.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 14 +-------
 net/sctp/associola.c       | 82 ++++++++++------------------------------------
 net/sctp/sm_statefuns.c    |  2 +-
 net/sctp/socket.c          |  6 ----
 net/sctp/ulpevent.c        |  8 +++--
 5 files changed, 25 insertions(+), 87 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index d992ca3145fe..6ee76c804893 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1653,17 +1653,6 @@ struct sctp_association {
 	/* This is the last advertised value of rwnd over a SACK chunk. */
 	__u32 a_rwnd;
 
-	/* Number of bytes by which the rwnd has slopped.  The rwnd is allowed
-	 * to slop over a maximum of the association's frag_point.
-	 */
-	__u32 rwnd_over;
-
-	/* Keeps treack of rwnd pressure.  This happens when we have
-	 * a window, but not recevie buffer (i.e small packets).  This one
-	 * is releases slowly (1 PMTU at a time ).
-	 */
-	__u32 rwnd_press;
-
 	/* This is the sndbuf size in use for the association.
 	 * This corresponds to the sndbuf size for the association,
 	 * as specified in the sk->sndbuf.
@@ -1892,8 +1881,7 @@ void sctp_assoc_update(struct sctp_association *old,
 __u32 sctp_association_get_next_tsn(struct sctp_association *);
 
 void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *);
-void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
-void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
+void sctp_assoc_rwnd_update(struct sctp_association *, bool);
 void sctp_assoc_set_primary(struct sctp_association *,
 			    struct sctp_transport *);
 void sctp_assoc_del_nonprimary_peers(struct sctp_association *,
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5ae609200674..f558433537b8 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1367,44 +1367,35 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc)
 	return false;
 }
 
-/* Increase asoc's rwnd by len and send any window update SACK if needed. */
-void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
+/* Update asoc's rwnd for the approximated state in the buffer,
+ * and check whether SACK needs to be sent.
+ */
+void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
 {
+	int rx_count;
 	struct sctp_chunk *sack;
 	struct timer_list *timer;
 
-	if (asoc->rwnd_over) {
-		if (asoc->rwnd_over >= len) {
-			asoc->rwnd_over -= len;
-		} else {
-			asoc->rwnd += (len - asoc->rwnd_over);
-			asoc->rwnd_over = 0;
-		}
-	} else {
-		asoc->rwnd += len;
-	}
+	if (asoc->ep->rcvbuf_policy)
+		rx_count = atomic_read(&asoc->rmem_alloc);
+	else
+		rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
 
-	/* If we had window pressure, start recovering it
-	 * once our rwnd had reached the accumulated pressure
-	 * threshold.  The idea is to recover slowly, but up
-	 * to the initial advertised window.
-	 */
-	if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) {
-		int change = min(asoc->pathmtu, asoc->rwnd_press);
-		asoc->rwnd += change;
-		asoc->rwnd_press -= change;
-	}
+	if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0)
+		asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1;
+	else
+		asoc->rwnd = 0;
 
-	pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n",
-		 __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
-		 asoc->a_rwnd);
+	pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n",
+		 __func__, asoc, asoc->rwnd, rx_count,
+		 asoc->base.sk->sk_rcvbuf);
 
 	/* Send a window update SACK if the rwnd has increased by at least the
 	 * minimum of the association's PMTU and half of the receive buffer.
 	 * The algorithm used is similar to the one described in
 	 * Section 4.2.3.3 of RFC 1122.
 	 */
-	if (sctp_peer_needs_update(asoc)) {
+	if (update_peer && sctp_peer_needs_update(asoc)) {
 		asoc->a_rwnd = asoc->rwnd;
 
 		pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u "
@@ -1426,45 +1417,6 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
 	}
 }
 
-/* Decrease asoc's rwnd by len. */
-void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
-{
-	int rx_count;
-	int over = 0;
-
-	if (unlikely(!asoc->rwnd || asoc->rwnd_over))
-		pr_debug("%s: association:%p has asoc->rwnd:%u, "
-			 "asoc->rwnd_over:%u!\n", __func__, asoc,
-			 asoc->rwnd, asoc->rwnd_over);
-
-	if (asoc->ep->rcvbuf_policy)
-		rx_count = atomic_read(&asoc->rmem_alloc);
-	else
-		rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
-
-	/* If we've reached or overflowed our receive buffer, announce
-	 * a 0 rwnd if rwnd would still be positive.  Store the
-	 * the potential pressure overflow so that the window can be restored
-	 * back to original value.
-	 */
-	if (rx_count >= asoc->base.sk->sk_rcvbuf)
-		over = 1;
-
-	if (asoc->rwnd >= len) {
-		asoc->rwnd -= len;
-		if (over) {
-			asoc->rwnd_press += asoc->rwnd;
-			asoc->rwnd = 0;
-		}
-	} else {
-		asoc->rwnd_over = len - asoc->rwnd;
-		asoc->rwnd = 0;
-	}
-
-	pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n",
-		 __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
-		 asoc->rwnd_press);
-}
 
 /* Build the bind address list for the association based on info from the
  * local endpoint and the remote peer.
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 483dcd71b3c5..591b44d3b7de 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6176,7 +6176,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * PMTU.  In cases, such as loopback, this might be a rather
 	 * large spill over.
 	 */
-	if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
+	if ((!chunk->data_accepted) && (!asoc->rwnd ||
 	    (datalen > asoc->rwnd + asoc->frag_point))) {
 
 		/* If this is the next TSN, consider reneging to make
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9e91d6e5df63..7075ac847fde 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2092,12 +2092,6 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
 		sctp_skb_pull(skb, copied);
 		skb_queue_head(&sk->sk_receive_queue, skb);
 
-		/* When only partial message is copied to the user, increase
-		 * rwnd by that amount. If all the data in the skb is read,
-		 * rwnd is updated when the event is freed.
-		 */
-		if (!sctp_ulpevent_is_notification(event))
-			sctp_assoc_rwnd_increase(event->asoc, copied);
 		goto out;
 	} else if ((event->msg_flags & MSG_NOTIFICATION) ||
 		   (event->msg_flags & MSG_EOR))
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 85c64658bd0b..8d198ae03606 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -989,7 +989,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event,
 	skb = sctp_event2skb(event);
 	/* Set the owner and charge rwnd for bytes received.  */
 	sctp_ulpevent_set_owner(event, asoc);
-	sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
+	sctp_assoc_rwnd_update(asoc, false);
 
 	if (!skb->data_len)
 		return;
@@ -1011,6 +1011,7 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
 {
 	struct sk_buff *skb, *frag;
 	unsigned int	len;
+	struct sctp_association *asoc;
 
 	/* Current stack structures assume that the rcv buffer is
 	 * per socket.   For UDP style sockets this is not true as
@@ -1035,8 +1036,11 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
 	}
 
 done:
-	sctp_assoc_rwnd_increase(event->asoc, len);
+	asoc = event->asoc;
+	sctp_association_hold(asoc);
 	sctp_ulpevent_release_owner(event);
+	sctp_assoc_rwnd_update(asoc, true);
+	sctp_association_put(asoc);
 }
 
 static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
-- 
cgit v1.2.3


From 99932d4fc03a13bb3e94938fe25458fabc8f2fc3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Sun, 16 Feb 2014 15:55:20 +0100
Subject: netdevice: add queue selection fallback handler for ndo_select_queue

Add a new argument for ndo_select_queue() callback that passes a
fallback handler. This gets invoked through netdev_pick_tx();
fallback handler is currently __netdev_pick_tx() as most drivers
invoke this function within their customized implementation in
case for skbs that don't need any special handling. This fallback
handler can then be replaced on other call-sites with different
queue selection methods (e.g. in packet sockets, pktgen etc).

This also has the nice side-effect that __netdev_pick_tx() is
then only invoked from netdev_pick_tx() and export of that
function to modules can be undone.

Suggested-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c                 | 2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   | 6 +++---
 drivers/net/ethernet/lantiq_etop.c              | 2 +-
 drivers/net/ethernet/mellanox/mlx4/en_tx.c      | 4 ++--
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    | 2 +-
 drivers/net/ethernet/tile/tilegx.c              | 2 +-
 drivers/net/team/team.c                         | 2 +-
 drivers/net/tun.c                               | 2 +-
 drivers/net/wireless/mwifiex/main.c             | 2 +-
 drivers/staging/bcm/Bcmnet.c                    | 2 +-
 drivers/staging/netlogic/xlr_net.c              | 2 +-
 drivers/staging/rtl8188eu/os_dep/os_intfs.c     | 2 +-
 include/linux/netdevice.h                       | 9 ++++++---
 net/core/flow_dissector.c                       | 7 +++----
 net/mac80211/iface.c                            | 6 ++++--
 17 files changed, 31 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 867664918715..1c6104d3501d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3707,7 +3707,7 @@ static inline int bond_slave_override(struct bonding *bond,
 
 
 static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv)
+			     void *accel_priv, select_queue_fallback_t fallback)
 {
 	/*
 	 * This helper function exists to help dev_pick_tx get the correct
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 9d7419e0390b..66c0df78c3ff 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1873,7 +1873,7 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
 }
 
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv)
+		       void *accel_priv, select_queue_fallback_t fallback)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 
@@ -1895,7 +1895,7 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	/* select a non-FCoE queue */
-	return __netdev_pick_tx(dev, skb) % BNX2X_NUM_ETH_QUEUES(bp);
+	return fallback(dev, skb) % BNX2X_NUM_ETH_QUEUES(bp);
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index bfc58d488bb5..a89a40f88c25 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -496,7 +496,7 @@ int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos);
 
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv);
+		       void *accel_priv, select_queue_fallback_t fallback);
 
 static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 					struct bnx2x_fastpath *fp,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6d4ada72dfd0..18076c4178b4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6881,7 +6881,7 @@ static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
 }
 
 static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
-			      void *accel_priv)
+			      void *accel_priv, select_queue_fallback_t fallback)
 {
 	struct ixgbe_fwd_adapter *fwd_adapter = accel_priv;
 #ifdef IXGBE_FCOE
@@ -6907,7 +6907,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 		if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
 			break;
 	default:
-		return __netdev_pick_tx(dev, skb);
+		return fallback(dev, skb);
 	}
 
 	f = &adapter->ring_feature[RING_F_FCOE];
@@ -6920,7 +6920,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 
 	return txq + f->offset;
 #else
-	return __netdev_pick_tx(dev, skb);
+	return fallback(dev, skb);
 #endif
 }
 
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 8f9266c64c75..fd4b6aecf6ee 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -619,7 +619,7 @@ ltq_etop_set_multicast_list(struct net_device *dev)
 
 static u16
 ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb,
-		      void *accel_priv)
+		      void *accel_priv, select_queue_fallback_t fallback)
 {
 	/* we are currently only using the first queue */
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 8e8a7eb43a2c..13457032d15f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -629,7 +629,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk
 }
 
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 void *accel_priv)
+			 void *accel_priv, select_queue_fallback_t fallback)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	u16 rings_p_up = priv->num_tx_rings_p_up;
@@ -641,7 +641,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 	if (vlan_tx_tag_present(skb))
 		up = vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT;
 
-	return __netdev_pick_tx(dev, skb) % rings_p_up + up * rings_p_up;
+	return fallback(dev, skb) % rings_p_up + up * rings_p_up;
 }
 
 static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 3af04c3f42ea..9ca223bc90fc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -723,7 +723,7 @@ int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 void *accel_priv);
+			 void *accel_priv, select_queue_fallback_t fallback);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 023237a65720..17503da9f7a5 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -2071,7 +2071,7 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
 
 /* Return subqueue id on this core (one per core). */
 static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb,
-				 void *accel_priv)
+				 void *accel_priv, select_queue_fallback_t fallback)
 {
 	return smp_processor_id();
 }
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 28407426fd6f..c8624a8235ab 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1648,7 +1648,7 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv)
+			     void *accel_priv, select_queue_fallback_t fallback)
 {
 	/*
 	 * This helper function exists to help dev_pick_tx get the correct
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 44c4db8450f0..8fe9cb7d0f72 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -366,7 +366,7 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
  * hope the rxq no. may help here.
  */
 static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv)
+			    void *accel_priv, select_queue_fallback_t fallback)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	struct tun_flow_entry *e;
diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c
index 4d79761b9c87..9d3d2758ec35 100644
--- a/drivers/net/wireless/mwifiex/main.c
+++ b/drivers/net/wireless/mwifiex/main.c
@@ -748,7 +748,7 @@ static struct net_device_stats *mwifiex_get_stats(struct net_device *dev)
 
 static u16
 mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb,
-				void *accel_priv)
+				void *accel_priv, select_queue_fallback_t fallback)
 {
 	skb->priority = cfg80211_classify8021d(skb, NULL);
 	return mwifiex_1d_to_wmm_queue[skb->priority];
diff --git a/drivers/staging/bcm/Bcmnet.c b/drivers/staging/bcm/Bcmnet.c
index 8dfdd2732bdc..95a2358267ba 100644
--- a/drivers/staging/bcm/Bcmnet.c
+++ b/drivers/staging/bcm/Bcmnet.c
@@ -40,7 +40,7 @@ static INT bcm_close(struct net_device *dev)
 }
 
 static u16 bcm_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv)
+			    void *accel_priv, select_queue_fallback_t fallback)
 {
 	return ClassifyPacket(netdev_priv(dev), skb);
 }
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index eedffed17e39..6f9ac27730af 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c
@@ -307,7 +307,7 @@ static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb,
 }
 
 static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb,
-				void *accel_priv)
+				void *accel_priv, select_queue_fallback_t fallback)
 {
 	return (u16)smp_processor_id();
 }
diff --git a/drivers/staging/rtl8188eu/os_dep/os_intfs.c b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
index 68f98fa114d2..7c9ee58f47bb 100644
--- a/drivers/staging/rtl8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
@@ -653,7 +653,7 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 }
 
 static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv)
+			    void *accel_priv, select_queue_fallback_t fallback)
 {
 	struct adapter	*padapter = rtw_netdev_priv(dev);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 21d4e6be8949..1de9c136b066 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -752,6 +752,9 @@ struct netdev_phys_port_id {
 	unsigned char id_len;
 };
 
+typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
+				       struct sk_buff *skb);
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -783,7 +786,7 @@ struct netdev_phys_port_id {
  *	Required can not be NULL.
  *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
- *                         void *accel_priv);
+ *                         void *accel_priv, select_queue_fallback_t fallback);
  *	Called to decide which queue to when device supports multiple
  *	transmit queues.
  *
@@ -1005,7 +1008,8 @@ struct net_device_ops {
 						   struct net_device *dev);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
-						    void *accel_priv);
+						    void *accel_priv,
+						    select_queue_fallback_t fallback);
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
 						       int flags);
 	void			(*ndo_set_rx_mode)(struct net_device *dev);
@@ -1551,7 +1555,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
 				    void *accel_priv);
-u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
 
 /*
  * Net namespace inlines
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 87577d447554..75fe83f590ea 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -372,7 +372,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	int queue_index = sk_tx_queue_get(sk);
@@ -392,7 +392,6 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 
 	return queue_index;
 }
-EXPORT_SYMBOL(__netdev_pick_tx);
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
@@ -403,8 +402,8 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 	if (dev->real_num_tx_queues != 1) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb,
-							    accel_priv);
+			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+							    __netdev_pick_tx);
 		else
 			queue_index = __netdev_pick_tx(dev, skb);
 
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d6d1f1df9119..ce1c44370610 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1057,7 +1057,8 @@ static void ieee80211_uninit(struct net_device *dev)
 
 static u16 ieee80211_netdev_select_queue(struct net_device *dev,
 					 struct sk_buff *skb,
-					 void *accel_priv)
+					 void *accel_priv,
+					 select_queue_fallback_t fallback)
 {
 	return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
 }
@@ -1075,7 +1076,8 @@ static const struct net_device_ops ieee80211_dataif_ops = {
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
 					  struct sk_buff *skb,
-					  void *accel_priv)
+					  void *accel_priv,
+					  select_queue_fallback_t fallback)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
-- 
cgit v1.2.3


From b9507bdaf40e91fea2b1c0c1ee7dc627c8ee6fd6 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Sun, 16 Feb 2014 15:55:21 +0100
Subject: netdevice: move netdev_cap_txqueue for shared usage to header

In order to allow users to invoke netdev_cap_txqueue, it needs to
be moved into netdevice.h header file. While at it, also add kernel
doc header to document the API.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 20 ++++++++++++++++++++
 net/core/flow_dissector.c | 13 +------------
 2 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1de9c136b066..e8eeebd49a98 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2278,6 +2278,26 @@ static inline void netdev_reset_queue(struct net_device *dev_queue)
 	netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0));
 }
 
+/**
+ * 	netdev_cap_txqueue - check if selected tx queue exceeds device queues
+ * 	@dev: network device
+ * 	@queue_index: given tx queue index
+ *
+ * 	Returns 0 if given tx queue index >= number of device tx queues,
+ * 	otherwise returns the originally passed tx queue index.
+ */
+static inline u16 netdev_cap_txqueue(struct net_device *dev, u16 queue_index)
+{
+	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
+		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
+				     dev->name, queue_index,
+				     dev->real_num_tx_queues);
+		return 0;
+	}
+
+	return queue_index;
+}
+
 /**
  *	netif_running - test if up
  *	@dev: network device
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 75fe83f590ea..e29e810663d7 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -323,17 +323,6 @@ u32 __skb_get_poff(const struct sk_buff *skb)
 	return poff;
 }
 
-static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
-{
-	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
-		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
-				     dev->name, queue_index,
-				     dev->real_num_tx_queues);
-		return 0;
-	}
-	return queue_index;
-}
-
 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
@@ -408,7 +397,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 			queue_index = __netdev_pick_tx(dev, skb);
 
 		if (!accel_priv)
-			queue_index = dev_cap_txqueue(dev, queue_index);
+			queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
 
 	skb_set_queue_mapping(skb, queue_index);
-- 
cgit v1.2.3


From 0fd5d57ba3456c4d0b77d1ae64be4818b47d7545 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Sun, 16 Feb 2014 15:55:22 +0100
Subject: packet: check for ndo_select_queue during queue selection

Mathias reported that on an AMD Geode LX embedded board (ALiX)
with ath9k driver PACKET_QDISC_BYPASS, introduced in commit
d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket
option"), triggers a WARN_ON() coming from the driver itself
via 066dae93bdf ("ath9k: rework tx queue selection and fix
queue stopping/waking").

The reason why this happened is that ndo_select_queue() call
is not invoked from direct xmit path i.e. for ieee80211 subsystem
that sets queue and TID (similar to 802.1d tag) which is being
put into the frame through 802.11e (WMM, QoS). If that is not
set, pending frame counter for e.g. ath9k can get messed up.

So the WARN_ON() in ath9k is absolutely legitimate. Generally,
the hw queue selection in ieee80211 depends on the type of
traffic, and priorities are set according to ieee80211_ac_numbers
mapping; working in a similar way as DiffServ only on a lower
layer, so that the AP can favour frames that have "real-time"
requirements like voice or video data frames.

Therefore, check for presence of ndo_select_queue() in netdev
ops and, if available, invoke it with a fallback handler to
__packet_pick_tx_queue(), so that driver such as bnx2x, ixgbe,
or mlx4 can still select a hw queue for transmission in
relation to the current CPU while e.g. ieee80211 subsystem
can make their own choices.

Reported-by: Mathias Kretschmer <mathias.kretschmer@fokus.fraunhofer.de>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6a2bb37506c5..b5dc1168f98a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -308,11 +308,27 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
 	return po->xmit == packet_direct_xmit;
 }
 
-static u16 packet_pick_tx_queue(struct net_device *dev)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
 {
 	return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
 }
 
+static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	u16 queue_index;
+
+	if (ops->ndo_select_queue) {
+		queue_index = ops->ndo_select_queue(dev, skb, NULL,
+						    __packet_pick_tx_queue);
+		queue_index = netdev_cap_txqueue(dev, queue_index);
+	} else {
+		queue_index = __packet_pick_tx_queue(dev, skb);
+	}
+
+	skb_set_queue_mapping(skb, queue_index);
+}
+
 /* register_prot_hook must be invoked with the po->bind_lock held,
  * or from a context in which asynchronous accesses to the packet
  * socket is not possible (packet_create()).
@@ -2285,7 +2301,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 			}
 		}
 
-		skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
+		packet_pick_tx_queue(dev, skb);
+
 		skb->destructor = tpacket_destruct_skb;
 		__packet_set_status(po, ph, TP_STATUS_SENDING);
 		packet_inc_pending(&po->tx_ring);
@@ -2499,7 +2516,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
+
+	packet_pick_tx_queue(dev, skb);
 
 	if (po->has_vnet_hdr) {
 		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-- 
cgit v1.2.3


From 930cd6e46eadce8b8ed2a232ee536e5fd286c152 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Tue, 21 Jan 2014 11:22:05 +0100
Subject: batman-adv: fix soft-interface MTU computation

The current MTU computation always returns a value
smaller than 1500bytes even if the real interfaces
have an MTU large enough to compensate the batman-adv
overhead.

Fix the computation by properly returning the highest
admitted value.

Introduced by a19d3d85e1b854e4a483a55d740a42458085560d
("batman-adv: limit local translation table max size")

Reported-by: Russell Senior <russell@personaltelco.net>
Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/hard-interface.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 3d417d3641c6..b851cc580853 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -241,7 +241,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
 	const struct batadv_hard_iface *hard_iface;
-	int min_mtu = ETH_DATA_LEN;
+	int min_mtu = INT_MAX;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
@@ -256,8 +256,6 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
 	}
 	rcu_read_unlock();
 
-	atomic_set(&bat_priv->packet_size_max, min_mtu);
-
 	if (atomic_read(&bat_priv->fragmentation) == 0)
 		goto out;
 
@@ -268,13 +266,21 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
 	min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE);
 	min_mtu -= sizeof(struct batadv_frag_packet);
 	min_mtu *= BATADV_FRAG_MAX_FRAGMENTS;
-	atomic_set(&bat_priv->packet_size_max, min_mtu);
-
-	/* with fragmentation enabled we can fragment external packets easily */
-	min_mtu = min_t(int, min_mtu, ETH_DATA_LEN);
 
 out:
-	return min_mtu - batadv_max_header_len();
+	/* report to the other components the maximum amount of bytes that
+	 * batman-adv can send over the wire (without considering the payload
+	 * overhead). For example, this value is used by TT to compute the
+	 * maximum local table table size
+	 */
+	atomic_set(&bat_priv->packet_size_max, min_mtu);
+
+	/* the real soft-interface MTU is computed by removing the payload
+	 * overhead from the maximum amount of bytes that was just computed.
+	 *
+	 * However batman-adv does not support MTUs bigger than ETH_DATA_LEN
+	 */
+	return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN);
 }
 
 /* adjusts the MTU if a new interface with a smaller MTU appeared. */
-- 
cgit v1.2.3


From e889241f45f9cecbc84a6ffed577083ab52e62ee Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Mon, 27 Jan 2014 12:23:28 +0100
Subject: batman-adv: fix TT-TVLV parsing on OGM reception

When accessing a TT-TVLV container in the OGM RX path
the variable pointing to the list of changes to apply is
altered by mistake.

This makes the TT component read data at the wrong position
in the OGM packet buffer.

Fix it by removing the bogus pointer alteration.

Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/translation-table.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index b6071f675a3e..beba13fbd10a 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -3218,7 +3218,6 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
 
 		spin_lock_bh(&orig_node->tt_lock);
 
-		tt_change = (struct batadv_tvlv_tt_change *)tt_buff;
 		batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes,
 					 ttvn, tt_change);
 
-- 
cgit v1.2.3


From 91c2b1a9f680ff105369d49abc7e19ca7efb33e1 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Tue, 28 Jan 2014 02:06:47 +0100
Subject: batman-adv: release vlan object after checking the CRC

There is a refcounter unbalance in the CRC checking routine
invoked on OGM reception. A vlan object is retrieved (thus
its refcounter is increased by one) but it is never properly
released. This leads to a memleak because the vlan object
will never be free'd.

Fix this by releasing the vlan object after having read the
CRC.

Reported-by: Russell Senior <russell@personaltelco.net>
Reported-by: Daniel <daniel@makrotopia.org>
Reported-by: cmsv <cmsv@wirelesspt.net>
Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/translation-table.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index beba13fbd10a..c21c5572c860 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -2262,6 +2262,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
 {
 	struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp;
 	struct batadv_orig_node_vlan *vlan;
+	uint32_t crc;
 	int i;
 
 	/* check if each received CRC matches the locally stored one */
@@ -2281,7 +2282,10 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
 		if (!vlan)
 			return false;
 
-		if (vlan->tt.crc != ntohl(tt_vlan_tmp->crc))
+		crc = vlan->tt.crc;
+		batadv_orig_node_vlan_free_ref(vlan);
+
+		if (crc != ntohl(tt_vlan_tmp->crc))
 			return false;
 	}
 
-- 
cgit v1.2.3


From f1791425cf0bcda43ab9a9a37df1ad3ccb1f6654 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Thu, 30 Jan 2014 00:12:24 +0100
Subject: batman-adv: properly check pskb_may_pull return value

pskb_may_pull() returns 1 on success and 0 in case of failure,
therefore checking for the return value being negative does
not make sense at all.

This way if the function fails we will probably read beyond the current
skb data buffer. Fix this by doing the proper check.

Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/routing.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 1ed9f7c9ecea..c26f07368849 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -688,7 +688,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	int is_old_ttvn;
 
 	/* check if there is enough data before accessing it */
-	if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0)
+	if (!pskb_may_pull(skb, hdr_len + ETH_HLEN))
 		return 0;
 
 	/* create a copy of the skb (in case of for re-routing) to modify it. */
-- 
cgit v1.2.3


From 08bf0ed29c7ded45c477d08618220dd200c3524a Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@open-mesh.com>
Date: Wed, 29 Jan 2014 11:25:12 +0100
Subject: batman-adv: avoid potential race condition when adding a new
 neighbour

When adding a new neighbour it is important to atomically
perform the following:
- check if the neighbour already exists
- append the neighbour to the proper list

If the two operations are not performed in an atomic context
it is possible that two concurrent insertions add the same
neighbour twice.

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/bat_iv_ogm.c | 22 ++++++++++++++++------
 net/batman-adv/originator.c | 36 ++++++++++++++++++++++++++++++++++++
 net/batman-adv/originator.h |  4 ++++
 3 files changed, 56 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 512159bf607f..094ae7ca50a0 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -266,7 +266,7 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
 			struct batadv_orig_node *orig_neigh)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
-	struct batadv_neigh_node *neigh_node;
+	struct batadv_neigh_node *neigh_node, *tmp_neigh_node;
 
 	neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node);
 	if (!neigh_node)
@@ -281,14 +281,24 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
 	neigh_node->orig_node = orig_neigh;
 	neigh_node->if_incoming = hard_iface;
 
-	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-		   "Creating new neighbor %pM for orig_node %pM on interface %s\n",
-		   neigh_addr, orig_node->orig, hard_iface->net_dev->name);
-
 	spin_lock_bh(&orig_node->neigh_list_lock);
-	hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+	tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface,
+					       neigh_addr);
+	if (!tmp_neigh_node) {
+		hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+	} else {
+		kfree(neigh_node);
+		batadv_hardif_free_ref(hard_iface);
+		neigh_node = tmp_neigh_node;
+	}
 	spin_unlock_bh(&orig_node->neigh_list_lock);
 
+	if (!tmp_neigh_node)
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "Creating new neighbor %pM for orig_node %pM on interface %s\n",
+			   neigh_addr, orig_node->orig,
+			   hard_iface->net_dev->name);
+
 out:
 	return neigh_node;
 }
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 6df12a2e3605..853941629dc1 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -457,6 +457,42 @@ out:
 	return neigh_node;
 }
 
+/**
+ * batadv_neigh_node_get - retrieve a neighbour from the list
+ * @orig_node: originator which the neighbour belongs to
+ * @hard_iface: the interface where this neighbour is connected to
+ * @addr: the address of the neighbour
+ *
+ * Looks for and possibly returns a neighbour belonging to this originator list
+ * which is connected through the provided hard interface.
+ * Returns NULL if the neighbour is not found.
+ */
+struct batadv_neigh_node *
+batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
+		      const struct batadv_hard_iface *hard_iface,
+		      const uint8_t *addr)
+{
+	struct batadv_neigh_node *tmp_neigh_node, *res = NULL;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) {
+		if (!batadv_compare_eth(tmp_neigh_node->addr, addr))
+			continue;
+
+		if (tmp_neigh_node->if_incoming != hard_iface)
+			continue;
+
+		if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+			continue;
+
+		res = tmp_neigh_node;
+		break;
+	}
+	rcu_read_unlock();
+
+	return res;
+}
+
 /**
  * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object
  * @rcu: rcu pointer of the orig_ifinfo object
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 37be290f63f6..db3a9ed734cb 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -29,6 +29,10 @@ void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
 struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
 					      const uint8_t *addr);
 struct batadv_neigh_node *
+batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
+		      const struct batadv_hard_iface *hard_iface,
+		      const uint8_t *addr);
+struct batadv_neigh_node *
 batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
 		      const uint8_t *neigh_addr,
 		      struct batadv_orig_node *orig_node);
-- 
cgit v1.2.3


From b2262df7fcf2c395eca564df83238e931d88d7bf Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Sat, 8 Feb 2014 16:45:06 +0100
Subject: batman-adv: fix potential orig_node reference leak

Since batadv_orig_node_new() sets the refcount to two, assuming that
the calling function will use a reference for putting the orig_node into
a hash or similar, both references must be freed if initialization of
the orig_node fails. Otherwise that object may be leaked in that error
case.

Reported-by: Antonio Quartulli <antonio@meshcoding.com>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
---
 net/batman-adv/bat_iv_ogm.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 094ae7ca50a0..42cbc0a68941 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -254,6 +254,8 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr)
 free_bcast_own:
 	kfree(orig_node->bat_iv.bcast_own);
 free_orig_node:
+	/* free twice, as batadv_orig_node_new sets refcount to 2 */
+	batadv_orig_node_free_ref(orig_node);
 	batadv_orig_node_free_ref(orig_node);
 
 	return NULL;
-- 
cgit v1.2.3


From a30e22ca8464c2dc573e0144a972221c2f06c2cd Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@open-mesh.com>
Date: Tue, 11 Feb 2014 17:05:06 +0100
Subject: batman-adv: fix TT CRC computation by ensuring byte order

When computing the CRC on a 2byte variable the order of
the bytes obviously alters the final result. This means
that computing the CRC over the same value on two archs
having different endianess leads to different numbers.

The global and local translation table CRC computation
routine makes this mistake while processing the clients
VIDs. The result is a continuous CRC mismatching between
nodes having different endianess.

Fix this by converting the VID to Network Order before
processing it. This guarantees that every node uses the same
byte order.

Introduced by 7ea7b4a142758deaf46c1af0ca9ceca6dd55138b
("batman-adv: make the TT CRC logic VLAN specific")

Reported-by: Russel Senior <russell@personaltelco.net>
Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Tested-by: Russell Senior <russell@personaltelco.net>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/translation-table.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index c21c5572c860..959dde721c46 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1975,6 +1975,7 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
 	struct hlist_head *head;
 	uint32_t i, crc_tmp, crc = 0;
 	uint8_t flags;
+	__be16 tmp_vid;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
@@ -2011,8 +2012,11 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
 							     orig_node))
 				continue;
 
-			crc_tmp = crc32c(0, &tt_common->vid,
-					 sizeof(tt_common->vid));
+			/* use network order to read the VID: this ensures that
+			 * every node reads the bytes in the same order.
+			 */
+			tmp_vid = htons(tt_common->vid);
+			crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid));
 
 			/* compute the CRC on flags that have to be kept in sync
 			 * among nodes
@@ -2046,6 +2050,7 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
 	struct hlist_head *head;
 	uint32_t i, crc_tmp, crc = 0;
 	uint8_t flags;
+	__be16 tmp_vid;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
@@ -2064,8 +2069,11 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
 			if (tt_common->flags & BATADV_TT_CLIENT_NEW)
 				continue;
 
-			crc_tmp = crc32c(0, &tt_common->vid,
-					 sizeof(tt_common->vid));
+			/* use network order to read the VID: this ensures that
+			 * every node reads the bytes in the same order.
+			 */
+			tmp_vid = htons(tt_common->vid);
+			crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid));
 
 			/* compute the CRC on flags that have to be kept in sync
 			 * among nodes
-- 
cgit v1.2.3


From 05c3c8a636aa9ee35ce13f65afc5b665615cc786 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@open-mesh.com>
Date: Tue, 11 Feb 2014 17:05:07 +0100
Subject: batman-adv: free skb on TVLV parsing success

When the TVLV parsing routine succeed the skb is left
untouched thus leading to a memory leak.

Fix this by consuming the skb in case of success.

Introduced by ef26157747d42254453f6b3ac2bd8bd3c53339c3
("batman-adv: tvlv - basic infrastructure")

Reported-by: Russel Senior <russell@personaltelco.net>
Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Tested-by: Russell Senior <russell@personaltelco.net>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/routing.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index c26f07368849..a953d5b196a3 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -918,6 +918,8 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
 
 	if (ret != NET_RX_SUCCESS)
 		ret = batadv_route_unicast_packet(skb, recv_if);
+	else
+		consume_skb(skb);
 
 	return ret;
 }
-- 
cgit v1.2.3


From a5a5cb8cab526af2f6cbe9715f8ca843192f0d81 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Sat, 15 Feb 2014 02:17:20 +0100
Subject: batman-adv: avoid double free when orig_node initialization fails

In the failure path of the orig_node initialization routine
the orig_node->bat_iv.bcast_own field is free'd twice: first
in batadv_iv_ogm_orig_get() and then later in
batadv_orig_node_free_rcu().

Fix it by removing the kfree in batadv_iv_ogm_orig_get().

Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/bat_iv_ogm.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 42cbc0a68941..8323bced8e5b 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -241,18 +241,16 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr)
 	size = bat_priv->num_ifaces * sizeof(uint8_t);
 	orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC);
 	if (!orig_node->bat_iv.bcast_own_sum)
-		goto free_bcast_own;
+		goto free_orig_node;
 
 	hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
 				     batadv_choose_orig, orig_node,
 				     &orig_node->hash_entry);
 	if (hash_added != 0)
-		goto free_bcast_own;
+		goto free_orig_node;
 
 	return orig_node;
 
-free_bcast_own:
-	kfree(orig_node->bat_iv.bcast_own);
 free_orig_node:
 	/* free twice, as batadv_orig_node_new sets refcount to 2 */
 	batadv_orig_node_free_ref(orig_node);
-- 
cgit v1.2.3


From 70b271a78beba787155d6696aacd7c4d4a251c50 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Sat, 15 Feb 2014 21:50:37 +0100
Subject: batman-adv: fix potential kernel paging error for unicast
 transmissions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

batadv_send_skb_prepare_unicast(_4addr) might reallocate the
skb's data. If it does then our ethhdr pointer is not valid
anymore in batadv_send_skb_unicast(), resulting in a kernel
paging error.

Fixing this by refetching the ethhdr pointer after the
potential reallocation.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
---
 net/batman-adv/send.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 579f5f00a385..843febd1e519 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -254,9 +254,9 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
 				   struct batadv_orig_node *orig_node,
 				   unsigned short vid)
 {
-	struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
+	struct ethhdr *ethhdr;
 	struct batadv_unicast_packet *unicast_packet;
-	int ret = NET_XMIT_DROP;
+	int ret = NET_XMIT_DROP, hdr_size;
 
 	if (!orig_node)
 		goto out;
@@ -265,12 +265,16 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
 	case BATADV_UNICAST:
 		if (!batadv_send_skb_prepare_unicast(skb, orig_node))
 			goto out;
+
+		hdr_size = sizeof(*unicast_packet);
 		break;
 	case BATADV_UNICAST_4ADDR:
 		if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb,
 							   orig_node,
 							   packet_subtype))
 			goto out;
+
+		hdr_size = sizeof(struct batadv_unicast_4addr_packet);
 		break;
 	default:
 		/* this function supports UNICAST and UNICAST_4ADDR only. It
@@ -279,6 +283,7 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
 		goto out;
 	}
 
+	ethhdr = (struct ethhdr *)(skb->data + hdr_size);
 	unicast_packet = (struct batadv_unicast_packet *)skb->data;
 
 	/* inform the destination node that we are still missing a correct route
-- 
cgit v1.2.3


From 08b44656c08c8c2f73cdac2a058be2880e3361f2 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 17 Feb 2014 14:22:21 +0100
Subject: gre: add link local route when local addr is any

This bug was reported by Steinar H. Gunderson and was introduced by commit
f7cb8886335d ("sit/gre6: don't try to add the same route two times").

root@morgental:~# ip tunnel add foo mode gre remote 1.2.3.4 ttl 64
root@morgental:~# ip link set foo up mtu 1468
root@morgental:~# ip -6 route show dev foo
fe80::/64  proto kernel  metric 256

but after the above commit, no such route shows up.

There is no link local route because dev->dev_addr is 0 (because local ipv4
address is 0), hence no link local address is configured.

In this scenario, the link local address is added manually: 'ip -6 addr add
fe80::1 dev foo' and because prefix is /128, no link local route is added by the
kernel.

Even if the right things to do is to add the link local address with a /64
prefix, we need to restore the previous behavior to avoid breaking userpace.

Reported-by: Steinar H. Gunderson <sesse@samfundet.no>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ad235690684c..fdbfeca36d63 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2783,6 +2783,8 @@ static void addrconf_gre_config(struct net_device *dev)
 	ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
 	if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
 		addrconf_add_linklocal(idev, &addr);
+	else
+		addrconf_prefix_route(&addr, 64, dev, 0, 0);
 }
 #endif
 
-- 
cgit v1.2.3


From a4b1b5877b514b276f0f31efe02388a9c2836728 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Thu, 19 Dec 2013 12:09:32 +0100
Subject: HID: Bluetooth: hidp: make sure input buffers are big enough

HID core expects the input buffers to be at least of size 4096
(HID_MAX_BUFFER_SIZE). Other sizes will result in buffer-overflows if an
input-report is smaller than advertised. We could, like i2c, compute the
biggest report-size instead of using HID_MAX_BUFFER_SIZE, but this will
blow up if report-descriptors are changed after ->start() has been called.
So lets be safe and just use the biggest buffer we have.

Note that this adds an additional copy to the HIDP input path. If there is
a way to make sure the skb-buf is big enough, we should use that instead.

The best way would be to make hid-core honor the @size argument, though,
that sounds easier than it is. So lets just fix the buffer-overflows for
now and afterwards look for a faster way for all transport drivers.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 net/bluetooth/hidp/core.c | 16 ++++++++++++++--
 net/bluetooth/hidp/hidp.h |  4 ++++
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 292e619db896..d9fb93451442 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -430,6 +430,16 @@ static void hidp_del_timer(struct hidp_session *session)
 		del_timer(&session->timer);
 }
 
+static void hidp_process_report(struct hidp_session *session,
+				int type, const u8 *data, int len, int intr)
+{
+	if (len > HID_MAX_BUFFER_SIZE)
+		len = HID_MAX_BUFFER_SIZE;
+
+	memcpy(session->input_buf, data, len);
+	hid_input_report(session->hid, type, session->input_buf, len, intr);
+}
+
 static void hidp_process_handshake(struct hidp_session *session,
 					unsigned char param)
 {
@@ -502,7 +512,8 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
 			hidp_input_report(session, skb);
 
 		if (session->hid)
-			hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 0);
+			hidp_process_report(session, HID_INPUT_REPORT,
+					    skb->data, skb->len, 0);
 		break;
 
 	case HIDP_DATA_RTYPE_OTHER:
@@ -584,7 +595,8 @@ static void hidp_recv_intr_frame(struct hidp_session *session,
 			hidp_input_report(session, skb);
 
 		if (session->hid) {
-			hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 1);
+			hidp_process_report(session, HID_INPUT_REPORT,
+					    skb->data, skb->len, 1);
 			BT_DBG("report len %d", skb->len);
 		}
 	} else {
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index ab5241400cf7..8798492a6e99 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -24,6 +24,7 @@
 #define __HIDP_H
 
 #include <linux/types.h>
+#include <linux/hid.h>
 #include <linux/kref.h>
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/l2cap.h>
@@ -179,6 +180,9 @@ struct hidp_session {
 
 	/* Used in hidp_output_raw_report() */
 	int output_report_success; /* boolean */
+
+	/* temporary input buffer */
+	u8 input_buf[HID_MAX_BUFFER_SIZE];
 };
 
 /* HIDP init defines */
-- 
cgit v1.2.3


From a6254864c08109c66a194612585afc0439005286 Mon Sep 17 00:00:00 2001
From: Duan Jiong <duanj.fnst@cn.fujitsu.com>
Date: Mon, 17 Feb 2014 15:23:43 +0800
Subject: ipv4: fix counter in_slow_tot

since commit 89aef8921bf("ipv4: Delete routing cache."), the counter
in_slow_tot can't work correctly.

The counter in_slow_tot increase by one when fib_lookup() return successfully
in ip_route_input_slow(), but actually the dst struct maybe not be created and
cached, so we can increase in_slow_tot after the dst struct is created.

Signed-off-by: Duan Jiong <duanj.fnst@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6f6dd85bdffc..4c011ec69ed4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1597,6 +1597,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway	= 0;
 	rth->rt_uses_gateway = 0;
 	INIT_LIST_HEAD(&rth->rt_uncached);
+	RT_CACHE_STAT_INC(in_slow_tot);
 
 	rth->dst.input = ip_forward;
 	rth->dst.output = ip_output;
@@ -1701,8 +1702,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto no_route;
 	}
 
-	RT_CACHE_STAT_INC(in_slow_tot);
-
 	if (res.type == RTN_BROADCAST)
 		goto brd_input;
 
@@ -1773,6 +1772,7 @@ local_input:
 	rth->rt_gateway	= 0;
 	rth->rt_uses_gateway = 0;
 	INIT_LIST_HEAD(&rth->rt_uncached);
+	RT_CACHE_STAT_INC(in_slow_tot);
 	if (res.type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
 		rth->dst.error= -err;
-- 
cgit v1.2.3


From ffd5939381c609056b33b7585fb05a77b4c695f3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 17 Feb 2014 12:11:11 +0100
Subject: net: sctp: fix sctp_connectx abi for ia32 emulation/compat mode

SCTP's sctp_connectx() abi breaks for 64bit kernels compiled with 32bit
emulation (e.g. ia32 emulation or x86_x32). Due to internal usage of
'struct sctp_getaddrs_old' which includes a struct sockaddr pointer,
sizeof(param) check will always fail in kernel as the structure in
64bit kernel space is 4bytes larger than for user binaries compiled
in 32bit mode. Thus, applications making use of sctp_connectx() won't
be able to run under such circumstances.

Introduce a compat interface in the kernel to deal with such
situations by using a 'struct compat_sctp_getaddrs_old' structure
where user data is copied into it, and then sucessively transformed
into a 'struct sctp_getaddrs_old' structure with the help of
compat_ptr(). That fixes sctp_connectx() abi without any changes
needed in user space, and lets the SCTP test suite pass when compiled
in 32bit and run on 64bit kernels.

Fixes: f9c67811ebc0 ("sctp: Fix regression introduced by new sctp_connectx api")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7075ac847fde..981aaf8b6ace 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -64,6 +64,7 @@
 #include <linux/crypto.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/compat.h>
 
 #include <net/ip.h>
 #include <net/icmp.h>
@@ -1368,11 +1369,19 @@ static int sctp_setsockopt_connectx(struct sock *sk,
 /*
  * New (hopefully final) interface for the API.
  * We use the sctp_getaddrs_old structure so that use-space library
- * can avoid any unnecessary allocations.   The only defferent part
+ * can avoid any unnecessary allocations. The only different part
  * is that we store the actual length of the address buffer into the
- * addrs_num structure member.  That way we can re-use the existing
+ * addrs_num structure member. That way we can re-use the existing
  * code.
  */
+#ifdef CONFIG_COMPAT
+struct compat_sctp_getaddrs_old {
+	sctp_assoc_t	assoc_id;
+	s32		addr_num;
+	compat_uptr_t	addrs;		/* struct sockaddr * */
+};
+#endif
+
 static int sctp_getsockopt_connectx3(struct sock *sk, int len,
 				     char __user *optval,
 				     int __user *optlen)
@@ -1381,16 +1390,30 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len,
 	sctp_assoc_t assoc_id = 0;
 	int err = 0;
 
-	if (len < sizeof(param))
-		return -EINVAL;
+#ifdef CONFIG_COMPAT
+	if (is_compat_task()) {
+		struct compat_sctp_getaddrs_old param32;
 
-	if (copy_from_user(&param, optval, sizeof(param)))
-		return -EFAULT;
+		if (len < sizeof(param32))
+			return -EINVAL;
+		if (copy_from_user(&param32, optval, sizeof(param32)))
+			return -EFAULT;
 
-	err = __sctp_setsockopt_connectx(sk,
-			(struct sockaddr __user *)param.addrs,
-			param.addr_num, &assoc_id);
+		param.assoc_id = param32.assoc_id;
+		param.addr_num = param32.addr_num;
+		param.addrs = compat_ptr(param32.addrs);
+	} else
+#endif
+	{
+		if (len < sizeof(param))
+			return -EINVAL;
+		if (copy_from_user(&param, optval, sizeof(param)))
+			return -EFAULT;
+	}
 
+	err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *)
+					 param.addrs, param.addr_num,
+					 &assoc_id);
 	if (err == 0 || err == -EINPROGRESS) {
 		if (copy_to_user(optval, &assoc_id, sizeof(assoc_id)))
 			return -EFAULT;
-- 
cgit v1.2.3


From d7cf0c34af067555737193b6c1aa7abaa677f29c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 18 Feb 2014 15:20:51 +0300
Subject: af_packet: remove a stray tab in packet_set_ring()

At first glance it looks like there is a missing curly brace but
actually the code works the same either way.  I have adjusted the
indenting but left the code the same.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5dc1168f98a..48a6a93db296 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3804,7 +3804,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		 */
 			if (!tx_ring)
 				init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
-				break;
+			break;
 		default:
 			break;
 		}
-- 
cgit v1.2.3