From 3082a2b7b1af1b1508c1c3fa589566064f926f40 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 16 Feb 2010 16:36:25 -0500
Subject: rfkill: Add support for KEY_RFKILL

Add support for handling KEY_RFKILL in the rfkill input module. This
simply toggles the state of all rfkill devices. The comment in rfkill.h
is also updated to reflect that RFKILL_TYPE_ALL may be used inside the
kernel.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/rfkill/input.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index a7295ad5f9cb..3713d7ecab96 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -212,6 +212,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type,
 		case KEY_WIMAX:
 			rfkill_schedule_toggle(RFKILL_TYPE_WIMAX);
 			break;
+		case KEY_RFKILL:
+			rfkill_schedule_toggle(RFKILL_TYPE_ALL);
+			break;
 		}
 	} else if (type == EV_SW && code == SW_RFKILL_ALL)
 		rfkill_schedule_evsw_rfkillall(data);
@@ -294,6 +297,11 @@ static const struct input_device_id rfkill_ids[] = {
 		.evbit = { BIT_MASK(EV_KEY) },
 		.keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) },
 	},
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_RFKILL)] = BIT_MASK(KEY_RFKILL) },
+	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT,
 		.evbit = { BIT(EV_SW) },
-- 
cgit v1.2.3


From 9c87ba6734422034fccb938da1039ed63da1395c Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Sun, 28 Feb 2010 12:13:46 +0200
Subject: mac80211: Fix reassociation processing (within ESS roaming)

Commit e1dd33f60ced091114e4aacf141e0d03b88d3e13 changed cfg80211 to
allow association commands while in associated state to enable support
for roaming within an ESS. However, this was not enough to resolve all
cases with mac80211 which needs some additional handling of the
reassociation case to clear internal state with the BSS that was in use
previously.

This patch makes ieee80211_mgd_assoc() accept a valid reassociation
command and clean the association state with the previous BSS. This
fixes roaming between BSSes in an ESS when using wpa_supplicant with
-Dnl80211.

Signed-off-by: Jouni Malinen <j@w1.fi>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 41812a15eea0..5a268761e4c5 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1893,8 +1893,20 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 
 	mutex_lock(&ifmgd->mtx);
 	if (ifmgd->associated) {
-		mutex_unlock(&ifmgd->mtx);
-		return -EALREADY;
+		if (!req->prev_bssid ||
+		    memcmp(req->prev_bssid, ifmgd->associated->bssid,
+			   ETH_ALEN)) {
+			/*
+			 * We are already associated and the request was not a
+			 * reassociation request from the current BSS, so
+			 * reject it.
+			 */
+			mutex_unlock(&ifmgd->mtx);
+			return -EALREADY;
+		}
+
+		/* Trying to reassociate - clear previous association state */
+		ieee80211_set_disassoc(sdata);
 	}
 	mutex_unlock(&ifmgd->mtx);
 
-- 
cgit v1.2.3


From 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 2 Mar 2010 02:51:56 +0000
Subject: ipsec: Fix bogus bundle flowi

When I merged the bundle creation code, I introduced a bogus
flowi value in the bundle.  Instead of getting from the caller,
it was instead set to the flow in the route object, which is
totally different.

The end result is that the bundles we created never match, and
we instead end up with an ever growing bundle list.

Thanks to Jamal for find this problem.

Reported-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      | 3 ++-
 net/ipv4/xfrm4_policy.c | 5 +++--
 net/ipv6/xfrm6_policy.c | 3 ++-
 net/xfrm/xfrm_policy.c  | 7 ++++---
 4 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a7df3275b860..d74e080ba6c9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -275,7 +275,8 @@ struct xfrm_policy_afinfo {
 					     struct dst_entry *dst,
 					     int nfheader_len);
 	int			(*fill_dst)(struct xfrm_dst *xdst,
-					    struct net_device *dev);
+					    struct net_device *dev,
+					    struct flowi *fl);
 };
 
 extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 67107d63c1cd..e4a1483fba77 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -91,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 	return 0;
 }
 
-static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+			  struct flowi *fl)
 {
 	struct rtable *rt = (struct rtable *)xdst->route;
 
-	xdst->u.rt.fl = rt->fl;
+	xdst->u.rt.fl = *fl;
 
 	xdst->u.dst.dev = dev;
 	dev_hold(dev);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index dbdc696f5fc5..ae181651c75a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -116,7 +116,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 	return 0;
 }
 
-static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+			  struct flowi *fl)
 {
 	struct rt6_info *rt = (struct rt6_info*)xdst->route;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 34a5ef8316e7..843e066649cb 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1372,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 	return err;
 }
 
-static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+				struct flowi *fl)
 {
 	struct xfrm_policy_afinfo *afinfo =
 		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
@@ -1381,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
 	if (!afinfo)
 		return -EINVAL;
 
-	err = afinfo->fill_dst(xdst, dev);
+	err = afinfo->fill_dst(xdst, dev, fl);
 
 	xfrm_policy_put_afinfo(afinfo);
 
@@ -1486,7 +1487,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
 
-		err = xfrm_fill_dst(xdst, dev);
+		err = xfrm_fill_dst(xdst, dev, fl);
 		if (err)
 			goto free_dst;
 
-- 
cgit v1.2.3


From 1162563f82b434e3099c9e6c1bbdba846d792f0d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 2 Mar 2010 20:40:01 +0000
Subject: af_packet: move strict addr_len check right before
 dev_[mc/unicast]_[add/del]

My previous patch 914c8ad2d18b62ad1420f518c0cab0b0b90ab308 incorrectly changed
the length check in packet_mc_add to be more strict. The problem is that
userspace is not filling this field (and it stays zeroed) in case of setting
PACKET_MR_PROMISC or PACKET_MR_ALLMULTI. So move the strict check to the point
in path where the addr_len must be set correctly.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Reported-by: Pavel Roskin <proski@gnu.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 031a5e6fb4aa..1612d417d10c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1688,6 +1688,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 {
 	switch (i->type) {
 	case PACKET_MR_MULTICAST:
+		if (i->alen != dev->addr_len)
+			return -EINVAL;
 		if (what > 0)
 			return dev_mc_add(dev, i->addr, i->alen, 0);
 		else
@@ -1700,6 +1702,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 		return dev_set_allmulti(dev, what);
 		break;
 	case PACKET_MR_UNICAST:
+		if (i->alen != dev->addr_len)
+			return -EINVAL;
 		if (what > 0)
 			return dev_unicast_add(dev, i->addr);
 		else
@@ -1734,7 +1738,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
 		goto done;
 
 	err = -EINVAL;
-	if (mreq->mr_alen != dev->addr_len)
+	if (mreq->mr_alen > dev->addr_len)
 		goto done;
 
 	err = -ENOBUFS;
-- 
cgit v1.2.3


From d4612cb86ed8db8956b6b19435f8a30de6c67ffe Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 2 Mar 2010 15:48:23 +0000
Subject: Bluetooth: Use single_open() for inquiry cache within debugfs

The inquiry cache information in debugfs should be using seq_file support
and not allocating memory on the stack for the string. Since the usage of
these information is really seldom, using single_open() for it is good
enough.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/hci_sysfs.c | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 1a79a6c7e30e..cafb55b0cea5 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -3,6 +3,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -405,20 +406,11 @@ static struct device_type bt_host = {
 	.release = bt_host_release,
 };
 
-static int inquiry_cache_open(struct inode *inode, struct file *file)
-{
-	file->private_data = inode->i_private;
-	return 0;
-}
-
-static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf,
-						size_t count, loff_t *ppos)
+static int inquiry_cache_show(struct seq_file *f, void *p)
 {
-	struct hci_dev *hdev = file->private_data;
+	struct hci_dev *hdev = f->private;
 	struct inquiry_cache *cache = &hdev->inq_cache;
 	struct inquiry_entry *e;
-	char buf[4096];
-	int n = 0;
 
 	hci_dev_lock_bh(hdev);
 
@@ -426,23 +418,30 @@ static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf,
 		struct inquiry_data *data = &e->data;
 		bdaddr_t bdaddr;
 		baswap(&bdaddr, &data->bdaddr);
-		n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n",
-				batostr(&bdaddr),
-				data->pscan_rep_mode, data->pscan_period_mode,
-				data->pscan_mode, data->dev_class[2],
-				data->dev_class[1], data->dev_class[0],
-				__le16_to_cpu(data->clock_offset),
-				data->rssi, data->ssp_mode, e->timestamp);
+		seq_printf(f, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n",
+			   batostr(&bdaddr),
+			   data->pscan_rep_mode, data->pscan_period_mode,
+			   data->pscan_mode, data->dev_class[2],
+			   data->dev_class[1], data->dev_class[0],
+			   __le16_to_cpu(data->clock_offset),
+			   data->rssi, data->ssp_mode, e->timestamp);
 	}
 
 	hci_dev_unlock_bh(hdev);
 
-	return simple_read_from_buffer(userbuf, count, ppos, buf, n);
+	return 0;
+}
+
+static int inquiry_cache_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, inquiry_cache_show, inode->i_private);
 }
 
 static const struct file_operations inquiry_cache_fops = {
-	.open = inquiry_cache_open,
-	.read = inquiry_cache_read,
+	.open		= inquiry_cache_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
 
 int hci_register_sysfs(struct hci_dev *hdev)
-- 
cgit v1.2.3


From 1cd4efddc4512ccbd9fe317f688f361605ca0c88 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 3 Mar 2010 01:23:22 -0800
Subject: bridge: depends on INET

br_multicast calls ip_send_check(), so it should depend on INET.

built-in:
br_multicast.c:(.text+0x88cf4): undefined reference to `ip_send_check'

or modular:
ERROR: "ip_send_check" [net/bridge/bridge.ko] undefined!

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 19a6b9629c51..d115d5cea5b6 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -35,6 +35,7 @@ config BRIDGE
 config BRIDGE_IGMP_SNOOPING
 	bool "IGMP snooping"
 	depends on BRIDGE
+	depends on INET
 	default y
 	---help---
 	  If you say Y here, then the Ethernet bridge will be able selectively
-- 
cgit v1.2.3


From 4fa004373133ece3d9b1c0a7e243b0e53760b165 Mon Sep 17 00:00:00 2001
From: Sujith <Sujith.Manoharan@atheros.com>
Date: Mon, 1 Mar 2010 14:42:57 +0530
Subject: mac80211: Fix HT rate control configuration

Handling HT configuration changes involved setting the channel
with the new HT parameters and then issuing a rate_update()
notification to the driver.

This behavior changed after the off-channel changes. Now, the channel
is not updated with the new HT params in enable_ht() - instead, it
is now done when the scan work terminates. This results in the driver
depending on stale information, defaulting to non-HT mode always.

Fix this by passing the new channel type to the driver.

Cc: stable@kernel.org
Signed-off-by: Sujith <Sujith.Manoharan@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/rc.c | 6 +++---
 include/net/mac80211.h              | 3 ++-
 net/mac80211/mlme.c                 | 3 ++-
 net/mac80211/rate.h                 | 5 +++--
 4 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c
index ac34a055c713..0e79e58cf4c9 100644
--- a/drivers/net/wireless/ath/ath9k/rc.c
+++ b/drivers/net/wireless/ath/ath9k/rc.c
@@ -1323,7 +1323,7 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband,
 
 static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband,
 			    struct ieee80211_sta *sta, void *priv_sta,
-			    u32 changed)
+			    u32 changed, enum nl80211_channel_type oper_chan_type)
 {
 	struct ath_softc *sc = priv;
 	struct ath_rate_priv *ath_rc_priv = priv_sta;
@@ -1340,8 +1340,8 @@ static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband,
 		if (sc->sc_ah->opmode != NL80211_IFTYPE_STATION)
 			return;
 
-		if (sc->hw->conf.channel_type == NL80211_CHAN_HT40MINUS ||
-		    sc->hw->conf.channel_type == NL80211_CHAN_HT40PLUS)
+		if (oper_chan_type == NL80211_CHAN_HT40MINUS ||
+		    oper_chan_type == NL80211_CHAN_HT40PLUS)
 			oper_cw40 = true;
 
 		oper_sgi40 = (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40) ?
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 80eb7cc42ce9..45d7d44d7cbe 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2426,7 +2426,8 @@ struct rate_control_ops {
 			  struct ieee80211_sta *sta, void *priv_sta);
 	void (*rate_update)(void *priv, struct ieee80211_supported_band *sband,
 			    struct ieee80211_sta *sta,
-			    void *priv_sta, u32 changed);
+			    void *priv_sta, u32 changed,
+			    enum nl80211_channel_type oper_chan_type);
 	void (*free_sta)(void *priv, struct ieee80211_sta *sta,
 			 void *priv_sta);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5a268761e4c5..0ab284c32135 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -177,7 +177,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 		sta = sta_info_get(sdata, bssid);
 		if (sta)
 			rate_control_rate_update(local, sband, sta,
-						 IEEE80211_RC_HT_CHANGED);
+						 IEEE80211_RC_HT_CHANGED,
+						 local->oper_channel_type);
 		rcu_read_unlock();
         }
 
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index b6108bca96d4..065a96190e32 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -66,7 +66,8 @@ static inline void rate_control_rate_init(struct sta_info *sta)
 
 static inline void rate_control_rate_update(struct ieee80211_local *local,
 				    struct ieee80211_supported_band *sband,
-				    struct sta_info *sta, u32 changed)
+				    struct sta_info *sta, u32 changed,
+				    enum nl80211_channel_type oper_chan_type)
 {
 	struct rate_control_ref *ref = local->rate_ctrl;
 	struct ieee80211_sta *ista = &sta->sta;
@@ -74,7 +75,7 @@ static inline void rate_control_rate_update(struct ieee80211_local *local,
 
 	if (ref && ref->ops->rate_update)
 		ref->ops->rate_update(ref->priv, sband, ista,
-				      priv_sta, changed);
+				      priv_sta, changed, oper_chan_type);
 }
 
 static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
-- 
cgit v1.2.3


From 122e4519cd5c224d4b8e681d368132b643e28f60 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 2 Mar 2010 13:32:44 +0000
Subject: IPv6: addrconf dad timer unnecessary bh_disable

Timer code runs in bottom half, so there is no need for
using _bh form of locking.  Also check if device is not ready
to avoid race with address that is no longer active.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 88fd8c5877ee..e6cba9c45c6c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2850,9 +2850,9 @@ static void addrconf_dad_timer(unsigned long data)
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr mcaddr;
 
-	read_lock_bh(&idev->lock);
-	if (idev->dead) {
-		read_unlock_bh(&idev->lock);
+	read_lock(&idev->lock);
+	if (idev->dead || !(idev->if_flags & IF_READY)) {
+		read_unlock(&idev->lock);
 		goto out;
 	}
 
@@ -2864,7 +2864,7 @@ static void addrconf_dad_timer(unsigned long data)
 
 		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
 		spin_unlock(&ifp->lock);
-		read_unlock_bh(&idev->lock);
+		read_unlock(&idev->lock);
 
 		addrconf_dad_completed(ifp);
 
@@ -2874,7 +2874,7 @@ static void addrconf_dad_timer(unsigned long data)
 	ifp->probes--;
 	addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
 	spin_unlock(&ifp->lock);
-	read_unlock_bh(&idev->lock);
+	read_unlock(&idev->lock);
 
 	/* send a neighbour solicitation for our addr */
 	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-- 
cgit v1.2.3


From 5b2a19539c5f59c5a038d213ede723f0245d97cf Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 2 Mar 2010 13:32:45 +0000
Subject: IPv6: addrconf timer race

The Router Solicitation timer races with device state changes
because it doesn't lock the device. Use local variable to avoid
one repeated dereference.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e6cba9c45c6c..5f582f385abb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2739,28 +2739,29 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 static void addrconf_rs_timer(unsigned long data)
 {
 	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+	struct inet6_dev *idev = ifp->idev;
 
-	if (ifp->idev->cnf.forwarding)
+	read_lock(&idev->lock);
+	if (idev->dead || !(idev->if_flags & IF_READY))
 		goto out;
 
-	if (ifp->idev->if_flags & IF_RA_RCVD) {
-		/*
-		 *	Announcement received after solicitation
-		 *	was sent
-		 */
+	if (idev->cnf.forwarding)
+		goto out;
+
+	/* Announcement received after solicitation was sent */
+	if (idev->if_flags & IF_RA_RCVD)
 		goto out;
-	}
 
 	spin_lock(&ifp->lock);
-	if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
+	if (ifp->probes++ < idev->cnf.rtr_solicits) {
 		/* The wait after the last probe can be shorter */
 		addrconf_mod_timer(ifp, AC_RS,
-				   (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
-				   ifp->idev->cnf.rtr_solicit_delay :
-				   ifp->idev->cnf.rtr_solicit_interval);
+				   (ifp->probes == idev->cnf.rtr_solicits) ?
+				   idev->cnf.rtr_solicit_delay :
+				   idev->cnf.rtr_solicit_interval);
 		spin_unlock(&ifp->lock);
 
-		ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+		ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
 	} else {
 		spin_unlock(&ifp->lock);
 		/*
@@ -2768,10 +2769,11 @@ static void addrconf_rs_timer(unsigned long data)
 		 * assumption any longer.
 		 */
 		printk(KERN_DEBUG "%s: no IPv6 routers present\n",
-		       ifp->idev->dev->name);
+		       idev->dev->name);
 	}
 
 out:
+	read_unlock(&idev->lock);
 	in6_ifa_put(ifp);
 }
 
-- 
cgit v1.2.3


From 84e8b803f1e16f3a2b8b80f80a63fa2f2f8a9be6 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 2 Mar 2010 13:32:46 +0000
Subject: IPv6: addrconf notify when address is unavailable

My recent change in net-next to retain permanent addresses caused regression.
Device refcount would not go to zero when device was unregistered because
left over anycast reference would hold ipv6 dev reference which would hold
device references...

The correct procedure is to call notify chain when address is no longer
available for use.  When interface comes back DAD timer will notify
back that address is available.

Also, link local addresses should be purged when interface is brought
down. The address might be changed.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 46 +++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5f582f385abb..7a4bf7671285 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2649,11 +2649,11 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		write_lock_bh(&addrconf_hash_lock);
 		while ((ifa = *bifa) != NULL) {
 			if (ifa->idev == idev &&
-			    (how || !(ifa->flags&IFA_F_PERMANENT))) {
+			    (how || !(ifa->flags&IFA_F_PERMANENT) ||
+			     ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
 				*bifa = ifa->lst_next;
 				ifa->lst_next = NULL;
-				addrconf_del_timer(ifa);
-				in6_ifa_put(ifa);
+				__in6_ifa_put(ifa);
 				continue;
 			}
 			bifa = &ifa->lst_next;
@@ -2691,28 +2691,40 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 #endif
 	bifa = &idev->addr_list;
 	while ((ifa = *bifa) != NULL) {
-		if (how == 0 && (ifa->flags&IFA_F_PERMANENT)) {
-			/* Retain permanent address on admin down */
+		addrconf_del_timer(ifa);
+
+		/* If just doing link down, and address is permanent
+		   and not link-local, then retain it. */
+		if (how == 0 &&
+		    (ifa->flags&IFA_F_PERMANENT) &&
+		    !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
 			bifa = &ifa->if_next;
 
-			/* Restart DAD if needed when link comes back up */
-			if ( !((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
-			       idev->cnf.accept_dad <= 0 ||
-			       (ifa->flags & IFA_F_NODAD)))
-				ifa->flags |= IFA_F_TENTATIVE;
+			/* If not doing DAD on this address, just keep it. */
+			if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
+			    idev->cnf.accept_dad <= 0 ||
+			    (ifa->flags & IFA_F_NODAD))
+				continue;
+
+			/* If it was tentative already, no need to notify */
+			if (ifa->flags & IFA_F_TENTATIVE)
+				continue;
+
+			/* Flag it for later restoration when link comes up */
+			ifa->flags |= IFA_F_TENTATIVE;
+			in6_ifa_hold(ifa);
 		} else {
 			*bifa = ifa->if_next;
 			ifa->if_next = NULL;
-
 			ifa->dead = 1;
-			write_unlock_bh(&idev->lock);
+		}
+		write_unlock_bh(&idev->lock);
 
-			__ipv6_ifa_notify(RTM_DELADDR, ifa);
-			atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
-			in6_ifa_put(ifa);
+		__ipv6_ifa_notify(RTM_DELADDR, ifa);
+		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
+		in6_ifa_put(ifa);
 
-			write_lock_bh(&idev->lock);
-		}
+		write_lock_bh(&idev->lock);
 	}
 	write_unlock_bh(&idev->lock);
 
-- 
cgit v1.2.3


From 8f37ada5b5f6bfb4d251a7f510f249cb855b77b3 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 3 Mar 2010 08:19:59 +0000
Subject: IPv6: fix race between cleanup and add/delete address

This solves a potential race problem during the cleanup process.
The issue is that addrconf_ifdown() needs to traverse address list,
but then drop lock to call the notifier. The version in -next
could get confused if add/delete happened during this window.
Original code (2.6.32 and earlier) was okay because all addresses
were always deleted.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7a4bf7671285..6cf3ee14ace3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2615,7 +2615,7 @@ static void addrconf_bonding_change(struct net_device *dev, unsigned long event)
 static int addrconf_ifdown(struct net_device *dev, int how)
 {
 	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifa, **bifa;
+	struct inet6_ifaddr *ifa, *keep_list, **bifa;
 	struct net *net = dev_net(dev);
 	int i;
 
@@ -2689,8 +2689,12 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		write_lock_bh(&idev->lock);
 	}
 #endif
-	bifa = &idev->addr_list;
-	while ((ifa = *bifa) != NULL) {
+	keep_list = NULL;
+	bifa = &keep_list;
+	while ((ifa = idev->addr_list) != NULL) {
+		idev->addr_list = ifa->if_next;
+		ifa->if_next = NULL;
+
 		addrconf_del_timer(ifa);
 
 		/* If just doing link down, and address is permanent
@@ -2698,6 +2702,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		if (how == 0 &&
 		    (ifa->flags&IFA_F_PERMANENT) &&
 		    !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
+
+			/* Move to holding list */
+			*bifa = ifa;
 			bifa = &ifa->if_next;
 
 			/* If not doing DAD on this address, just keep it. */
@@ -2714,8 +2721,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 			ifa->flags |= IFA_F_TENTATIVE;
 			in6_ifa_hold(ifa);
 		} else {
-			*bifa = ifa->if_next;
-			ifa->if_next = NULL;
 			ifa->dead = 1;
 		}
 		write_unlock_bh(&idev->lock);
@@ -2726,6 +2731,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 		write_lock_bh(&idev->lock);
 	}
+
+	idev->addr_list = keep_list;
+
 	write_unlock_bh(&idev->lock);
 
 	/* Step 5: Discard multicast list */
-- 
cgit v1.2.3


From 6d55cb91a0020ac0d78edcad61efd6c8cf5785a3 Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Wed, 3 Mar 2010 04:01:13 +0000
Subject: gre: fix hard header destination address checking

ipgre_header() can be called with zero daddr when the gre device is
configured as multipoint tunnel and still has the NOARP flag set (which is
typically cleared by the userspace arp daemon).  If the NOARP packets are
not dropped, ipgre_tunnel_xmit() will take rt->rt_gateway (= NBMA IP) and
use that for route look up (and may lead to bogus xfrm acquires).

The multicast address check is removed as sending to multicast group should
be ok.  In fact, if gre device has a multicast address as destination
ipgre_header is always called with multicast address.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c0c5274d0271..f47c9f76754b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1144,12 +1144,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 
 	if (saddr)
 		memcpy(&iph->saddr, saddr, 4);
-
-	if (daddr) {
+	if (daddr)
 		memcpy(&iph->daddr, daddr, 4);
-		return t->hlen;
-	}
-	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
+	if (iph->daddr)
 		return t->hlen;
 
 	return -t->hlen;
-- 
cgit v1.2.3


From d0021b252eaf65ca07ed14f0d66425dd9ccab9a6 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 3 Mar 2010 08:31:23 +0000
Subject: tipc: Fix oops on send prior to entering networked mode (v3)

Fix TIPC to disallow sending to remote addresses prior to entering NET_MODE

user programs can oops the kernel by sending datagrams via AF_TIPC prior to
entering networked mode.  The following backtrace has been observed:

ID: 13459  TASK: ffff810014640040  CPU: 0   COMMAND: "tipc-client"
[exception RIP: tipc_node_select_next_hop+90]
RIP: ffffffff8869d3c3  RSP: ffff81002d9a5ab8  RFLAGS: 00010202
RAX: 0000000000000001  RBX: 0000000000000001  RCX: 0000000000000001
RDX: 0000000000000000  RSI: 0000000000000001  RDI: 0000000001001001
RBP: 0000000001001001   R8: 0074736575716552   R9: 0000000000000000
R10: ffff81003fbd0680  R11: 00000000000000c8  R12: 0000000000000008
R13: 0000000000000001  R14: 0000000000000001  R15: ffff810015c6ca00
ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
RIP: 0000003cbd8d49a3  RSP: 00007fffc84e0be8  RFLAGS: 00010206
RAX: 000000000000002c  RBX: ffffffff8005d116  RCX: 0000000000000000
RDX: 0000000000000008  RSI: 00007fffc84e0c00  RDI: 0000000000000003
RBP: 0000000000000000   R8: 00007fffc84e0c10   R9: 0000000000000010
R10: 0000000000000000  R11: 0000000000000246  R12: 0000000000000000
R13: 00007fffc84e0d10  R14: 0000000000000000  R15: 00007fffc84e0c30
ORIG_RAX: 000000000000002c  CS: 0033  SS: 002b

What happens is that, when the tipc module in inserted it enters a standalone
node mode in which communication to its own address is allowed <0.0.0> but not
to other addresses, since the appropriate data structures have not been
allocated yet (specifically the tipc_net pointer).  There is nothing stopping a
client from trying to send such a message however, and if that happens, we
attempt to dereference tipc_net.zones while the pointer is still NULL, and
explode.  The fix is pretty straightforward.  Since these oopses all arise from
the dereference of global pointers prior to their assignment to allocated
values, and since these allocations are small (about 2k total), lets convert
these pointers to static arrays of the appropriate size.  All the accesses to
these bits consider 0/NULL to be a non match when searching, so all the lookups
still work properly, and there is no longer a chance of a bad dererence
anywhere.  As a bonus, this lets us eliminate the setup/teardown routines for
those pointers, and elimnates the need to preform any locking around them to
prevent access while their being allocated/freed.

I've updated the tipc_net structure to behave this way to fix the exact reported
problem, and also fixed up the tipc_bearers and media_list arrays to fix an
obvious simmilar problem that arises from issuing tipc-config commands to
manipulate bearers/links prior to entering networked mode

I've tested this for a few hours by running the sanity tests and stress test
with the tipcutils suite, and nothing has fallen over.  There have been a few
lockdep warnings, but those were there before, and can be addressed later, as
they didn't actually result in any deadlock.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Allan Stephens <allan.stephens@windriver.com>
CC: David S. Miller <davem@davemloft.net>
CC: tipc-discussion@lists.sourceforge.net

 bearer.c |   37 ++++++-------------------------------
 bearer.h |    2 +-
 net.c    |   25 ++++---------------------
 3 files changed, 11 insertions(+), 53 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 37 ++++++-------------------------------
 net/tipc/bearer.h |  2 +-
 net/tipc/net.c    | 25 ++++---------------------
 3 files changed, 11 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 327011fcc407..78091375ca12 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -45,10 +45,10 @@
 
 #define MAX_ADDR_STR 32
 
-static struct media *media_list = NULL;
+static struct media media_list[MAX_MEDIA];
 static u32 media_count = 0;
 
-struct bearer *tipc_bearers = NULL;
+struct bearer tipc_bearers[MAX_BEARERS];
 
 /**
  * media_name_valid - validate media name
@@ -108,9 +108,11 @@ int  tipc_register_media(u32 media_type,
 	int res = -EINVAL;
 
 	write_lock_bh(&tipc_net_lock);
-	if (!media_list)
-		goto exit;
 
+	if (tipc_mode != TIPC_NET_MODE) {
+		warn("Media <%s> rejected, not in networked mode yet\n", name);
+		goto exit;
+	}
 	if (!media_name_valid(name)) {
 		warn("Media <%s> rejected, illegal name\n", name);
 		goto exit;
@@ -660,33 +662,10 @@ int tipc_disable_bearer(const char *name)
 
 
-int tipc_bearer_init(void)
-{
-	int res;
-
-	write_lock_bh(&tipc_net_lock);
-	tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC);
-	media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC);
-	if (tipc_bearers && media_list) {
-		res = 0;
-	} else {
-		kfree(tipc_bearers);
-		kfree(media_list);
-		tipc_bearers = NULL;
-		media_list = NULL;
-		res = -ENOMEM;
-	}
-	write_unlock_bh(&tipc_net_lock);
-	return res;
-}
-
 void tipc_bearer_stop(void)
 {
 	u32 i;
 
-	if (!tipc_bearers)
-		return;
-
 	for (i = 0; i < MAX_BEARERS; i++) {
 		if (tipc_bearers[i].active)
 			tipc_bearers[i].publ.blocked = 1;
@@ -695,10 +674,6 @@ void tipc_bearer_stop(void)
 		if (tipc_bearers[i].active)
 			bearer_disable(tipc_bearers[i].publ.name);
 	}
-	kfree(tipc_bearers);
-	kfree(media_list);
-	tipc_bearers = NULL;
-	media_list = NULL;
 	media_count = 0;
 }
 
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index ca5734892713..000228e93f9e 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -114,7 +114,7 @@ struct bearer_name {
 
 struct link;
 
-extern struct bearer *tipc_bearers;
+extern struct bearer tipc_bearers[];
 
 void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
 struct sk_buff *tipc_media_get_names(void);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 7906608bf510..f25b1cdb64eb 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -116,7 +116,8 @@
 */
 
 DEFINE_RWLOCK(tipc_net_lock);
-struct network tipc_net = { NULL };
+struct _zone *tipc_zones[256] = { NULL, };
+struct network tipc_net = { tipc_zones };
 
 struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
 {
@@ -158,28 +159,12 @@ void tipc_net_send_external_routes(u32 dest)
 	}
 }
 
-static int net_init(void)
-{
-	memset(&tipc_net, 0, sizeof(tipc_net));
-	tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC);
-	if (!tipc_net.zones) {
-		return -ENOMEM;
-	}
-	return 0;
-}
-
 static void net_stop(void)
 {
 	u32 z_num;
 
-	if (!tipc_net.zones)
-		return;
-
-	for (z_num = 1; z_num <= tipc_max_zones; z_num++) {
+	for (z_num = 1; z_num <= tipc_max_zones; z_num++)
 		tipc_zone_delete(tipc_net.zones[z_num]);
-	}
-	kfree(tipc_net.zones);
-	tipc_net.zones = NULL;
 }
 
 static void net_route_named_msg(struct sk_buff *buf)
@@ -282,9 +267,7 @@ int tipc_net_start(u32 addr)
 	tipc_named_reinit();
 	tipc_port_reinit();
 
-	if ((res = tipc_bearer_init()) ||
-	    (res = net_init()) ||
-	    (res = tipc_cltr_init()) ||
+	if ((res = tipc_cltr_init()) ||
 	    (res = tipc_bclink_init())) {
 		return res;
 	}
-- 
cgit v1.2.3


From 8eae939f1400326b06d0c9afe53d2a484a326871 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:40 +0000
Subject: net: add limit for socket backlog

We got system OOM while running some UDP netperf testing on the loopback
device. The case is multiple senders sent stream UDP packets to a single
receiver via loopback on local host. Of course, the receiver is not able
to handle all the packets in time. But we surprisingly found that these
packets were not discarded due to the receiver's sk->sk_rcvbuf limit.
Instead, they are kept queuing to sk->sk_backlog and finally ate up all
the memory. We believe this is a secure hole that a none privileged user
can crash the system.

The root cause for this problem is, when the receiver is doing
__release_sock() (i.e. after userspace recv, kernel udp_recvmsg ->
skb_free_datagram_locked -> release_sock), it moves skbs from backlog to
sk_receive_queue with the softirq enabled. In the above case, multiple
busy senders will almost make it an endless loop. The skbs in the
backlog end up eat all the system memory.

The issue is not only for UDP. Any protocols using socket backlog is
potentially affected. The patch adds limit for socket backlog so that
the backlog size cannot be expanded endlessly.

Reported-by: Alex Shi <alex.shi@intel.com>
Cc: David Miller <davem@davemloft.net>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru
Cc: "Pekka Savola (ipv6)" <pekkas@netcore.fi>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Vlad Yasevich <vladislav.yasevich@hp.com>
Cc: Sridhar Samudrala <sri@us.ibm.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Allan Stephens <allan.stephens@windriver.com>
Cc: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 15 ++++++++++++++-
 net/core/sock.c    | 16 ++++++++++++++--
 2 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 6cb1676e409a..2516d76f043c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -253,6 +253,8 @@ struct sock {
 	struct {
 		struct sk_buff *head;
 		struct sk_buff *tail;
+		int len;
+		int limit;
 	} sk_backlog;
 	wait_queue_head_t	*sk_sleep;
 	struct dst_entry	*sk_dst_cache;
@@ -589,7 +591,7 @@ static inline int sk_stream_memory_free(struct sock *sk)
 	return sk->sk_wmem_queued < sk->sk_sndbuf;
 }
 
-/* The per-socket spinlock must be held here. */
+/* OOB backlog add */
 static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	if (!sk->sk_backlog.tail) {
@@ -601,6 +603,17 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+/* The per-socket spinlock must be held here. */
+static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb)
+{
+	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+		return -ENOBUFS;
+
+	sk_add_backlog(sk, skb);
+	sk->sk_backlog.len += skb->truesize;
+	return 0;
+}
+
 static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	return sk->sk_backlog_rcv(sk, skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index fcd397a762ff..6e22dc973d23 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else
-		sk_add_backlog(sk, skb);
+	} else if (sk_add_backlog_limited(sk, skb)) {
+		bh_unlock_sock(sk);
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
+
 	bh_unlock_sock(sk);
 out:
 	sock_put(sk);
@@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		sock_lock_init(newsk);
 		bh_lock_sock(newsk);
 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
+		newsk->sk_backlog.len = 0;
 
 		atomic_set(&newsk->sk_rmem_alloc, 0);
 		/*
@@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk)
 
 		bh_lock_sock(sk);
 	} while ((skb = sk->sk_backlog.head) != NULL);
+
+	/*
+	 * Doing the zeroing here guarantee we can not loop forever
+	 * while a wild producer attempts to flood us.
+	 */
+	sk->sk_backlog.len = 0;
 }
 
 /**
@@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_allocation	=	GFP_KERNEL;
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
 	sk->sk_sndbuf		=	sysctl_wmem_default;
+	sk->sk_backlog.limit	=	sk->sk_rcvbuf << 1;
 	sk->sk_state		=	TCP_CLOSE;
 	sk_set_socket(sk, sock);
 
-- 
cgit v1.2.3


From 6b03a53a5ab7ccf2d5d69f96cf1c739c4d2a8fb9 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:41 +0000
Subject: tcp: use limited socket backlog

Make tcp adapt to the limited socket backlog change.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: "Pekka Savola (ipv6)" <pekkas@netcore.fi>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 6 ++++--
 net/ipv6/tcp_ipv6.c | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c3588b4fd979..4baf1943b1bd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1682,8 +1682,10 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v4_do_rcv(sk, skb);
 		}
-	} else
-		sk_add_backlog(sk, skb);
+	} else if (sk_add_backlog_limited(sk, skb)) {
+		bh_unlock_sock(sk);
+		goto discard_and_relse;
+	}
 	bh_unlock_sock(sk);
 
 	sock_put(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6963a6b6763e..c4ea9d5cbfaa 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1740,8 +1740,10 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v6_do_rcv(sk, skb);
 		}
-	} else
-		sk_add_backlog(sk, skb);
+	} else if (sk_add_backlog_limited(sk, skb)) {
+		bh_unlock_sock(sk);
+		goto discard_and_relse;
+	}
 	bh_unlock_sock(sk);
 
 	sock_put(sk);
-- 
cgit v1.2.3


From 55349790d7cbf0d381873a7ece1dcafcffd4aaa9 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:42 +0000
Subject: udp: use limited socket backlog

Make udp adapt to the limited socket backlog change.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: "Pekka Savola (ipv6)" <pekkas@netcore.fi>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c |  6 ++++--
 net/ipv6/udp.c | 28 ++++++++++++++++++----------
 2 files changed, 22 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 608a5446d05b..e7eb47f338d4 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1371,8 +1371,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		rc = __udp_queue_rcv_skb(sk, skb);
-	else
-		sk_add_backlog(sk, skb);
+	else if (sk_add_backlog_limited(sk, skb)) {
+		bh_unlock_sock(sk);
+		goto drop;
+	}
 	bh_unlock_sock(sk);
 
 	return rc;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52b8347ae3b2..64804912b093 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -583,16 +583,20 @@ static void flush_stack(struct sock **stack, unsigned int count,
 			bh_lock_sock(sk);
 			if (!sock_owned_by_user(sk))
 				udpv6_queue_rcv_skb(sk, skb1);
-			else
-				sk_add_backlog(sk, skb1);
+			else if (sk_add_backlog_limited(sk, skb1)) {
+				kfree_skb(skb1);
+				bh_unlock_sock(sk);
+				goto drop;
+			}
 			bh_unlock_sock(sk);
-		} else {
-			atomic_inc(&sk->sk_drops);
-			UDP6_INC_STATS_BH(sock_net(sk),
-					UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
-			UDP6_INC_STATS_BH(sock_net(sk),
-					UDP_MIB_INERRORS, IS_UDPLITE(sk));
+			continue;
 		}
+drop:
+		atomic_inc(&sk->sk_drops);
+		UDP6_INC_STATS_BH(sock_net(sk),
+				UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
+		UDP6_INC_STATS_BH(sock_net(sk),
+				UDP_MIB_INERRORS, IS_UDPLITE(sk));
 	}
 }
 /*
@@ -754,8 +758,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
-	else
-		sk_add_backlog(sk, skb);
+	else if (sk_add_backlog_limited(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		bh_unlock_sock(sk);
+		sock_put(sk);
+		goto discard;
+	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
 	return 0;
-- 
cgit v1.2.3


From 79545b681961d7001c1f4c3eb9ffb87bed4485db Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:43 +0000
Subject: llc: use limited socket backlog

Make llc adapt to the limited socket backlog change.

Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_conn.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index a8dde9b010da..c0539ffdb272 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -827,7 +827,8 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
 	else {
 		dprintk("%s: adding to backlog...\n", __func__);
 		llc_set_backlog_type(skb, LLC_PACKET);
-		sk_add_backlog(sk, skb);
+		if (sk_add_backlog_limited(sk, skb))
+			goto drop_unlock;
 	}
 out:
 	bh_unlock_sock(sk);
-- 
cgit v1.2.3


From 50b1a782f845140f4138f14a1ce8a4a6dd0cc82f Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:44 +0000
Subject: sctp: use limited socket backlog

Make sctp adapt to the limited socket backlog change.

Cc: Vlad Yasevich <vladislav.yasevich@hp.com>
Cc: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c  | 42 +++++++++++++++++++++++++++---------------
 net/sctp/socket.c |  3 +++
 2 files changed, 30 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/sctp/input.c b/net/sctp/input.c
index c0c973e67add..cbc063665e6b 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -75,7 +75,7 @@ static struct sctp_association *__sctp_lookup_association(
 					const union sctp_addr *peer,
 					struct sctp_transport **pt);
 
-static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
+static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
 
 
 /* Calculate the SCTP checksum of an SCTP packet.  */
@@ -265,8 +265,13 @@ int sctp_rcv(struct sk_buff *skb)
 	}
 
 	if (sock_owned_by_user(sk)) {
+		if (sctp_add_backlog(sk, skb)) {
+			sctp_bh_unlock_sock(sk);
+			sctp_chunk_free(chunk);
+			skb = NULL; /* sctp_chunk_free already freed the skb */
+			goto discard_release;
+		}
 		SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
-		sctp_add_backlog(sk, skb);
 	} else {
 		SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
 		sctp_inq_push(&chunk->rcvr->inqueue, chunk);
@@ -336,8 +341,10 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 		sctp_bh_lock_sock(sk);
 
 		if (sock_owned_by_user(sk)) {
-			sk_add_backlog(sk, skb);
-			backloged = 1;
+			if (sk_add_backlog_limited(sk, skb))
+				sctp_chunk_free(chunk);
+			else
+				backloged = 1;
 		} else
 			sctp_inq_push(inqueue, chunk);
 
@@ -362,22 +369,27 @@ done:
 	return 0;
 }
 
-static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
+static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
 	struct sctp_ep_common *rcvr = chunk->rcvr;
+	int ret;
 
-	/* Hold the assoc/ep while hanging on the backlog queue.
-	 * This way, we know structures we need will not disappear from us
-	 */
-	if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
-		sctp_association_hold(sctp_assoc(rcvr));
-	else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
-		sctp_endpoint_hold(sctp_ep(rcvr));
-	else
-		BUG();
+	ret = sk_add_backlog_limited(sk, skb);
+	if (!ret) {
+		/* Hold the assoc/ep while hanging on the backlog queue.
+		 * This way, we know structures we need will not disappear
+		 * from us
+		 */
+		if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
+			sctp_association_hold(sctp_assoc(rcvr));
+		else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
+			sctp_endpoint_hold(sctp_ep(rcvr));
+		else
+			BUG();
+	}
+	return ret;
 
-	sk_add_backlog(sk, skb);
 }
 
 /* Handle icmp frag needed error. */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f6d1e59c4151..dfc5c127efd4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3720,6 +3720,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	SCTP_DBG_OBJCNT_INC(sock);
 	percpu_counter_inc(&sctp_sockets_allocated);
 
+	/* Set socket backlog limit. */
+	sk->sk_backlog.limit = sysctl_sctp_rmem[1];
+
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	local_bh_enable();
-- 
cgit v1.2.3


From 53eecb1be5ae499d399d2923933937a9ea1a284f Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:45 +0000
Subject: tipc: use limited socket backlog

Make tipc adapt to the limited socket backlog change.

Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1ea64f09cc45..22bfbc33a8ac 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1322,8 +1322,10 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
 	if (!sock_owned_by_user(sk)) {
 		res = filter_rcv(sk, buf);
 	} else {
-		sk_add_backlog(sk, buf);
-		res = TIPC_OK;
+		if (sk_add_backlog_limited(sk, buf))
+			res = TIPC_ERR_OVERLOAD;
+		else
+			res = TIPC_OK;
 	}
 	bh_unlock_sock(sk);
 
-- 
cgit v1.2.3


From 2499849ee8f513e795b9f2c19a42d6356e4943a4 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:46 +0000
Subject: x25: use limited socket backlog

Make x25 adapt to the limited socket backlog change.

Cc: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 3e1efe534645..a9da0dc26f4f 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 		if (!sock_owned_by_user(sk)) {
 			queued = x25_process_rx_frame(sk, skb);
 		} else {
-			sk_add_backlog(sk, skb);
+			queued = !sk_add_backlog_limited(sk, skb);
 		}
 		bh_unlock_sock(sk);
 		sock_put(sk);
-- 
cgit v1.2.3


From a3a858ff18a72a8d388e31ab0d98f7e944841a62 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 4 Mar 2010 18:01:47 +0000
Subject: net: backlog functions rename

sk_add_backlog -> __sk_add_backlog
sk_add_backlog_limited -> sk_add_backlog

Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h       | 6 +++---
 net/core/sock.c          | 2 +-
 net/dccp/minisocks.c     | 2 +-
 net/ipv4/tcp_ipv4.c      | 2 +-
 net/ipv4/tcp_minisocks.c | 2 +-
 net/ipv4/udp.c           | 2 +-
 net/ipv6/tcp_ipv6.c      | 2 +-
 net/ipv6/udp.c           | 4 ++--
 net/llc/llc_c_ac.c       | 2 +-
 net/llc/llc_conn.c       | 2 +-
 net/sctp/input.c         | 4 ++--
 net/tipc/socket.c        | 2 +-
 net/x25/x25_dev.c        | 2 +-
 13 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2516d76f043c..170353dd9570 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -592,7 +592,7 @@ static inline int sk_stream_memory_free(struct sock *sk)
 }
 
 /* OOB backlog add */
-static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
+static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	if (!sk->sk_backlog.tail) {
 		sk->sk_backlog.head = sk->sk_backlog.tail = skb;
@@ -604,12 +604,12 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 }
 
 /* The per-socket spinlock must be held here. */
-static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb)
+static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
 		return -ENOBUFS;
 
-	sk_add_backlog(sk, skb);
+	__sk_add_backlog(sk, skb);
 	sk->sk_backlog.len += skb->truesize;
 	return 0;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 6e22dc973d23..61a65a2e0455 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -340,7 +340,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog_limited(sk, skb)) {
+	} else if (sk_add_backlog(sk, skb)) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index af226a063141..0d508c359fa9 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child,
 		 * in main socket hash table and lock on listening
 		 * socket does not protect us more.
 		 */
-		sk_add_backlog(child, skb);
+		__sk_add_backlog(child, skb);
 	}
 
 	bh_unlock_sock(child);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4baf1943b1bd..1915f7dc30e6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1682,7 +1682,7 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v4_do_rcv(sk, skb);
 		}
-	} else if (sk_add_backlog_limited(sk, skb)) {
+	} else if (sk_add_backlog(sk, skb)) {
 		bh_unlock_sock(sk);
 		goto discard_and_relse;
 	}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f206ee5dda80..4199bc6915c5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -728,7 +728,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 		 * in main socket hash table and lock on listening
 		 * socket does not protect us more.
 		 */
-		sk_add_backlog(child, skb);
+		__sk_add_backlog(child, skb);
 	}
 
 	bh_unlock_sock(child);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e7eb47f338d4..7af756d0f931 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1371,7 +1371,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		rc = __udp_queue_rcv_skb(sk, skb);
-	else if (sk_add_backlog_limited(sk, skb)) {
+	else if (sk_add_backlog(sk, skb)) {
 		bh_unlock_sock(sk);
 		goto drop;
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c4ea9d5cbfaa..2c378b1bd5cf 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1740,7 +1740,7 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v6_do_rcv(sk, skb);
 		}
-	} else if (sk_add_backlog_limited(sk, skb)) {
+	} else if (sk_add_backlog(sk, skb)) {
 		bh_unlock_sock(sk);
 		goto discard_and_relse;
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 64804912b093..3c0c9c755c92 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -583,7 +583,7 @@ static void flush_stack(struct sock **stack, unsigned int count,
 			bh_lock_sock(sk);
 			if (!sock_owned_by_user(sk))
 				udpv6_queue_rcv_skb(sk, skb1);
-			else if (sk_add_backlog_limited(sk, skb1)) {
+			else if (sk_add_backlog(sk, skb1)) {
 				kfree_skb(skb1);
 				bh_unlock_sock(sk);
 				goto drop;
@@ -758,7 +758,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
-	else if (sk_add_backlog_limited(sk, skb)) {
+	else if (sk_add_backlog(sk, skb)) {
 		atomic_inc(&sk->sk_drops);
 		bh_unlock_sock(sk);
 		sock_put(sk);
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 019c780512e8..86d6985b9d49 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb)
 			llc_conn_state_process(sk, skb);
 		else {
 			llc_set_backlog_type(skb, LLC_EVENT);
-			sk_add_backlog(sk, skb);
+			__sk_add_backlog(sk, skb);
 		}
 	}
 }
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index c0539ffdb272..a12144da7974 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -827,7 +827,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
 	else {
 		dprintk("%s: adding to backlog...\n", __func__);
 		llc_set_backlog_type(skb, LLC_PACKET);
-		if (sk_add_backlog_limited(sk, skb))
+		if (sk_add_backlog(sk, skb))
 			goto drop_unlock;
 	}
 out:
diff --git a/net/sctp/input.c b/net/sctp/input.c
index cbc063665e6b..3d74b264ea22 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -341,7 +341,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 		sctp_bh_lock_sock(sk);
 
 		if (sock_owned_by_user(sk)) {
-			if (sk_add_backlog_limited(sk, skb))
+			if (sk_add_backlog(sk, skb))
 				sctp_chunk_free(chunk);
 			else
 				backloged = 1;
@@ -375,7 +375,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
 	struct sctp_ep_common *rcvr = chunk->rcvr;
 	int ret;
 
-	ret = sk_add_backlog_limited(sk, skb);
+	ret = sk_add_backlog(sk, skb);
 	if (!ret) {
 		/* Hold the assoc/ep while hanging on the backlog queue.
 		 * This way, we know structures we need will not disappear
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 22bfbc33a8ac..4b235fc1c70f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1322,7 +1322,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
 	if (!sock_owned_by_user(sk)) {
 		res = filter_rcv(sk, buf);
 	} else {
-		if (sk_add_backlog_limited(sk, buf))
+		if (sk_add_backlog(sk, buf))
 			res = TIPC_ERR_OVERLOAD;
 		else
 			res = TIPC_OK;
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index a9da0dc26f4f..52e304212241 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 		if (!sock_owned_by_user(sk)) {
 			queued = x25_process_rx_frame(sk, skb);
 		} else {
-			queued = !sk_add_backlog_limited(sk, skb);
+			queued = !sk_add_backlog(sk, skb);
 		}
 		bh_unlock_sock(sk);
 		sock_put(sk);
-- 
cgit v1.2.3


From 723b2f57ad83ee7087acf9a95e8e289414b1f521 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@redhat.com>
Date: Wed, 3 Mar 2010 22:51:50 +0000
Subject: ethtool: Add direct access to ops->get_sset_count

This patch is an alternative approach for accessing string
counts, vs. the drvinfo indirect approach.  This way the drvinfo
space doesn't run out, and we don't break ABI later.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 17 +++++++++---
 net/core/ethtool.c      | 72 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index cca1c3de140d..f6f961fefbe5 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -253,6 +253,17 @@ struct ethtool_gstrings {
 	__u8	data[0];
 };
 
+struct ethtool_sset_info {
+	__u32	cmd;		/* ETHTOOL_GSSET_INFO */
+	__u32	reserved;
+	__u64	sset_mask;	/* input: each bit selects an sset to query */
+				/* output: each bit a returned sset */
+	__u32	data[0];	/* ETH_SS_xxx count, in order, based on bits
+				   in sset_mask.  One bit implies one
+				   __u32, two bits implies two
+				   __u32's, etc. */
+};
+
 enum ethtool_test_flags {
 	ETH_TEST_FL_OFFLINE	= (1 << 0),	/* online / offline */
 	ETH_TEST_FL_FAILED	= (1 << 1),	/* test passed / failed */
@@ -606,9 +617,9 @@ struct ethtool_ops {
 #define	ETHTOOL_SRXCLSRLINS	0x00000032 /* Insert RX classification rule */
 #define	ETHTOOL_FLASHDEV	0x00000033 /* Flash firmware to device */
 #define	ETHTOOL_RESET		0x00000034 /* Reset hardware */
-
-#define ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
-#define ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
+#define	ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
+#define	ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
+#define	ETHTOOL_GSSET_INFO	0x00000037 /* Get string set info */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0f2f82185ec4..70075c47ada8 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
 	info.cmd = ETHTOOL_GDRVINFO;
 	ops->get_drvinfo(dev, &info);
 
+	/*
+	 * this method of obtaining string set info is deprecated;
+	 * consider using ETHTOOL_GSSET_INFO instead
+	 */
 	if (ops->get_sset_count) {
 		int rc;
 
@@ -237,6 +241,71 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
 	return 0;
 }
 
+/*
+ * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
+ */
+static noinline int ethtool_get_sset_info(struct net_device *dev,
+                                          void __user *useraddr)
+{
+	struct ethtool_sset_info info;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 sset_mask;
+	int i, idx = 0, n_bits = 0, ret, rc;
+	u32 *info_buf = NULL;
+
+	if (!ops->get_sset_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&info, useraddr, sizeof(info)))
+		return -EFAULT;
+
+	/* store copy of mask, because we zero struct later on */
+	sset_mask = info.sset_mask;
+	if (!sset_mask)
+		return 0;
+
+	/* calculate size of return buffer */
+	for (i = 0; i < 64; i++)
+		if (sset_mask & (1ULL << i))
+			n_bits++;
+
+	memset(&info, 0, sizeof(info));
+	info.cmd = ETHTOOL_GSSET_INFO;
+
+	info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER);
+	if (!info_buf)
+		return -ENOMEM;
+
+	/*
+	 * fill return buffer based on input bitmask and successful
+	 * get_sset_count return
+	 */
+	for (i = 0; i < 64; i++) {
+		if (!(sset_mask & (1ULL << i)))
+			continue;
+
+		rc = ops->get_sset_count(dev, i);
+		if (rc >= 0) {
+			info.sset_mask |= (1ULL << i);
+			info_buf[idx++] = rc;
+		}
+	}
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &info, sizeof(info)))
+		goto out;
+
+	useraddr += offsetof(struct ethtool_sset_info, data);
+	if (copy_to_user(useraddr, info_buf, idx * sizeof(u32)))
+		goto out;
+
+	ret = 0;
+
+out:
+	kfree(info_buf);
+	return ret;
+}
+
 /*
  * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
  */
@@ -1471,6 +1540,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXNTUPLE:
 		rc = ethtool_get_rx_ntuple(dev, useraddr);
 		break;
+	case ETHTOOL_GSSET_INFO:
+		rc = ethtool_get_sset_info(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From d17792ebdf90289c9fd1bce888076d3d60ecd53b Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Thu, 4 Mar 2010 08:21:53 +0000
Subject: ethtool: Add direct access to ops->get_sset_count

On 03/04/2010 09:26 AM, Ben Hutchings wrote:
> On Thu, 2010-03-04 at 00:51 -0800, Jeff Kirsher wrote:
>> From: Jeff Garzik<jgarzik@redhat.com>
>>
>> This patch is an alternative approach for accessing string
>> counts, vs. the drvinfo indirect approach.  This way the drvinfo
>> space doesn't run out, and we don't break ABI later.
> [...]
>> --- a/net/core/ethtool.c
>> +++ b/net/core/ethtool.c
>> @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
>>   	info.cmd = ETHTOOL_GDRVINFO;
>>   	ops->get_drvinfo(dev,&info);
>>
>> +	/*
>> +	 * this method of obtaining string set info is deprecated;
>> +	 * consider using ETHTOOL_GSSET_INFO instead
>> +	 */
>
> This comment belongs on the interface (ethtool.h) not the
> implementation.

Debatable -- the current comment is located at the callsite of
ops->get_sset_count(), which is where an implementor might think to add
a new call.  Not all the numeric fields in ethtool_drvinfo are obtained
from ->get_sset_count().

Hence the "some" in the attached patch to include/linux/ethtool.h,
addressing your comment.

> [...]
>> +static noinline int ethtool_get_sset_info(struct net_device *dev,
>> +                                          void __user *useraddr)
>> +{
> [...]
>> +	/* calculate size of return buffer */
>> +	for (i = 0; i<  64; i++)
>> +		if (sset_mask&  (1ULL<<  i))
>> +			n_bits++;
> [...]
>
> We have a function for this:
>
> 	n_bits = hweight64(sset_mask);

Agreed.

I've attached a follow-up patch, which should enable my/Jeff's kernel
patch to be applied, followed by this one.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 7 +++++++
 net/core/ethtool.c      | 7 +++----
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index f6f961fefbe5..b33f316bb92e 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -61,6 +61,13 @@ struct ethtool_drvinfo {
 				/* For PCI devices, use pci_name(pci_dev). */
 	char	reserved1[32];
 	char	reserved2[12];
+				/*
+				 * Some struct members below are filled in
+				 * using ops->get_sset_count().  Obtaining
+				 * this info from ethtool_drvinfo is now
+				 * deprecated; Use ETHTOOL_GSSET_INFO
+				 * instead.
+				 */
 	__u32	n_priv_flags;	/* number of flags valid in ETHTOOL_GPFLAGS */
 	__u32	n_stats;	/* number of u64's from ETHTOOL_GSTATS */
 	__u32	testinfo_len;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 70075c47ada8..33d2ded50f84 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/ethtool.h>
 #include <linux/netdevice.h>
+#include <linux/bitops.h>
 #include <asm/uaccess.h>
 
 /*
@@ -216,7 +217,7 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
 
 	/*
 	 * this method of obtaining string set info is deprecated;
-	 * consider using ETHTOOL_GSSET_INFO instead
+	 * Use ETHTOOL_GSSET_INFO instead.
 	 */
 	if (ops->get_sset_count) {
 		int rc;
@@ -265,9 +266,7 @@ static noinline int ethtool_get_sset_info(struct net_device *dev,
 		return 0;
 
 	/* calculate size of return buffer */
-	for (i = 0; i < 64; i++)
-		if (sset_mask & (1ULL << i))
-			n_bits++;
+	n_bits = hweight64(sset_mask);
 
 	memset(&info, 0, sizeof(info));
 	info.cmd = ETHTOOL_GSSET_INFO;
-- 
cgit v1.2.3


From 02a780c014c40973cbe71d04cec7a24e6629995f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 6 Mar 2010 01:14:09 +0000
Subject: bridge: cleanup: remove unneed check

We dereference "port" on the lines immediately before and immediately
after the test so port should hopefully never be null here.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2559fb539836..8b258872eba8 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -627,8 +627,8 @@ static void br_multicast_port_query_expired(unsigned long data)
 	struct net_bridge *br = port->br;
 
 	spin_lock(&br->multicast_lock);
-	if (port && (port->state == BR_STATE_DISABLED ||
-		     port->state == BR_STATE_BLOCKING))
+	if (port->state == BR_STATE_DISABLED ||
+	    port->state == BR_STATE_BLOCKING)
 		goto out;
 
 	if (port->multicast_startup_queries_sent <
-- 
cgit v1.2.3


From 72150e9b7fec217fbd646a29ea2f65a3d4d55ea9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 6 Mar 2010 01:04:45 +0000
Subject: sock.c: potential null dereference

We test that "prot->rsk_prot" is non-null right before we dereference it
on this line.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 61a65a2e0455..c5812bbc2cc9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2288,7 +2288,8 @@ out_free_request_sock_slab:
 		prot->rsk_prot->slab = NULL;
 	}
 out_free_request_sock_slab_name:
-	kfree(prot->rsk_prot->slab_name);
+	if (prot->rsk_prot)
+		kfree(prot->rsk_prot->slab_name);
 out_free_sock_slab:
 	kmem_cache_destroy(prot->slab);
 	prot->slab = NULL;
-- 
cgit v1.2.3


From 0c9a2ac1f8a2e55b3382dfc27256878a58ea49e9 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sun, 7 Mar 2010 00:14:44 +0000
Subject: ipv6: Optmize translation between IPV6_PREFER_SRC_xxx and
 RT6_LOOKUP_F_xxx.

IPV6_PREFER_SRC_xxx definitions:
| #define IPV6_PREFER_SRC_TMP             0x0001
| #define IPV6_PREFER_SRC_PUBLIC          0x0002
| #define IPV6_PREFER_SRC_COA             0x0004

RT6_LOOKUP_F_xxx definitions:
| #define RT6_LOOKUP_F_SRCPREF_TMP        0x00000008
| #define RT6_LOOKUP_F_SRCPREF_PUBLIC     0x00000010
| #define RT6_LOOKUP_F_SRCPREF_COA        0x00000020

So, we can translate between these two groups by shift operation
instead of multiple 'if's.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 18 ++++++++++++++++++
 net/ipv6/fib6_rules.c   | 11 ++---------
 net/ipv6/route.c        | 11 ++---------
 3 files changed, 22 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 4a808de7c0f6..68f67836e146 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -37,6 +37,24 @@ struct route_info {
 #define RT6_LOOKUP_F_SRCPREF_PUBLIC	0x00000010
 #define RT6_LOOKUP_F_SRCPREF_COA	0x00000020
 
+/*
+ * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
+ * between IPV6_ADDR_PREFERENCES socket option values
+ *	IPV6_PREFER_SRC_TMP    = 0x1
+ *	IPV6_PREFER_SRC_PUBLIC = 0x2
+ *	IPV6_PREFER_SRC_COA    = 0x4
+ * and above RT6_LOOKUP_F_SRCPREF_xxx flags.
+ */
+static inline int rt6_srcprefs2flags(unsigned int srcprefs)
+{
+	/* No need to bitmask because srcprefs have only 3 bits. */
+	return srcprefs << 3;
+}
+
+static inline unsigned int rt6_flags2srcprefs(int flags)
+{
+	return (flags >> 3) & 7;
+}
 
 extern void			ip6_route_input(struct sk_buff *skb);
 
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 551882b9dfd6..5e463c43fcc2 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -84,18 +84,11 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 		if ((rule->flags & FIB_RULE_FIND_SADDR) &&
 		    r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
 			struct in6_addr saddr;
-			unsigned int srcprefs = 0;
-
-			if (flags & RT6_LOOKUP_F_SRCPREF_TMP)
-				srcprefs |= IPV6_PREFER_SRC_TMP;
-			if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC)
-				srcprefs |= IPV6_PREFER_SRC_PUBLIC;
-			if (flags & RT6_LOOKUP_F_SRCPREF_COA)
-				srcprefs |= IPV6_PREFER_SRC_COA;
 
 			if (ipv6_dev_get_saddr(net,
 					       ip6_dst_idev(&rt->u.dst)->dev,
-					       &flp->fl6_dst, srcprefs,
+					       &flp->fl6_dst,
+					       rt6_flags2srcprefs(flags),
 					       &saddr))
 				goto again;
 			if (!ipv6_prefix_equal(&saddr, &r->src.addr,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b08879e97f22..52cd3eff31dc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -819,15 +819,8 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
 
 	if (!ipv6_addr_any(&fl->fl6_src))
 		flags |= RT6_LOOKUP_F_HAS_SADDR;
-	else if (sk) {
-		unsigned int prefs = inet6_sk(sk)->srcprefs;
-		if (prefs & IPV6_PREFER_SRC_TMP)
-			flags |= RT6_LOOKUP_F_SRCPREF_TMP;
-		if (prefs & IPV6_PREFER_SRC_PUBLIC)
-			flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
-		if (prefs & IPV6_PREFER_SRC_COA)
-			flags |= RT6_LOOKUP_F_SRCPREF_COA;
-	}
+	else if (sk)
+		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 
 	return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
 }
-- 
cgit v1.2.3


From 49f5fcfd4ac3df24aa66520e1c5f37db5dfa8c10 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 5 Mar 2010 21:07:39 +0000
Subject: bridge: Use RCU list primitive in __br_mdb_ip_get

As Paul McKenney correctly pointed out, __br_mdb_ip_get needs
to use the RCU list walking primitive in order to work correctly
on platforms where data-dependency ordering is not guaranteed.

Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8b258872eba8..a1ffe1582c9a 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -38,7 +38,7 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get(
 	struct net_bridge_mdb_entry *mp;
 	struct hlist_node *p;
 
-	hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
+	hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
 		if (dst == mp->addr)
 			return mp;
 	}
-- 
cgit v1.2.3


From 10cc2b50eb4b01ca4dc014af2094d28b4ebe20d7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 5 Mar 2010 21:03:35 +0000
Subject: bridge: Fix RCU race in br_multicast_stop

Thanks to Paul McKenny for pointing out that it is incorrect to use
synchronize_rcu_bh to ensure that pending callbacks have completed.
Instead we should use rcu_barrier_bh.

Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index a1ffe1582c9a..12ce1eaa4f3e 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1135,7 +1135,7 @@ void br_multicast_stop(struct net_bridge *br)
 
 	if (mdb->old) {
 		spin_unlock_bh(&br->multicast_lock);
-		synchronize_rcu_bh();
+		rcu_barrier_bh();
 		spin_lock_bh(&br->multicast_lock);
 		WARN_ON(mdb->old);
 	}
-- 
cgit v1.2.3


From 6cce09f87a04797fae5b947ef2626c14a78f0b49 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 7 Mar 2010 23:21:57 +0000
Subject: tcp: Add SNMP counters for backlog and min_ttl drops

Commit 6b03a53a (tcp: use limited socket backlog) added the possibility
of dropping frames when backlog queue is full.

Commit d218d111 (tcp: Generalized TTL Security Mechanism) added the
possibility of dropping frames when TTL is under a given limit.

This patch adds new SNMP MIB entries, named TCPBacklogDrop and
TCPMinTTLDrop, published in /proc/net/netstat in TcpExt: line

netstat -s | egrep "TCPBacklogDrop|TCPMinTTLDrop"
    TCPBacklogDrop: 0
    TCPMinTTLDrop: 0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h | 2 ++
 net/ipv4/proc.c      | 2 ++
 net/ipv4/tcp_ipv4.c  | 7 +++++--
 net/ipv6/tcp_ipv6.c  | 3 ++-
 4 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index e28f5a0182e8..4435d1084755 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -225,6 +225,8 @@ enum
 	LINUX_MIB_SACKSHIFTED,
 	LINUX_MIB_SACKMERGED,
 	LINUX_MIB_SACKSHIFTFALLBACK,
+	LINUX_MIB_TCPBACKLOGDROP,
+	LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 242ed2307370..4f1f337f4337 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -249,6 +249,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED),
 	SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED),
 	SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
+	SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
+	SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1915f7dc30e6..8d51d39ad1bb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1651,8 +1651,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!sk)
 		goto no_tcp_socket;
 
-	if (iph->ttl < inet_sk(sk)->min_ttl)
+	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
 		goto discard_and_relse;
+	}
 
 process:
 	if (sk->sk_state == TCP_TIME_WAIT)
@@ -1682,8 +1684,9 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v4_do_rcv(sk, skb);
 		}
-	} else if (sk_add_backlog(sk, skb)) {
+	} else if (unlikely(sk_add_backlog(sk, skb))) {
 		bh_unlock_sock(sk);
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
 		goto discard_and_relse;
 	}
 	bh_unlock_sock(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2c378b1bd5cf..9b6dbba80d31 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1740,8 +1740,9 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v6_do_rcv(sk, skb);
 		}
-	} else if (sk_add_backlog(sk, skb)) {
+	} else if (unlikely(sk_add_backlog(sk, skb))) {
 		bh_unlock_sock(sk);
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
 		goto discard_and_relse;
 	}
 	bh_unlock_sock(sk);
-- 
cgit v1.2.3


From 9837638727488922727b0cfd438039fa73364183 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 8 Mar 2010 03:20:00 +0000
Subject: net: fix route cache rebuilds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We added an automatic route cache rebuilding in commit 1080d709fb9d8cd43
but had to correct few bugs. One of the assumption of original patch,
was that entries where kept sorted in a given way.

This assumption is known to be wrong (commit 1ddbcb005c395518 gave an
explanation of this and corrected a leak) and expensive to respect.

Paweł Staszewski reported to me one of his machine got its routing cache
disabled after few messages like :

[ 2677.850065] Route hash chain too long!
[ 2677.850080] Adjust your secret_interval!
[82839.662993] Route hash chain too long!
[82839.662996] Adjust your secret_interval!
[155843.731650] Route hash chain too long!
[155843.731664] Adjust your secret_interval!
[155843.811881] Route hash chain too long!
[155843.811891] Adjust your secret_interval!
[155843.858209] vlan0811: 5 rebuilds is over limit, route caching
disabled
[155843.858212] Route hash chain too long!
[155843.858213] Adjust your secret_interval!

This is because rt_intern_hash() might be fooled when computing a chain
length, because multiple entries with same keys can differ because of
TOS (or mark/oif) bits.

In the rare case the fast algorithm see a too long chain, and before
taking expensive path, we call a helper function in order to not count
duplicates of same routes, that only differ with tos/mark/oif bits. This
helper works with data already in cpu cache and is not be very
expensive, despite its O(N^2) implementation.

Paweł Staszewski sucessfully tested this patch on his loaded router.

Reported-and-tested-by: Paweł Staszewski <pstaszewski@itcare.pl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 50 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b2ba5581d2ae..d9b40248b97f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 static int rt_garbage_collect(struct dst_ops *ops);
-static void rt_emergency_hash_rebuild(struct net *net);
 
 
 static struct dst_ops ipv4_dst_ops = {
@@ -780,11 +779,30 @@ static void rt_do_flush(int process_context)
 #define FRACT_BITS 3
 #define ONE (1UL << FRACT_BITS)
 
+/*
+ * Given a hash chain and an item in this hash chain,
+ * find if a previous entry has the same hash_inputs
+ * (but differs on tos, mark or oif)
+ * Returns 0 if an alias is found.
+ * Returns ONE if rth has no alias before itself.
+ */
+static int has_noalias(const struct rtable *head, const struct rtable *rth)
+{
+	const struct rtable *aux = head;
+
+	while (aux != rth) {
+		if (compare_hash_inputs(&aux->fl, &rth->fl))
+			return 0;
+		aux = aux->u.dst.rt_next;
+	}
+	return ONE;
+}
+
 static void rt_check_expire(void)
 {
 	static unsigned int rover;
 	unsigned int i = rover, goal;
-	struct rtable *rth, *aux, **rthp;
+	struct rtable *rth, **rthp;
 	unsigned long samples = 0;
 	unsigned long sum = 0, sum2 = 0;
 	unsigned long delta;
@@ -835,15 +853,7 @@ nofree:
 					 * attributes don't unfairly skew
 					 * the length computation
 					 */
-					for (aux = rt_hash_table[i].chain;;) {
-						if (aux == rth) {
-							length += ONE;
-							break;
-						}
-						if (compare_hash_inputs(&aux->fl, &rth->fl))
-							break;
-						aux = aux->u.dst.rt_next;
-					}
+					length += has_noalias(rt_hash_table[i].chain, rth);
 					continue;
 				}
 			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
@@ -1073,6 +1083,21 @@ work_done:
 out:	return 0;
 }
 
+/*
+ * Returns number of entries in a hash chain that have different hash_inputs
+ */
+static int slow_chain_length(const struct rtable *head)
+{
+	int length = 0;
+	const struct rtable *rth = head;
+
+	while (rth) {
+		length += has_noalias(head, rth);
+		rth = rth->u.dst.rt_next;
+	}
+	return length >> FRACT_BITS;
+}
+
 static int rt_intern_hash(unsigned hash, struct rtable *rt,
 			  struct rtable **rp, struct sk_buff *skb)
 {
@@ -1185,7 +1210,8 @@ restart:
 			rt_free(cand);
 		}
 	} else {
-		if (chain_length > rt_chain_length_max) {
+		if (chain_length > rt_chain_length_max &&
+		    slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
 			struct net *net = dev_net(rt->u.dst.dev);
 			int num = ++net->ipv4.current_rt_cache_rebuild_count;
 			if (!rt_caching(dev_net(rt->u.dst.dev))) {
-- 
cgit v1.2.3


From 28b2774a0d5852236dab77a4147b8b88548110f1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 8 Mar 2010 11:32:01 -0800
Subject: tcp: Fix tcp_make_synack()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 4957faad (TCPCT part 1g: Responder Cookie => Initiator), part
of TCP_COOKIE_TRANSACTION implementation, forgot to correctly size
synack skb in case user data must be included.

Many thanks to Mika Pentillä for spotting this error.

Reported-by: Penttillä Mika <mika.penttila@ixonos.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4a1605d3f909..f181b78f2385 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2395,13 +2395,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	struct tcp_extend_values *xvp = tcp_xv(rvp);
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_cookie_values *cvp = tp->cookie_values;
 	struct tcphdr *th;
 	struct sk_buff *skb;
 	struct tcp_md5sig_key *md5;
 	int tcp_header_size;
 	int mss;
+	int s_data_desired = 0;
 
-	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
+	if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
+		s_data_desired = cvp->s_data_desired;
+	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC);
 	if (skb == NULL)
 		return NULL;
 
@@ -2457,16 +2461,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
 
 	if (OPTION_COOKIE_EXTENSION & opts.options) {
-		const struct tcp_cookie_values *cvp = tp->cookie_values;
-
-		if (cvp != NULL &&
-		    cvp->s_data_constant &&
-		    cvp->s_data_desired > 0) {
-			u8 *buf = skb_put(skb, cvp->s_data_desired);
+		if (s_data_desired) {
+			u8 *buf = skb_put(skb, s_data_desired);
 
 			/* copy data directly from the listening socket. */
-			memcpy(buf, cvp->s_data_payload, cvp->s_data_desired);
-			TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired;
+			memcpy(buf, cvp->s_data_payload, s_data_desired);
+			TCP_SKB_CB(skb)->end_seq += s_data_desired;
 		}
 
 		if (opts.hash_size > 0) {
-- 
cgit v1.2.3


From fc0b579168cbe737c83c6b9bbfe265d3ae6baca6 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 8 Mar 2010 12:15:28 -0800
Subject: net/sunrpc: Remove uses of NIPQUAD, use %pI4

Originally submitted Jan 1, 2010
http://patchwork.kernel.org/patch/71221/

Convert NIPQUAD to the %pI4 format extension where possible
Convert %02x%02x%02x%02x/NIPQUAD to %08x/ntohl

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/xprtrdma/transport.c | 3 +--
 net/sunrpc/xprtsock.c           | 5 ++---
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 7018eef1dcdd..83d339fd66d5 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -165,8 +165,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
 
 	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
 
-	(void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x",
-				NIPQUAD(sin->sin_addr.s_addr));
+	(void)snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
 	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
 
 	(void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3d739e5d15d8..86234bcf2e89 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -297,12 +297,11 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
 	switch (sap->sa_family) {
 	case AF_INET:
 		sin = xs_addr_in(xprt);
-		(void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x",
-					NIPQUAD(sin->sin_addr.s_addr));
+		snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
 		break;
 	case AF_INET6:
 		sin6 = xs_addr_in6(xprt);
-		(void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
+		snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
 		break;
 	default:
 		BUG();
-- 
cgit v1.2.3


From 81160e66cca3d3a16b7d88e0e2dccfc5c76f36f9 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 8 Mar 2010 12:15:59 -0800
Subject: net/sunrpc: Convert (void)snprintf to snprintf

(Applies on top of "Remove uses of NIPQUAD, use %pI4")

Casts to void of snprintf are most uncommon in kernel source.
9 use casts, 1301 do not.

Remove the remaining uses in net/sunrpc/

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/xprtrdma/transport.c | 6 +++---
 net/sunrpc/xprtsock.c           | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 83d339fd66d5..f96c2fe6137b 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -160,15 +160,15 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
 	(void)rpc_ntop(sap, buf, sizeof(buf));
 	xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
 
-	(void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
+	snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
 	xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
 
 	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
 
-	(void)snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
+	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
 	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
 
-	(void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
+	snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
 	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
 
 	/* netid */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 86234bcf2e89..4f55ab7ec1b1 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -314,10 +314,10 @@ static void xs_format_common_peer_ports(struct rpc_xprt *xprt)
 	struct sockaddr *sap = xs_addr(xprt);
 	char buf[128];
 
-	(void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
+	snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
 	xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
 
-	(void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
+	snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
 	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
 }
 
-- 
cgit v1.2.3


From f5c445ed4148434f142be0263a8ad7cb58503e8a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 8 Mar 2010 12:17:04 -0800
Subject: ethtool: Use noinline_for_stack

Use self documenting noinline_for_stack instead of duplicated comments.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 40 ++++++++--------------------------------
 1 file changed, 8 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 33d2ded50f84..f4cb6b6299d9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -200,10 +200,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
 	return dev->ethtool_ops->set_settings(dev, &cmd);
 }
 
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_drvinfo info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -242,10 +239,7 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
 	return 0;
 }
 
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_get_sset_info(struct net_device *dev,
+static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
                                           void __user *useraddr)
 {
 	struct ethtool_sset_info info;
@@ -305,10 +299,7 @@ out:
 	return ret;
 }
 
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_rxnfc cmd;
 
@@ -321,10 +312,7 @@ static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *usera
 	return dev->ethtool_ops->set_rxnfc(dev, &cmd);
 }
 
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_rxnfc info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -396,10 +384,7 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
 	list->count++;
 }
 
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_rx_ntuple cmd;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -867,10 +852,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
 
@@ -884,10 +866,7 @@ static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *us
 	return 0;
 }
 
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce;
 
@@ -1297,10 +1276,7 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
 	return actor(dev, edata.data);
 }
 
-/*
- * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
- */
-static noinline int ethtool_flash_device(struct net_device *dev, char __user *useraddr)
+static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_flash efl;
 
-- 
cgit v1.2.3


From d88dca79d3852a3623f606f781e013d61486828a Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 8 Mar 2010 12:20:58 -0800
Subject: tipc: fix endianness on tipc subscriber messages

Remove htohl implementation from tipc

I was working on forward porting the downstream commits for TIPC and ran accross this one:
http://tipc.cslab.ericsson.net/cgi-bin/gitweb.cgi?p=people/allan/tipc.git;a=commitdiff;h=894279b9437b63cbb02405ad5b8e033b51e4e31e

I was going to just take it, when I looked closer and noted what it was doing.
This is basically a routine to byte swap fields of data in sent/received packets
for tipc, dependent upon the receivers guessed endianness of the peer when a
connection is established.  Asside from just seeming silly to me, it appears to
violate the latest RFC draft for tipc:
http://tipc.sourceforge.net/doc/draft-spec-tipc-02.txt
Which, according to section 4.2 and 4.3.3, requires that all fields of all
commands be sent in network byte order.  So instead of just taking this patch,
instead I'm removing the htohl function and replacing the calls with calls to
ntohl in the rx path and htonl in the send path.

As part of this fix, I'm also changing the subscr_cancel function, which
searches the list of subscribers, using a memcmp of the entire subscriber list,
for the entry to tear down.  unfortunately it memcmps the entire tipc_subscr
structure which has several bits that are private to the local side, so nothing
will ever match.  section 5.2 of the draft spec indicates the <type,upper,lower>
tuple should uniquely identify a subscriber, so convert subscr_cancel to just
match on those fields (properly endian swapped).

I've tested this using the tipc test suite, and its passed without issue.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c | 57 +++++++++++++++++++++----------------------------------
 net/tipc/subscr.h |  2 --
 2 files changed, 22 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ac91f0dfa144..ff123e56114a 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -75,19 +75,6 @@ struct top_srv {
 
 static struct top_srv topsrv = { 0 };
 
-/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-
-static u32 htohl(u32 in, int swap)
-{
-	return swap ? swab32(in) : in;
-}
-
 /**
  * subscr_send_event - send a message containing a tipc_event to the subscriber
  *
@@ -107,11 +94,11 @@ static void subscr_send_event(struct subscription *sub,
 	msg_sect.iov_base = (void *)&sub->evt;
 	msg_sect.iov_len = sizeof(struct tipc_event);
 
-	sub->evt.event = htohl(event, sub->swap);
-	sub->evt.found_lower = htohl(found_lower, sub->swap);
-	sub->evt.found_upper = htohl(found_upper, sub->swap);
-	sub->evt.port.ref = htohl(port_ref, sub->swap);
-	sub->evt.port.node = htohl(node, sub->swap);
+	sub->evt.event = htonl(event);
+	sub->evt.found_lower = htonl(found_lower);
+	sub->evt.found_upper = htonl(found_upper);
+	sub->evt.port.ref = htonl(port_ref);
+	sub->evt.port.node = htonl(node);
 	tipc_send(sub->server_ref, 1, &msg_sect);
 }
 
@@ -287,16 +274,23 @@ static void subscr_cancel(struct tipc_subscr *s,
 {
 	struct subscription *sub;
 	struct subscription *sub_temp;
+	__u32 type, lower, upper;
 	int found = 0;
 
 	/* Find first matching subscription, exit if not found */
 
+	type = ntohl(s->seq.type);
+	lower = ntohl(s->seq.lower);
+	upper = ntohl(s->seq.upper);
+
 	list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
 				 subscription_list) {
-		if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
-			found = 1;
-			break;
-		}
+			if ((type == sub->seq.type) &&
+			    (lower == sub->seq.lower) &&
+			    (upper == sub->seq.upper)) {
+				found = 1;
+				break;
+			}
 	}
 	if (!found)
 		return;
@@ -325,16 +319,10 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
 					     struct subscriber *subscriber)
 {
 	struct subscription *sub;
-	int swap;
-
-	/* Determine subscriber's endianness */
-
-	swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
 
 	/* Detect & process a subscription cancellation request */
 
-	if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
-		s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
+	if (ntohl(s->filter) & TIPC_SUB_CANCEL) {
 		subscr_cancel(s, subscriber);
 		return NULL;
 	}
@@ -359,11 +347,11 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
 
 	/* Initialize subscription object */
 
-	sub->seq.type = htohl(s->seq.type, swap);
-	sub->seq.lower = htohl(s->seq.lower, swap);
-	sub->seq.upper = htohl(s->seq.upper, swap);
-	sub->timeout = htohl(s->timeout, swap);
-	sub->filter = htohl(s->filter, swap);
+	sub->seq.type = ntohl(s->seq.type);
+	sub->seq.lower = ntohl(s->seq.lower);
+	sub->seq.upper = ntohl(s->seq.upper);
+	sub->timeout = ntohl(s->timeout);
+	sub->filter = ntohl(s->filter);
 	if ((!(sub->filter & TIPC_SUB_PORTS) ==
 	     !(sub->filter & TIPC_SUB_SERVICE)) ||
 	    (sub->seq.lower > sub->seq.upper)) {
@@ -376,7 +364,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
 	INIT_LIST_HEAD(&sub->nameseq_list);
 	list_add(&sub->subscription_list, &subscriber->subscription_list);
 	sub->server_ref = subscriber->port_ref;
-	sub->swap = swap;
 	memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
 	atomic_inc(&topsrv.subscription_count);
 	if (sub->timeout != TIPC_WAIT_FOREVER) {
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 45d89bf4d202..c20f496d95b2 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -53,7 +53,6 @@ typedef void (*tipc_subscr_event) (struct subscription *sub,
  * @nameseq_list: adjacent subscriptions in name sequence's subscription list
  * @subscription_list: adjacent subscriptions in subscriber's subscription list
  * @server_ref: object reference of server port associated with subscription
- * @swap: indicates if subscriber uses opposite endianness in its messages
  * @evt: template for events generated by subscription
  */
 
@@ -66,7 +65,6 @@ struct subscription {
 	struct list_head nameseq_list;
 	struct list_head subscription_list;
 	u32 server_ref;
-	int swap;
 	struct tipc_event evt;
 };
 
-- 
cgit v1.2.3


From de5865714621e23d65c52955ca2125dbb074c242 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 8 Mar 2010 12:43:56 -0800
Subject: tipc: filter out messages not intended for this host

Port commit 20deb48d16fdd07ce2fdc8d03ea317362217e085
from git://tipc.cslab.ericsson.net/pub/git/people/allan/tipc.git

Part of the large effort I'm trying to help with getting all the downstreamed
code from windriver forward ported to the upstream tree

Origional commit message
Restore check to filter out inadverdently received messages
This patch reimplements a check that allows TIPC to discard messages
that are not intended for it.  This check was present in TIPC 1.5/1.6,
but was removed by accident during the development of TIPC 1.7; it has
now been updated to account for new features present in TIPC 1.7 and
reinserted into TIPC.  The main benefit of this check is to filter
out messages arriving from orphaned link endpoints, which can arise
when a node exits the network and then re-enters it with a different
TIPC network address (i.e. <Z.C.N> value).

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Origionally-authored-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 6f50f6423f63..1a7e4665af80 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1882,6 +1882,15 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
 			     (msg_destnode(msg) != tipc_own_addr)))
 			goto cont;
 
+		/* Discard non-routeable messages destined for another node */
+
+		if (unlikely(!msg_isdata(msg) &&
+			     (msg_destnode(msg) != tipc_own_addr))) {
+			if ((msg_user(msg) != CONN_MANAGER) &&
+			    (msg_user(msg) != MSG_FRAGMENTER))
+				goto cont;
+		}
+
 		/* Locate unicast link endpoint that should handle message */
 
 		n_ptr = tipc_node_find(msg_prevnode(msg));
-- 
cgit v1.2.3


From bb134d5d9580fc7b945e3bca3c4b263947022966 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 9 Mar 2010 05:55:56 +0000
Subject: tcp: Fix tcp_v4_rcv()

Commit d218d111 (tcp: Generalized TTL Security Mechanism) added a bug
for TIMEWAIT sockets. We should not test min_ttl for TW sockets.

Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8d51d39ad1bb..70df40980a87 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1651,15 +1651,15 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!sk)
 		goto no_tcp_socket;
 
+process:
+	if (sk->sk_state == TCP_TIME_WAIT)
+		goto do_time_wait;
+
 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
 		goto discard_and_relse;
 	}
 
-process:
-	if (sk->sk_state == TCP_TIME_WAIT)
-		goto do_time_wait;
-
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 	nf_reset(skb);
-- 
cgit v1.2.3


From 0a141509ede48ac33ef756ac1640f4d3f46fa2db Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 9 Mar 2010 19:40:54 +0000
Subject: net: Annotates neigh_invalidate()

Annotates neigh_invalidate() with __releases() and __acquires() for
sparse sake.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d102f6d9abdc..6cee6434da67 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -771,6 +771,8 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
 }
 
 static void neigh_invalidate(struct neighbour *neigh)
+	__releases(neigh->lock)
+	__acquires(neigh->lock)
 {
 	struct sk_buff *skb;
 
-- 
cgit v1.2.3


From 3041f5170751e3522aa1bd6e8ca5d98e846720b0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 9 Mar 2010 19:09:08 +0000
Subject: net: Fix dev_mc_add()

Commit 6e17d45a (net: add addr len check to dev_mc_add)
added a bug in dev_mc_add(), since it can now exit with a lock
imbalance.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_mcast.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index fd91569e2394..3dc295beb483 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -97,8 +97,9 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 
 	netif_addr_lock_bh(dev);
 	if (alen != dev->addr_len)
-		return -EINVAL;
-	err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
+		err = -EINVAL;
+	else
+		err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
 	if (!err)
 		__dev_set_rx_mode(dev);
 	netif_addr_unlock_bh(dev);
-- 
cgit v1.2.3


From 38a679a52be13d5a0c766597ab823e06688d6e8e Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Sat, 6 Mar 2010 18:35:08 +0200
Subject: mac80211: Fix sta_mtx unlocking on insert STA failure path

Commit 34e895075e21be3e21e71d6317440d1ee7969ad0 introduced sta_mtx
locking into sta_info_insert() (now sta_info_insert_rcu), but forgot
to unlock this mutex on one of the error paths. Fix this by adding
the missing mutex_unlock() call for the case where STA insert fails
due to an entry existing already. This may happen at least in AP mode
when a STA roams between two BSSes (vifs).

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 211c475f73c6..56422d894351 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -434,6 +434,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
 	/* check if STA exists already */
 	if (sta_info_get_bss(sdata, sta->sta.addr)) {
 		spin_unlock_irqrestore(&local->sta_lock, flags);
+		mutex_unlock(&local->sta_mtx);
 		rcu_read_lock();
 		err = -EEXIST;
 		goto out_free;
-- 
cgit v1.2.3


From 2a13052fe495948e572839e514e0e0cd236c50b0 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Tue, 9 Mar 2010 14:25:02 +0200
Subject: mac80211: Fix (dynamic) power save entry

Currently hardware with !IEEE80211_HW_PS_NULLFUNC_STACK and
IEEE80211_HW_REPORTS_TX_ACK_STATUS will never enter PSM due to the
conditions in the power save entry functions.

Fix those conditions.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0ab284c32135..be5f723d643a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -436,10 +436,12 @@ static void ieee80211_enable_ps(struct ieee80211_local *local,
 		if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
 			ieee80211_send_nullfunc(local, sdata, 1);
 
-		if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) {
-			conf->flags |= IEEE80211_CONF_PS;
-			ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
-		}
+		if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
+		    (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS))
+			return;
+
+		conf->flags |= IEEE80211_CONF_PS;
+		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 	}
 }
 
@@ -558,7 +560,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
 	    (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)))
 		ieee80211_send_nullfunc(local, sdata, 1);
 
-	if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) ||
+	if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) &&
+	      (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) ||
 	    (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
 		ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
 		local->hw.conf.flags |= IEEE80211_CONF_PS;
-- 
cgit v1.2.3


From 51f5f8ca446d4c59041b9b6995821e13208897ea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 10 Mar 2010 17:13:36 +0100
Subject: mac80211: Fix memory leak in ieee80211_if_write()

Fix memory leak and use kmalloc() instead of kzalloc() as we are going
to overwrite the allocated buffer.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs_netdev.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 9affe2cd185f..b4ddb2f83914 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -48,20 +48,24 @@ static ssize_t ieee80211_if_write(
 	ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int))
 {
 	u8 *buf;
-	ssize_t ret = -ENODEV;
+	ssize_t ret;
 
-	buf = kzalloc(count, GFP_KERNEL);
+	buf = kmalloc(count, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
+	ret = -EFAULT;
 	if (copy_from_user(buf, userbuf, count))
-		return -EFAULT;
+		goto freebuf;
 
+	ret = -ENODEV;
 	rtnl_lock();
 	if (sdata->dev->reg_state == NETREG_REGISTERED)
 		ret = (*write)(sdata, buf, count);
 	rtnl_unlock();
 
+freebuf:
+	kfree(buf);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 964ad81cbd933e5fa310faeec1e923c14651284b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 12 Mar 2010 00:00:17 -0800
Subject: ipconfig: Handle devices which take some time to come up.

Some network devices, particularly USB ones, take several seconds to
fully init and appear in the device list.

If the user turned ipconfig on, they are using it for NFS root or some
other early booting purpose.  So it makes no sense to just flat out
fail immediately if the device isn't found.

It also doesn't make sense to just jack up the initial wait to
something crazy like 10 seconds.

Instead, poll immediately, and then periodically once a second,
waiting for a usable device to appear.  Fail after 12 seconds.

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Christian Pellegrin <chripell@fsfe.org>
---
 net/ipv4/ipconfig.c | 57 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 10a6a604bf32..678909281648 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -187,6 +187,16 @@ struct ic_device {
 static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
 static struct net_device *ic_dev __initdata = NULL;	/* Selected device */
 
+static bool __init ic_device_match(struct net_device *dev)
+{
+	if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
+	    (!(dev->flags & IFF_LOOPBACK) &&
+	     (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
+	     strncmp(dev->name, "dummy", 5)))
+		return true;
+	return false;
+}
+
 static int __init ic_open_devs(void)
 {
 	struct ic_device *d, **last;
@@ -207,10 +217,7 @@ static int __init ic_open_devs(void)
 	for_each_netdev(&init_net, dev) {
 		if (dev->flags & IFF_LOOPBACK)
 			continue;
-		if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
-		    (!(dev->flags & IFF_LOOPBACK) &&
-		     (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) &&
-		     strncmp(dev->name, "dummy", 5))) {
+		if (ic_device_match(dev)) {
 			int able = 0;
 			if (dev->mtu >= 364)
 				able |= IC_BOOTP;
@@ -228,7 +235,7 @@ static int __init ic_open_devs(void)
 			}
 			if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) {
 				rtnl_unlock();
-				return -1;
+				return -ENOMEM;
 			}
 			d->dev = dev;
 			*last = d;
@@ -253,7 +260,7 @@ static int __init ic_open_devs(void)
 			printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name);
 		else
 			printk(KERN_ERR "IP-Config: No network devices available.\n");
-		return -1;
+		return -ENODEV;
 	}
 	return 0;
 }
@@ -1303,6 +1310,32 @@ __be32 __init root_nfs_parse_addr(char *name)
 	return addr;
 }
 
+#define DEVICE_WAIT_MAX		12 /* 12 seconds */
+
+static int __init wait_for_devices(void)
+{
+	int i;
+
+	msleep(CONF_PRE_OPEN);
+	for (i = 0; i < DEVICE_WAIT_MAX; i++) {
+		struct net_device *dev;
+		int found = 0;
+
+		rtnl_lock();
+		for_each_netdev(&init_net, dev) {
+			if (ic_device_match(dev)) {
+				found = 1;
+				break;
+			}
+		}
+		rtnl_unlock();
+		if (found)
+			return 0;
+		ssleep(1);
+	}
+	return -ENODEV;
+}
+
 /*
  *	IP Autoconfig dispatcher.
  */
@@ -1313,6 +1346,7 @@ static int __init ip_auto_config(void)
 #ifdef IPCONFIG_DYNAMIC
 	int retries = CONF_OPEN_RETRIES;
 #endif
+	int err;
 
 #ifdef CONFIG_PROC_FS
 	proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops);
@@ -1325,12 +1359,15 @@ static int __init ip_auto_config(void)
 #ifdef IPCONFIG_DYNAMIC
  try_try_again:
 #endif
-	/* Give hardware a chance to settle */
-	msleep(CONF_PRE_OPEN);
+	/* Wait for devices to appear */
+	err = wait_for_devices();
+	if (err)
+		return err;
 
 	/* Setup all network devices */
-	if (ic_open_devs() < 0)
-		return -1;
+	err = ic_open_devs();
+	if (err)
+		return err;
 
 	/* Give drivers a chance to settle */
 	ssleep(CONF_POST_OPEN);
-- 
cgit v1.2.3


From e2577a065832f2c6d108de2e027891bdb2d78924 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 13 Mar 2010 12:23:29 -0800
Subject: ipv6: Send netlink notification when DAD fails

If we are managing IPv6 addresses using DHCP, it would be nice
for user-space to be notified if an address configured through
DHCP fails DAD.  Otherwise user-space would have to poll to see
whether DAD succeeds.

This patch uses the existing notification mechanism and simply
hooks it into the DAD failure code path.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6cf3ee14ace3..3381b4317c27 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1380,6 +1380,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 		if (dad_failed)
 			ifp->flags |= IFA_F_DADFAILED;
 		spin_unlock_bh(&ifp->lock);
+		if (dad_failed)
+			ipv6_ifa_notify(0, ifp);
 		in6_ifa_put(ifp);
 #ifdef CONFIG_IPV6_PRIVACY
 	} else if (ifp->flags&IFA_F_TEMPORARY) {
-- 
cgit v1.2.3


From bec68ff1637ca00bb1585a03a7be8a13380084de Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 13 Mar 2010 12:27:21 -0800
Subject: bridge: ensure to unlock in error path in br_multicast_query().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 12ce1eaa4f3e..fd96a8dc97f4 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -823,6 +823,7 @@ static int br_multicast_query(struct net_bridge *br,
 	unsigned long max_delay;
 	unsigned long now = jiffies;
 	__be32 group;
+	int err = 0;
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) ||
@@ -841,12 +842,14 @@ static int br_multicast_query(struct net_bridge *br,
 			group = 0;
 		}
 	} else {
-		if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
-			return -EINVAL;
+		if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) {
+			err = -EINVAL;
+			goto out;
+		}
 
 		ih3 = igmpv3_query_hdr(skb);
 		if (ih3->nsrcs)
-			return 0;
+			goto out;
 
 		max_delay = ih3->code ? 1 :
 			    IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE);
@@ -876,7 +879,7 @@ static int br_multicast_query(struct net_bridge *br,
 
 out:
 	spin_unlock(&br->multicast_lock);
-	return 0;
+	return err;
 }
 
 static void br_multicast_leave_group(struct net_bridge *br,
-- 
cgit v1.2.3