From 3082a2b7b1af1b1508c1c3fa589566064f926f40 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 16 Feb 2010 16:36:25 -0500 Subject: rfkill: Add support for KEY_RFKILL Add support for handling KEY_RFKILL in the rfkill input module. This simply toggles the state of all rfkill devices. The comment in rfkill.h is also updated to reflect that RFKILL_TYPE_ALL may be used inside the kernel. Signed-off-by: Matthew Garrett Acked-by: Marcel Holtmann Signed-off-by: John W. Linville --- net/rfkill/input.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/rfkill/input.c b/net/rfkill/input.c index a7295ad5f9cb..3713d7ecab96 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -212,6 +212,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type, case KEY_WIMAX: rfkill_schedule_toggle(RFKILL_TYPE_WIMAX); break; + case KEY_RFKILL: + rfkill_schedule_toggle(RFKILL_TYPE_ALL); + break; } } else if (type == EV_SW && code == SW_RFKILL_ALL) rfkill_schedule_evsw_rfkillall(data); @@ -294,6 +297,11 @@ static const struct input_device_id rfkill_ids[] = { .evbit = { BIT_MASK(EV_KEY) }, .keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) }, }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_RFKILL)] = BIT_MASK(KEY_RFKILL) }, + }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT, .evbit = { BIT(EV_SW) }, -- cgit v1.2.3 From 9c87ba6734422034fccb938da1039ed63da1395c Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sun, 28 Feb 2010 12:13:46 +0200 Subject: mac80211: Fix reassociation processing (within ESS roaming) Commit e1dd33f60ced091114e4aacf141e0d03b88d3e13 changed cfg80211 to allow association commands while in associated state to enable support for roaming within an ESS. However, this was not enough to resolve all cases with mac80211 which needs some additional handling of the reassociation case to clear internal state with the BSS that was in use previously. This patch makes ieee80211_mgd_assoc() accept a valid reassociation command and clean the association state with the previous BSS. This fixes roaming between BSSes in an ESS when using wpa_supplicant with -Dnl80211. Signed-off-by: Jouni Malinen Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 41812a15eea0..5a268761e4c5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1893,8 +1893,20 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&ifmgd->mtx); if (ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); - return -EALREADY; + if (!req->prev_bssid || + memcmp(req->prev_bssid, ifmgd->associated->bssid, + ETH_ALEN)) { + /* + * We are already associated and the request was not a + * reassociation request from the current BSS, so + * reject it. + */ + mutex_unlock(&ifmgd->mtx); + return -EALREADY; + } + + /* Trying to reassociate - clear previous association state */ + ieee80211_set_disassoc(sdata); } mutex_unlock(&ifmgd->mtx); -- cgit v1.2.3 From 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 2 Mar 2010 02:51:56 +0000 Subject: ipsec: Fix bogus bundle flowi When I merged the bundle creation code, I introduced a bogus flowi value in the bundle. Instead of getting from the caller, it was instead set to the flow in the route object, which is totally different. The end result is that the bundles we created never match, and we instead end up with an ever growing bundle list. Thanks to Jamal for find this problem. Reported-by: Jamal Hadi Salim Signed-off-by: Herbert Xu Acked-by: Steffen Klassert Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- net/ipv4/xfrm4_policy.c | 5 +++-- net/ipv6/xfrm6_policy.c | 3 ++- net/xfrm/xfrm_policy.c | 7 ++++--- 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a7df3275b860..d74e080ba6c9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -275,7 +275,8 @@ struct xfrm_policy_afinfo { struct dst_entry *dst, int nfheader_len); int (*fill_dst)(struct xfrm_dst *xdst, - struct net_device *dev); + struct net_device *dev, + struct flowi *fl); }; extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 67107d63c1cd..e4a1483fba77 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; - xdst->u.rt.fl = rt->fl; + xdst->u.rt.fl = *fl; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dbdc696f5fc5..ae181651c75a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -116,7 +116,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rt6_info *rt = (struct rt6_info*)xdst->route; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 34a5ef8316e7..843e066649cb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1372,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1381,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); @@ -1486,7 +1487,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; -- cgit v1.2.3 From 1162563f82b434e3099c9e6c1bbdba846d792f0d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 2 Mar 2010 20:40:01 +0000 Subject: af_packet: move strict addr_len check right before dev_[mc/unicast]_[add/del] My previous patch 914c8ad2d18b62ad1420f518c0cab0b0b90ab308 incorrectly changed the length check in packet_mc_add to be more strict. The problem is that userspace is not filling this field (and it stays zeroed) in case of setting PACKET_MR_PROMISC or PACKET_MR_ALLMULTI. So move the strict check to the point in path where the addr_len must be set correctly. Signed-off-by: Jiri Pirko Reported-by: Pavel Roskin Signed-off-by: David S. Miller --- net/packet/af_packet.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 031a5e6fb4aa..1612d417d10c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1688,6 +1688,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, { switch (i->type) { case PACKET_MR_MULTICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_mc_add(dev, i->addr, i->alen, 0); else @@ -1700,6 +1702,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, return dev_set_allmulti(dev, what); break; case PACKET_MR_UNICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_unicast_add(dev, i->addr); else @@ -1734,7 +1738,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) goto done; err = -EINVAL; - if (mreq->mr_alen != dev->addr_len) + if (mreq->mr_alen > dev->addr_len) goto done; err = -ENOBUFS; -- cgit v1.2.3 From d4612cb86ed8db8956b6b19435f8a30de6c67ffe Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 2 Mar 2010 15:48:23 +0000 Subject: Bluetooth: Use single_open() for inquiry cache within debugfs The inquiry cache information in debugfs should be using seq_file support and not allocating memory on the stack for the string. Since the usage of these information is really seldom, using single_open() for it is good enough. Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- net/bluetooth/hci_sysfs.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 1a79a6c7e30e..cafb55b0cea5 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -405,20 +406,11 @@ static struct device_type bt_host = { .release = bt_host_release, }; -static int inquiry_cache_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - -static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +static int inquiry_cache_show(struct seq_file *f, void *p) { - struct hci_dev *hdev = file->private_data; + struct hci_dev *hdev = f->private; struct inquiry_cache *cache = &hdev->inq_cache; struct inquiry_entry *e; - char buf[4096]; - int n = 0; hci_dev_lock_bh(hdev); @@ -426,23 +418,30 @@ static ssize_t inquiry_cache_read(struct file *file, char __user *userbuf, struct inquiry_data *data = &e->data; bdaddr_t bdaddr; baswap(&bdaddr, &data->bdaddr); - n += sprintf(buf + n, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - batostr(&bdaddr), - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); + seq_printf(f, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + batostr(&bdaddr), + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); } hci_dev_unlock_bh(hdev); - return simple_read_from_buffer(userbuf, count, ppos, buf, n); + return 0; +} + +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + return single_open(file, inquiry_cache_show, inode->i_private); } static const struct file_operations inquiry_cache_fops = { - .open = inquiry_cache_open, - .read = inquiry_cache_read, + .open = inquiry_cache_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; int hci_register_sysfs(struct hci_dev *hdev) -- cgit v1.2.3 From 1cd4efddc4512ccbd9fe317f688f361605ca0c88 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 3 Mar 2010 01:23:22 -0800 Subject: bridge: depends on INET br_multicast calls ip_send_check(), so it should depend on INET. built-in: br_multicast.c:(.text+0x88cf4): undefined reference to `ip_send_check' or modular: ERROR: "ip_send_check" [net/bridge/bridge.ko] undefined! Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 19a6b9629c51..d115d5cea5b6 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -35,6 +35,7 @@ config BRIDGE config BRIDGE_IGMP_SNOOPING bool "IGMP snooping" depends on BRIDGE + depends on INET default y ---help--- If you say Y here, then the Ethernet bridge will be able selectively -- cgit v1.2.3 From 4fa004373133ece3d9b1c0a7e243b0e53760b165 Mon Sep 17 00:00:00 2001 From: Sujith Date: Mon, 1 Mar 2010 14:42:57 +0530 Subject: mac80211: Fix HT rate control configuration Handling HT configuration changes involved setting the channel with the new HT parameters and then issuing a rate_update() notification to the driver. This behavior changed after the off-channel changes. Now, the channel is not updated with the new HT params in enable_ht() - instead, it is now done when the scan work terminates. This results in the driver depending on stale information, defaulting to non-HT mode always. Fix this by passing the new channel type to the driver. Cc: stable@kernel.org Signed-off-by: Sujith Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/rc.c | 6 +++--- include/net/mac80211.h | 3 ++- net/mac80211/mlme.c | 3 ++- net/mac80211/rate.h | 5 +++-- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index ac34a055c713..0e79e58cf4c9 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -1323,7 +1323,7 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband, static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta, - u32 changed) + u32 changed, enum nl80211_channel_type oper_chan_type) { struct ath_softc *sc = priv; struct ath_rate_priv *ath_rc_priv = priv_sta; @@ -1340,8 +1340,8 @@ static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, if (sc->sc_ah->opmode != NL80211_IFTYPE_STATION) return; - if (sc->hw->conf.channel_type == NL80211_CHAN_HT40MINUS || - sc->hw->conf.channel_type == NL80211_CHAN_HT40PLUS) + if (oper_chan_type == NL80211_CHAN_HT40MINUS || + oper_chan_type == NL80211_CHAN_HT40PLUS) oper_cw40 = true; oper_sgi40 = (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40) ? diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 80eb7cc42ce9..45d7d44d7cbe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2426,7 +2426,8 @@ struct rate_control_ops { struct ieee80211_sta *sta, void *priv_sta); void (*rate_update)(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, - void *priv_sta, u32 changed); + void *priv_sta, u32 changed, + enum nl80211_channel_type oper_chan_type); void (*free_sta)(void *priv, struct ieee80211_sta *sta, void *priv_sta); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5a268761e4c5..0ab284c32135 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -177,7 +177,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(sdata, bssid); if (sta) rate_control_rate_update(local, sband, sta, - IEEE80211_RC_HT_CHANGED); + IEEE80211_RC_HT_CHANGED, + local->oper_channel_type); rcu_read_unlock(); } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index b6108bca96d4..065a96190e32 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -66,7 +66,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) static inline void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, u32 changed) + struct sta_info *sta, u32 changed, + enum nl80211_channel_type oper_chan_type) { struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; @@ -74,7 +75,7 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, if (ref && ref->ops->rate_update) ref->ops->rate_update(ref->priv, sband, ista, - priv_sta, changed); + priv_sta, changed, oper_chan_type); } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, -- cgit v1.2.3 From 122e4519cd5c224d4b8e681d368132b643e28f60 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 2 Mar 2010 13:32:44 +0000 Subject: IPv6: addrconf dad timer unnecessary bh_disable Timer code runs in bottom half, so there is no need for using _bh form of locking. Also check if device is not ready to avoid race with address that is no longer active. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 88fd8c5877ee..e6cba9c45c6c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2850,9 +2850,9 @@ static void addrconf_dad_timer(unsigned long data) struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; - read_lock_bh(&idev->lock); - if (idev->dead) { - read_unlock_bh(&idev->lock); + read_lock(&idev->lock); + if (idev->dead || !(idev->if_flags & IF_READY)) { + read_unlock(&idev->lock); goto out; } @@ -2864,7 +2864,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - read_unlock_bh(&idev->lock); + read_unlock(&idev->lock); addrconf_dad_completed(ifp); @@ -2874,7 +2874,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->probes--; addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); spin_unlock(&ifp->lock); - read_unlock_bh(&idev->lock); + read_unlock(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); -- cgit v1.2.3 From 5b2a19539c5f59c5a038d213ede723f0245d97cf Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 2 Mar 2010 13:32:45 +0000 Subject: IPv6: addrconf timer race The Router Solicitation timer races with device state changes because it doesn't lock the device. Use local variable to avoid one repeated dereference. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e6cba9c45c6c..5f582f385abb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2739,28 +2739,29 @@ static int addrconf_ifdown(struct net_device *dev, int how) static void addrconf_rs_timer(unsigned long data) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + struct inet6_dev *idev = ifp->idev; - if (ifp->idev->cnf.forwarding) + read_lock(&idev->lock); + if (idev->dead || !(idev->if_flags & IF_READY)) goto out; - if (ifp->idev->if_flags & IF_RA_RCVD) { - /* - * Announcement received after solicitation - * was sent - */ + if (idev->cnf.forwarding) + goto out; + + /* Announcement received after solicitation was sent */ + if (idev->if_flags & IF_RA_RCVD) goto out; - } spin_lock(&ifp->lock); - if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) { + if (ifp->probes++ < idev->cnf.rtr_solicits) { /* The wait after the last probe can be shorter */ addrconf_mod_timer(ifp, AC_RS, - (ifp->probes == ifp->idev->cnf.rtr_solicits) ? - ifp->idev->cnf.rtr_solicit_delay : - ifp->idev->cnf.rtr_solicit_interval); + (ifp->probes == idev->cnf.rtr_solicits) ? + idev->cnf.rtr_solicit_delay : + idev->cnf.rtr_solicit_interval); spin_unlock(&ifp->lock); - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); } else { spin_unlock(&ifp->lock); /* @@ -2768,10 +2769,11 @@ static void addrconf_rs_timer(unsigned long data) * assumption any longer. */ printk(KERN_DEBUG "%s: no IPv6 routers present\n", - ifp->idev->dev->name); + idev->dev->name); } out: + read_unlock(&idev->lock); in6_ifa_put(ifp); } -- cgit v1.2.3 From 84e8b803f1e16f3a2b8b80f80a63fa2f2f8a9be6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 2 Mar 2010 13:32:46 +0000 Subject: IPv6: addrconf notify when address is unavailable My recent change in net-next to retain permanent addresses caused regression. Device refcount would not go to zero when device was unregistered because left over anycast reference would hold ipv6 dev reference which would hold device references... The correct procedure is to call notify chain when address is no longer available for use. When interface comes back DAD timer will notify back that address is available. Also, link local addresses should be purged when interface is brought down. The address might be changed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5f582f385abb..7a4bf7671285 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2649,11 +2649,11 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&addrconf_hash_lock); while ((ifa = *bifa) != NULL) { if (ifa->idev == idev && - (how || !(ifa->flags&IFA_F_PERMANENT))) { + (how || !(ifa->flags&IFA_F_PERMANENT) || + ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { *bifa = ifa->lst_next; ifa->lst_next = NULL; - addrconf_del_timer(ifa); - in6_ifa_put(ifa); + __in6_ifa_put(ifa); continue; } bifa = &ifa->lst_next; @@ -2691,28 +2691,40 @@ static int addrconf_ifdown(struct net_device *dev, int how) #endif bifa = &idev->addr_list; while ((ifa = *bifa) != NULL) { - if (how == 0 && (ifa->flags&IFA_F_PERMANENT)) { - /* Retain permanent address on admin down */ + addrconf_del_timer(ifa); + + /* If just doing link down, and address is permanent + and not link-local, then retain it. */ + if (how == 0 && + (ifa->flags&IFA_F_PERMANENT) && + !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { bifa = &ifa->if_next; - /* Restart DAD if needed when link comes back up */ - if ( !((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || - idev->cnf.accept_dad <= 0 || - (ifa->flags & IFA_F_NODAD))) - ifa->flags |= IFA_F_TENTATIVE; + /* If not doing DAD on this address, just keep it. */ + if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || + idev->cnf.accept_dad <= 0 || + (ifa->flags & IFA_F_NODAD)) + continue; + + /* If it was tentative already, no need to notify */ + if (ifa->flags & IFA_F_TENTATIVE) + continue; + + /* Flag it for later restoration when link comes up */ + ifa->flags |= IFA_F_TENTATIVE; + in6_ifa_hold(ifa); } else { *bifa = ifa->if_next; ifa->if_next = NULL; - ifa->dead = 1; - write_unlock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); - __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); - in6_ifa_put(ifa); + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); + in6_ifa_put(ifa); - write_lock_bh(&idev->lock); - } + write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); -- cgit v1.2.3 From 8f37ada5b5f6bfb4d251a7f510f249cb855b77b3 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 3 Mar 2010 08:19:59 +0000 Subject: IPv6: fix race between cleanup and add/delete address This solves a potential race problem during the cleanup process. The issue is that addrconf_ifdown() needs to traverse address list, but then drop lock to call the notifier. The version in -next could get confused if add/delete happened during this window. Original code (2.6.32 and earlier) was okay because all addresses were always deleted. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7a4bf7671285..6cf3ee14ace3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2615,7 +2615,7 @@ static void addrconf_bonding_change(struct net_device *dev, unsigned long event) static int addrconf_ifdown(struct net_device *dev, int how) { struct inet6_dev *idev; - struct inet6_ifaddr *ifa, **bifa; + struct inet6_ifaddr *ifa, *keep_list, **bifa; struct net *net = dev_net(dev); int i; @@ -2689,8 +2689,12 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } #endif - bifa = &idev->addr_list; - while ((ifa = *bifa) != NULL) { + keep_list = NULL; + bifa = &keep_list; + while ((ifa = idev->addr_list) != NULL) { + idev->addr_list = ifa->if_next; + ifa->if_next = NULL; + addrconf_del_timer(ifa); /* If just doing link down, and address is permanent @@ -2698,6 +2702,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (how == 0 && (ifa->flags&IFA_F_PERMANENT) && !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { + + /* Move to holding list */ + *bifa = ifa; bifa = &ifa->if_next; /* If not doing DAD on this address, just keep it. */ @@ -2714,8 +2721,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) ifa->flags |= IFA_F_TENTATIVE; in6_ifa_hold(ifa); } else { - *bifa = ifa->if_next; - ifa->if_next = NULL; ifa->dead = 1; } write_unlock_bh(&idev->lock); @@ -2726,6 +2731,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); } + + idev->addr_list = keep_list; + write_unlock_bh(&idev->lock); /* Step 5: Discard multicast list */ -- cgit v1.2.3 From 6d55cb91a0020ac0d78edcad61efd6c8cf5785a3 Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Wed, 3 Mar 2010 04:01:13 +0000 Subject: gre: fix hard header destination address checking ipgre_header() can be called with zero daddr when the gre device is configured as multipoint tunnel and still has the NOARP flag set (which is typically cleared by the userspace arp daemon). If the NOARP packets are not dropped, ipgre_tunnel_xmit() will take rt->rt_gateway (= NBMA IP) and use that for route look up (and may lead to bogus xfrm acquires). The multicast address check is removed as sending to multicast group should be ok. In fact, if gre device has a multicast address as destination ipgre_header is always called with multicast address. Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c0c5274d0271..f47c9f76754b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1144,12 +1144,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (saddr) memcpy(&iph->saddr, saddr, 4); - - if (daddr) { + if (daddr) memcpy(&iph->daddr, daddr, 4); - return t->hlen; - } - if (iph->daddr && !ipv4_is_multicast(iph->daddr)) + if (iph->daddr) return t->hlen; return -t->hlen; -- cgit v1.2.3 From d0021b252eaf65ca07ed14f0d66425dd9ccab9a6 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 3 Mar 2010 08:31:23 +0000 Subject: tipc: Fix oops on send prior to entering networked mode (v3) Fix TIPC to disallow sending to remote addresses prior to entering NET_MODE user programs can oops the kernel by sending datagrams via AF_TIPC prior to entering networked mode. The following backtrace has been observed: ID: 13459 TASK: ffff810014640040 CPU: 0 COMMAND: "tipc-client" [exception RIP: tipc_node_select_next_hop+90] RIP: ffffffff8869d3c3 RSP: ffff81002d9a5ab8 RFLAGS: 00010202 RAX: 0000000000000001 RBX: 0000000000000001 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000001001001 RBP: 0000000001001001 R8: 0074736575716552 R9: 0000000000000000 R10: ffff81003fbd0680 R11: 00000000000000c8 R12: 0000000000000008 R13: 0000000000000001 R14: 0000000000000001 R15: ffff810015c6ca00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 RIP: 0000003cbd8d49a3 RSP: 00007fffc84e0be8 RFLAGS: 00010206 RAX: 000000000000002c RBX: ffffffff8005d116 RCX: 0000000000000000 RDX: 0000000000000008 RSI: 00007fffc84e0c00 RDI: 0000000000000003 RBP: 0000000000000000 R8: 00007fffc84e0c10 R9: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fffc84e0d10 R14: 0000000000000000 R15: 00007fffc84e0c30 ORIG_RAX: 000000000000002c CS: 0033 SS: 002b What happens is that, when the tipc module in inserted it enters a standalone node mode in which communication to its own address is allowed <0.0.0> but not to other addresses, since the appropriate data structures have not been allocated yet (specifically the tipc_net pointer). There is nothing stopping a client from trying to send such a message however, and if that happens, we attempt to dereference tipc_net.zones while the pointer is still NULL, and explode. The fix is pretty straightforward. Since these oopses all arise from the dereference of global pointers prior to their assignment to allocated values, and since these allocations are small (about 2k total), lets convert these pointers to static arrays of the appropriate size. All the accesses to these bits consider 0/NULL to be a non match when searching, so all the lookups still work properly, and there is no longer a chance of a bad dererence anywhere. As a bonus, this lets us eliminate the setup/teardown routines for those pointers, and elimnates the need to preform any locking around them to prevent access while their being allocated/freed. I've updated the tipc_net structure to behave this way to fix the exact reported problem, and also fixed up the tipc_bearers and media_list arrays to fix an obvious simmilar problem that arises from issuing tipc-config commands to manipulate bearers/links prior to entering networked mode I've tested this for a few hours by running the sanity tests and stress test with the tipcutils suite, and nothing has fallen over. There have been a few lockdep warnings, but those were there before, and can be addressed later, as they didn't actually result in any deadlock. Signed-off-by: Neil Horman CC: Allan Stephens CC: David S. Miller CC: tipc-discussion@lists.sourceforge.net bearer.c | 37 ++++++------------------------------- bearer.h | 2 +- net.c | 25 ++++--------------------- 3 files changed, 11 insertions(+), 53 deletions(-) Signed-off-by: David S. Miller --- net/tipc/bearer.c | 37 ++++++------------------------------- net/tipc/bearer.h | 2 +- net/tipc/net.c | 25 ++++--------------------- 3 files changed, 11 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 327011fcc407..78091375ca12 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -45,10 +45,10 @@ #define MAX_ADDR_STR 32 -static struct media *media_list = NULL; +static struct media media_list[MAX_MEDIA]; static u32 media_count = 0; -struct bearer *tipc_bearers = NULL; +struct bearer tipc_bearers[MAX_BEARERS]; /** * media_name_valid - validate media name @@ -108,9 +108,11 @@ int tipc_register_media(u32 media_type, int res = -EINVAL; write_lock_bh(&tipc_net_lock); - if (!media_list) - goto exit; + if (tipc_mode != TIPC_NET_MODE) { + warn("Media <%s> rejected, not in networked mode yet\n", name); + goto exit; + } if (!media_name_valid(name)) { warn("Media <%s> rejected, illegal name\n", name); goto exit; @@ -660,33 +662,10 @@ int tipc_disable_bearer(const char *name) -int tipc_bearer_init(void) -{ - int res; - - write_lock_bh(&tipc_net_lock); - tipc_bearers = kcalloc(MAX_BEARERS, sizeof(struct bearer), GFP_ATOMIC); - media_list = kcalloc(MAX_MEDIA, sizeof(struct media), GFP_ATOMIC); - if (tipc_bearers && media_list) { - res = 0; - } else { - kfree(tipc_bearers); - kfree(media_list); - tipc_bearers = NULL; - media_list = NULL; - res = -ENOMEM; - } - write_unlock_bh(&tipc_net_lock); - return res; -} - void tipc_bearer_stop(void) { u32 i; - if (!tipc_bearers) - return; - for (i = 0; i < MAX_BEARERS; i++) { if (tipc_bearers[i].active) tipc_bearers[i].publ.blocked = 1; @@ -695,10 +674,6 @@ void tipc_bearer_stop(void) if (tipc_bearers[i].active) bearer_disable(tipc_bearers[i].publ.name); } - kfree(tipc_bearers); - kfree(media_list); - tipc_bearers = NULL; - media_list = NULL; media_count = 0; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index ca5734892713..000228e93f9e 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -114,7 +114,7 @@ struct bearer_name { struct link; -extern struct bearer *tipc_bearers; +extern struct bearer tipc_bearers[]; void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); diff --git a/net/tipc/net.c b/net/tipc/net.c index 7906608bf510..f25b1cdb64eb 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -116,7 +116,8 @@ */ DEFINE_RWLOCK(tipc_net_lock); -struct network tipc_net = { NULL }; +struct _zone *tipc_zones[256] = { NULL, }; +struct network tipc_net = { tipc_zones }; struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref) { @@ -158,28 +159,12 @@ void tipc_net_send_external_routes(u32 dest) } } -static int net_init(void) -{ - memset(&tipc_net, 0, sizeof(tipc_net)); - tipc_net.zones = kcalloc(tipc_max_zones + 1, sizeof(struct _zone *), GFP_ATOMIC); - if (!tipc_net.zones) { - return -ENOMEM; - } - return 0; -} - static void net_stop(void) { u32 z_num; - if (!tipc_net.zones) - return; - - for (z_num = 1; z_num <= tipc_max_zones; z_num++) { + for (z_num = 1; z_num <= tipc_max_zones; z_num++) tipc_zone_delete(tipc_net.zones[z_num]); - } - kfree(tipc_net.zones); - tipc_net.zones = NULL; } static void net_route_named_msg(struct sk_buff *buf) @@ -282,9 +267,7 @@ int tipc_net_start(u32 addr) tipc_named_reinit(); tipc_port_reinit(); - if ((res = tipc_bearer_init()) || - (res = net_init()) || - (res = tipc_cltr_init()) || + if ((res = tipc_cltr_init()) || (res = tipc_bclink_init())) { return res; } -- cgit v1.2.3 From 8eae939f1400326b06d0c9afe53d2a484a326871 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:40 +0000 Subject: net: add limit for socket backlog We got system OOM while running some UDP netperf testing on the loopback device. The case is multiple senders sent stream UDP packets to a single receiver via loopback on local host. Of course, the receiver is not able to handle all the packets in time. But we surprisingly found that these packets were not discarded due to the receiver's sk->sk_rcvbuf limit. Instead, they are kept queuing to sk->sk_backlog and finally ate up all the memory. We believe this is a secure hole that a none privileged user can crash the system. The root cause for this problem is, when the receiver is doing __release_sock() (i.e. after userspace recv, kernel udp_recvmsg -> skb_free_datagram_locked -> release_sock), it moves skbs from backlog to sk_receive_queue with the softirq enabled. In the above case, multiple busy senders will almost make it an endless loop. The skbs in the backlog end up eat all the system memory. The issue is not only for UDP. Any protocols using socket backlog is potentially affected. The patch adds limit for socket backlog so that the backlog size cannot be expanded endlessly. Reported-by: Alex Shi Cc: David Miller Cc: Arnaldo Carvalho de Melo Cc: Alexey Kuznetsov Cc: Patrick McHardy Cc: Vlad Yasevich Cc: Sridhar Samudrala Cc: Jon Maloy Cc: Allan Stephens Cc: Andrew Hendry Signed-off-by: Zhu Yi Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/sock.h | 15 ++++++++++++++- net/core/sock.c | 16 ++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 6cb1676e409a..2516d76f043c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -253,6 +253,8 @@ struct sock { struct { struct sk_buff *head; struct sk_buff *tail; + int len; + int limit; } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; @@ -589,7 +591,7 @@ static inline int sk_stream_memory_free(struct sock *sk) return sk->sk_wmem_queued < sk->sk_sndbuf; } -/* The per-socket spinlock must be held here. */ +/* OOB backlog add */ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (!sk->sk_backlog.tail) { @@ -601,6 +603,17 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb->next = NULL; } +/* The per-socket spinlock must be held here. */ +static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb) +{ + if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) + return -ENOBUFS; + + sk_add_backlog(sk, skb); + sk->sk_backlog.len += skb->truesize; + return 0; +} + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { return sk->sk_backlog_rcv(sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index fcd397a762ff..6e22dc973d23 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } + bh_unlock_sock(sk); out: sock_put(sk); @@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_lock_init(newsk); bh_lock_sock(newsk); newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_backlog.len = 0; atomic_set(&newsk->sk_rmem_alloc, 0); /* @@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk) bh_lock_sock(sk); } while ((skb = sk->sk_backlog.head) != NULL); + + /* + * Doing the zeroing here guarantee we can not loop forever + * while a wild producer attempts to flood us. + */ + sk->sk_backlog.len = 0; } /** @@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); -- cgit v1.2.3 From 6b03a53a5ab7ccf2d5d69f96cf1c739c4d2a8fb9 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:41 +0000 Subject: tcp: use limited socket backlog Make tcp adapt to the limited socket backlog change. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: "Pekka Savola (ipv6)" Cc: Patrick McHardy Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 ++++-- net/ipv6/tcp_ipv6.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c3588b4fd979..4baf1943b1bd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1682,8 +1682,10 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + goto discard_and_relse; + } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6963a6b6763e..c4ea9d5cbfaa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,8 +1740,10 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + goto discard_and_relse; + } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.3 From 55349790d7cbf0d381873a7ece1dcafcffd4aaa9 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:42 +0000 Subject: udp: use limited socket backlog Make udp adapt to the limited socket backlog change. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: "Pekka Savola (ipv6)" Cc: Patrick McHardy Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 ++++-- net/ipv6/udp.c | 28 ++++++++++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 608a5446d05b..e7eb47f338d4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1371,8 +1371,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); + else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + goto drop; + } bh_unlock_sock(sk); return rc; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 52b8347ae3b2..64804912b093 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -583,16 +583,20 @@ static void flush_stack(struct sock **stack, unsigned int count, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); + else if (sk_add_backlog_limited(sk, skb1)) { + kfree_skb(skb1); + bh_unlock_sock(sk); + goto drop; + } bh_unlock_sock(sk); - } else { - atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_INERRORS, IS_UDPLITE(sk)); + continue; } +drop: + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, IS_UDPLITE(sk)); } } /* @@ -754,8 +758,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); + else if (sk_add_backlog_limited(sk, skb)) { + atomic_inc(&sk->sk_drops); + bh_unlock_sock(sk); + sock_put(sk); + goto discard; + } bh_unlock_sock(sk); sock_put(sk); return 0; -- cgit v1.2.3 From 79545b681961d7001c1f4c3eb9ffb87bed4485db Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:43 +0000 Subject: llc: use limited socket backlog Make llc adapt to the limited socket backlog change. Cc: Arnaldo Carvalho de Melo Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/llc/llc_conn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index a8dde9b010da..c0539ffdb272 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -827,7 +827,8 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - sk_add_backlog(sk, skb); + if (sk_add_backlog_limited(sk, skb)) + goto drop_unlock; } out: bh_unlock_sock(sk); -- cgit v1.2.3 From 50b1a782f845140f4138f14a1ce8a4a6dd0cc82f Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:44 +0000 Subject: sctp: use limited socket backlog Make sctp adapt to the limited socket backlog change. Cc: Vlad Yasevich Cc: Sridhar Samudrala Signed-off-by: Zhu Yi Signed-off-by: David S. Miller --- net/sctp/input.c | 42 +++++++++++++++++++++++++++--------------- net/sctp/socket.c | 3 +++ 2 files changed, 30 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index c0c973e67add..cbc063665e6b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -75,7 +75,7 @@ static struct sctp_association *__sctp_lookup_association( const union sctp_addr *peer, struct sctp_transport **pt); -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb); +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. */ @@ -265,8 +265,13 @@ int sctp_rcv(struct sk_buff *skb) } if (sock_owned_by_user(sk)) { + if (sctp_add_backlog(sk, skb)) { + sctp_bh_unlock_sock(sk); + sctp_chunk_free(chunk); + skb = NULL; /* sctp_chunk_free already freed the skb */ + goto discard_release; + } SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); - sctp_add_backlog(sk, skb); } else { SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); @@ -336,8 +341,10 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk_add_backlog(sk, skb); - backloged = 1; + if (sk_add_backlog_limited(sk, skb)) + sctp_chunk_free(chunk); + else + backloged = 1; } else sctp_inq_push(inqueue, chunk); @@ -362,22 +369,27 @@ done: return 0; } -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_ep_common *rcvr = chunk->rcvr; + int ret; - /* Hold the assoc/ep while hanging on the backlog queue. - * This way, we know structures we need will not disappear from us - */ - if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); - else if (SCTP_EP_TYPE_SOCKET == rcvr->type) - sctp_endpoint_hold(sctp_ep(rcvr)); - else - BUG(); + ret = sk_add_backlog_limited(sk, skb); + if (!ret) { + /* Hold the assoc/ep while hanging on the backlog queue. + * This way, we know structures we need will not disappear + * from us + */ + if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) + sctp_association_hold(sctp_assoc(rcvr)); + else if (SCTP_EP_TYPE_SOCKET == rcvr->type) + sctp_endpoint_hold(sctp_ep(rcvr)); + else + BUG(); + } + return ret; - sk_add_backlog(sk, skb); } /* Handle icmp frag needed error. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f6d1e59c4151..dfc5c127efd4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3720,6 +3720,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); percpu_counter_inc(&sctp_sockets_allocated); + /* Set socket backlog limit. */ + sk->sk_backlog.limit = sysctl_sctp_rmem[1]; + local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); -- cgit v1.2.3 From 53eecb1be5ae499d399d2923933937a9ea1a284f Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:45 +0000 Subject: tipc: use limited socket backlog Make tipc adapt to the limited socket backlog change. Cc: Jon Maloy Cc: Allan Stephens Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Acked-by: Allan Stephens Signed-off-by: David S. Miller --- net/tipc/socket.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1ea64f09cc45..22bfbc33a8ac 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1322,8 +1322,10 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - sk_add_backlog(sk, buf); - res = TIPC_OK; + if (sk_add_backlog_limited(sk, buf)) + res = TIPC_ERR_OVERLOAD; + else + res = TIPC_OK; } bh_unlock_sock(sk); -- cgit v1.2.3 From 2499849ee8f513e795b9f2c19a42d6356e4943a4 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:46 +0000 Subject: x25: use limited socket backlog Make x25 adapt to the limited socket backlog change. Cc: Andrew Hendry Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/x25/x25_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 3e1efe534645..a9da0dc26f4f 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - sk_add_backlog(sk, skb); + queued = !sk_add_backlog_limited(sk, skb); } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.3 From a3a858ff18a72a8d388e31ab0d98f7e944841a62 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:47 +0000 Subject: net: backlog functions rename sk_add_backlog -> __sk_add_backlog sk_add_backlog_limited -> sk_add_backlog Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++--- net/core/sock.c | 2 +- net/dccp/minisocks.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 4 ++-- net/llc/llc_c_ac.c | 2 +- net/llc/llc_conn.c | 2 +- net/sctp/input.c | 4 ++-- net/tipc/socket.c | 2 +- net/x25/x25_dev.c | 2 +- 13 files changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 2516d76f043c..170353dd9570 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -592,7 +592,7 @@ static inline int sk_stream_memory_free(struct sock *sk) } /* OOB backlog add */ -static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) +static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (!sk->sk_backlog.tail) { sk->sk_backlog.head = sk->sk_backlog.tail = skb; @@ -604,12 +604,12 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) } /* The per-socket spinlock must be held here. */ -static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb) +static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) return -ENOBUFS; - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); sk->sk_backlog.len += skb->truesize; return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 6e22dc973d23..61a65a2e0455 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,7 +340,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); atomic_inc(&sk->sk_drops); goto discard_and_relse; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index af226a063141..0d508c359fa9 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4baf1943b1bd..1915f7dc30e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1682,7 +1682,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f206ee5dda80..4199bc6915c5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -728,7 +728,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e7eb47f338d4..7af756d0f931 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1371,7 +1371,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto drop; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c4ea9d5cbfaa..2c378b1bd5cf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,7 +1740,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 64804912b093..3c0c9c755c92 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -583,7 +583,7 @@ static void flush_stack(struct sock **stack, unsigned int count, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); - else if (sk_add_backlog_limited(sk, skb1)) { + else if (sk_add_backlog(sk, skb1)) { kfree_skb(skb1); bh_unlock_sock(sk); goto drop; @@ -758,7 +758,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { atomic_inc(&sk->sk_drops); bh_unlock_sock(sk); sock_put(sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 019c780512e8..86d6985b9d49 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb) llc_conn_state_process(sk, skb); else { llc_set_backlog_type(skb, LLC_EVENT); - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); } } } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c0539ffdb272..a12144da7974 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -827,7 +827,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) goto drop_unlock; } out: diff --git a/net/sctp/input.c b/net/sctp/input.c index cbc063665e6b..3d74b264ea22 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -341,7 +341,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) sctp_chunk_free(chunk); else backloged = 1; @@ -375,7 +375,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) struct sctp_ep_common *rcvr = chunk->rcvr; int ret; - ret = sk_add_backlog_limited(sk, skb); + ret = sk_add_backlog(sk, skb); if (!ret) { /* Hold the assoc/ep while hanging on the backlog queue. * This way, we know structures we need will not disappear diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 22bfbc33a8ac..4b235fc1c70f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1322,7 +1322,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - if (sk_add_backlog_limited(sk, buf)) + if (sk_add_backlog(sk, buf)) res = TIPC_ERR_OVERLOAD; else res = TIPC_OK; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index a9da0dc26f4f..52e304212241 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - queued = !sk_add_backlog_limited(sk, skb); + queued = !sk_add_backlog(sk, skb); } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.3 From 723b2f57ad83ee7087acf9a95e8e289414b1f521 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Wed, 3 Mar 2010 22:51:50 +0000 Subject: ethtool: Add direct access to ops->get_sset_count This patch is an alternative approach for accessing string counts, vs. the drvinfo indirect approach. This way the drvinfo space doesn't run out, and we don't break ABI later. Signed-off-by: Jeff Garzik Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/ethtool.h | 17 +++++++++--- net/core/ethtool.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index cca1c3de140d..f6f961fefbe5 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -253,6 +253,17 @@ struct ethtool_gstrings { __u8 data[0]; }; +struct ethtool_sset_info { + __u32 cmd; /* ETHTOOL_GSSET_INFO */ + __u32 reserved; + __u64 sset_mask; /* input: each bit selects an sset to query */ + /* output: each bit a returned sset */ + __u32 data[0]; /* ETH_SS_xxx count, in order, based on bits + in sset_mask. One bit implies one + __u32, two bits implies two + __u32's, etc. */ +}; + enum ethtool_test_flags { ETH_TEST_FL_OFFLINE = (1 << 0), /* online / offline */ ETH_TEST_FL_FAILED = (1 << 1), /* test passed / failed */ @@ -606,9 +617,9 @@ struct ethtool_ops { #define ETHTOOL_SRXCLSRLINS 0x00000032 /* Insert RX classification rule */ #define ETHTOOL_FLASHDEV 0x00000033 /* Flash firmware to device */ #define ETHTOOL_RESET 0x00000034 /* Reset hardware */ - -#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */ -#define ETHTOOL_GRXNTUPLE 0x00000036 /* Get n-tuple filters from device */ +#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */ +#define ETHTOOL_GRXNTUPLE 0x00000036 /* Get n-tuple filters from device */ +#define ETHTOOL_GSSET_INFO 0x00000037 /* Get string set info */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0f2f82185ec4..70075c47ada8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); + /* + * this method of obtaining string set info is deprecated; + * consider using ETHTOOL_GSSET_INFO instead + */ if (ops->get_sset_count) { int rc; @@ -237,6 +241,71 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use return 0; } +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_get_sset_info(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_sset_info info; + const struct ethtool_ops *ops = dev->ethtool_ops; + u64 sset_mask; + int i, idx = 0, n_bits = 0, ret, rc; + u32 *info_buf = NULL; + + if (!ops->get_sset_count) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + /* store copy of mask, because we zero struct later on */ + sset_mask = info.sset_mask; + if (!sset_mask) + return 0; + + /* calculate size of return buffer */ + for (i = 0; i < 64; i++) + if (sset_mask & (1ULL << i)) + n_bits++; + + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GSSET_INFO; + + info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER); + if (!info_buf) + return -ENOMEM; + + /* + * fill return buffer based on input bitmask and successful + * get_sset_count return + */ + for (i = 0; i < 64; i++) { + if (!(sset_mask & (1ULL << i))) + continue; + + rc = ops->get_sset_count(dev, i); + if (rc >= 0) { + info.sset_mask |= (1ULL << i); + info_buf[idx++] = rc; + } + } + + ret = -EFAULT; + if (copy_to_user(useraddr, &info, sizeof(info))) + goto out; + + useraddr += offsetof(struct ethtool_sset_info, data); + if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) + goto out; + + ret = 0; + +out: + kfree(info_buf); + return ret; +} + /* * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() */ @@ -1471,6 +1540,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXNTUPLE: rc = ethtool_get_rx_ntuple(dev, useraddr); break; + case ETHTOOL_GSSET_INFO: + rc = ethtool_get_sset_info(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From d17792ebdf90289c9fd1bce888076d3d60ecd53b Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 4 Mar 2010 08:21:53 +0000 Subject: ethtool: Add direct access to ops->get_sset_count On 03/04/2010 09:26 AM, Ben Hutchings wrote: > On Thu, 2010-03-04 at 00:51 -0800, Jeff Kirsher wrote: >> From: Jeff Garzik >> >> This patch is an alternative approach for accessing string >> counts, vs. the drvinfo indirect approach. This way the drvinfo >> space doesn't run out, and we don't break ABI later. > [...] >> --- a/net/core/ethtool.c >> +++ b/net/core/ethtool.c >> @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use >> info.cmd = ETHTOOL_GDRVINFO; >> ops->get_drvinfo(dev,&info); >> >> + /* >> + * this method of obtaining string set info is deprecated; >> + * consider using ETHTOOL_GSSET_INFO instead >> + */ > > This comment belongs on the interface (ethtool.h) not the > implementation. Debatable -- the current comment is located at the callsite of ops->get_sset_count(), which is where an implementor might think to add a new call. Not all the numeric fields in ethtool_drvinfo are obtained from ->get_sset_count(). Hence the "some" in the attached patch to include/linux/ethtool.h, addressing your comment. > [...] >> +static noinline int ethtool_get_sset_info(struct net_device *dev, >> + void __user *useraddr) >> +{ > [...] >> + /* calculate size of return buffer */ >> + for (i = 0; i< 64; i++) >> + if (sset_mask& (1ULL<< i)) >> + n_bits++; > [...] > > We have a function for this: > > n_bits = hweight64(sset_mask); Agreed. I've attached a follow-up patch, which should enable my/Jeff's kernel patch to be applied, followed by this one. Signed-off-by: Jeff Garzik Signed-off-by: David S. Miller --- include/linux/ethtool.h | 7 +++++++ net/core/ethtool.c | 7 +++---- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f6f961fefbe5..b33f316bb92e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -61,6 +61,13 @@ struct ethtool_drvinfo { /* For PCI devices, use pci_name(pci_dev). */ char reserved1[32]; char reserved2[12]; + /* + * Some struct members below are filled in + * using ops->get_sset_count(). Obtaining + * this info from ethtool_drvinfo is now + * deprecated; Use ETHTOOL_GSSET_INFO + * instead. + */ __u32 n_priv_flags; /* number of flags valid in ETHTOOL_GPFLAGS */ __u32 n_stats; /* number of u64's from ETHTOOL_GSTATS */ __u32 testinfo_len; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 70075c47ada8..33d2ded50f84 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* @@ -216,7 +217,7 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use /* * this method of obtaining string set info is deprecated; - * consider using ETHTOOL_GSSET_INFO instead + * Use ETHTOOL_GSSET_INFO instead. */ if (ops->get_sset_count) { int rc; @@ -265,9 +266,7 @@ static noinline int ethtool_get_sset_info(struct net_device *dev, return 0; /* calculate size of return buffer */ - for (i = 0; i < 64; i++) - if (sset_mask & (1ULL << i)) - n_bits++; + n_bits = hweight64(sset_mask); memset(&info, 0, sizeof(info)); info.cmd = ETHTOOL_GSSET_INFO; -- cgit v1.2.3 From 02a780c014c40973cbe71d04cec7a24e6629995f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 Mar 2010 01:14:09 +0000 Subject: bridge: cleanup: remove unneed check We dereference "port" on the lines immediately before and immediately after the test so port should hopefully never be null here. Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2559fb539836..8b258872eba8 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -627,8 +627,8 @@ static void br_multicast_port_query_expired(unsigned long data) struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); - if (port && (port->state == BR_STATE_DISABLED || - port->state == BR_STATE_BLOCKING)) + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) goto out; if (port->multicast_startup_queries_sent < -- cgit v1.2.3 From 72150e9b7fec217fbd646a29ea2f65a3d4d55ea9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 Mar 2010 01:04:45 +0000 Subject: sock.c: potential null dereference We test that "prot->rsk_prot" is non-null right before we dereference it on this line. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/core/sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 61a65a2e0455..c5812bbc2cc9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2288,7 +2288,8 @@ out_free_request_sock_slab: prot->rsk_prot->slab = NULL; } out_free_request_sock_slab_name: - kfree(prot->rsk_prot->slab_name); + if (prot->rsk_prot) + kfree(prot->rsk_prot->slab_name); out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; -- cgit v1.2.3 From 0c9a2ac1f8a2e55b3382dfc27256878a58ea49e9 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 7 Mar 2010 00:14:44 +0000 Subject: ipv6: Optmize translation between IPV6_PREFER_SRC_xxx and RT6_LOOKUP_F_xxx. IPV6_PREFER_SRC_xxx definitions: | #define IPV6_PREFER_SRC_TMP 0x0001 | #define IPV6_PREFER_SRC_PUBLIC 0x0002 | #define IPV6_PREFER_SRC_COA 0x0004 RT6_LOOKUP_F_xxx definitions: | #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 | #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 | #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 So, we can translate between these two groups by shift operation instead of multiple 'if's. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ip6_route.h | 18 ++++++++++++++++++ net/ipv6/fib6_rules.c | 11 ++--------- net/ipv6/route.c | 11 ++--------- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4a808de7c0f6..68f67836e146 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -37,6 +37,24 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +/* + * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate + * between IPV6_ADDR_PREFERENCES socket option values + * IPV6_PREFER_SRC_TMP = 0x1 + * IPV6_PREFER_SRC_PUBLIC = 0x2 + * IPV6_PREFER_SRC_COA = 0x4 + * and above RT6_LOOKUP_F_SRCPREF_xxx flags. + */ +static inline int rt6_srcprefs2flags(unsigned int srcprefs) +{ + /* No need to bitmask because srcprefs have only 3 bits. */ + return srcprefs << 3; +} + +static inline unsigned int rt6_flags2srcprefs(int flags) +{ + return (flags >> 3) & 7; +} extern void ip6_route_input(struct sk_buff *skb); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 551882b9dfd6..5e463c43fcc2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -84,18 +84,11 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; - unsigned int srcprefs = 0; - - if (flags & RT6_LOOKUP_F_SRCPREF_TMP) - srcprefs |= IPV6_PREFER_SRC_TMP; - if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) - srcprefs |= IPV6_PREFER_SRC_PUBLIC; - if (flags & RT6_LOOKUP_F_SRCPREF_COA) - srcprefs |= IPV6_PREFER_SRC_COA; if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->u.dst)->dev, - &flp->fl6_dst, srcprefs, + &flp->fl6_dst, + rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b08879e97f22..52cd3eff31dc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -819,15 +819,8 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; - else if (sk) { - unsigned int prefs = inet6_sk(sk)->srcprefs; - if (prefs & IPV6_PREFER_SRC_TMP) - flags |= RT6_LOOKUP_F_SRCPREF_TMP; - if (prefs & IPV6_PREFER_SRC_PUBLIC) - flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; - if (prefs & IPV6_PREFER_SRC_COA) - flags |= RT6_LOOKUP_F_SRCPREF_COA; - } + else if (sk) + flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } -- cgit v1.2.3 From 49f5fcfd4ac3df24aa66520e1c5f37db5dfa8c10 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 5 Mar 2010 21:07:39 +0000 Subject: bridge: Use RCU list primitive in __br_mdb_ip_get As Paul McKenney correctly pointed out, __br_mdb_ip_get needs to use the RCU list walking primitive in order to work correctly on platforms where data-dependency ordering is not guaranteed. Reported-by: Paul E. McKenney Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 8b258872eba8..a1ffe1582c9a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -38,7 +38,7 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( struct net_bridge_mdb_entry *mp; struct hlist_node *p; - hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { if (dst == mp->addr) return mp; } -- cgit v1.2.3 From 10cc2b50eb4b01ca4dc014af2094d28b4ebe20d7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 5 Mar 2010 21:03:35 +0000 Subject: bridge: Fix RCU race in br_multicast_stop Thanks to Paul McKenny for pointing out that it is incorrect to use synchronize_rcu_bh to ensure that pending callbacks have completed. Instead we should use rcu_barrier_bh. Reported-by: Paul E. McKenney Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a1ffe1582c9a..12ce1eaa4f3e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1135,7 +1135,7 @@ void br_multicast_stop(struct net_bridge *br) if (mdb->old) { spin_unlock_bh(&br->multicast_lock); - synchronize_rcu_bh(); + rcu_barrier_bh(); spin_lock_bh(&br->multicast_lock); WARN_ON(mdb->old); } -- cgit v1.2.3 From 6cce09f87a04797fae5b947ef2626c14a78f0b49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 7 Mar 2010 23:21:57 +0000 Subject: tcp: Add SNMP counters for backlog and min_ttl drops Commit 6b03a53a (tcp: use limited socket backlog) added the possibility of dropping frames when backlog queue is full. Commit d218d111 (tcp: Generalized TTL Security Mechanism) added the possibility of dropping frames when TTL is under a given limit. This patch adds new SNMP MIB entries, named TCPBacklogDrop and TCPMinTTLDrop, published in /proc/net/netstat in TcpExt: line netstat -s | egrep "TCPBacklogDrop|TCPMinTTLDrop" TCPBacklogDrop: 0 TCPMinTTLDrop: 0 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/snmp.h | 2 ++ net/ipv4/proc.c | 2 ++ net/ipv4/tcp_ipv4.c | 7 +++++-- net/ipv6/tcp_ipv6.c | 3 ++- 4 files changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index e28f5a0182e8..4435d1084755 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -225,6 +225,8 @@ enum LINUX_MIB_SACKSHIFTED, LINUX_MIB_SACKMERGED, LINUX_MIB_SACKSHIFTFALLBACK, + LINUX_MIB_TCPBACKLOGDROP, + LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 242ed2307370..4f1f337f4337 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -249,6 +249,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), + SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), + SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1915f7dc30e6..8d51d39ad1bb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1651,8 +1651,10 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; - if (iph->ttl < inet_sk(sk)->min_ttl) + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; + } process: if (sk->sk_state == TCP_TIME_WAIT) @@ -1682,8 +1684,9 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (sk_add_backlog(sk, skb)) { + } else if (unlikely(sk_add_backlog(sk, skb))) { bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2c378b1bd5cf..9b6dbba80d31 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,8 +1740,9 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (sk_add_backlog(sk, skb)) { + } else if (unlikely(sk_add_backlog(sk, skb))) { bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); -- cgit v1.2.3 From 9837638727488922727b0cfd438039fa73364183 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Mar 2010 03:20:00 +0000 Subject: net: fix route cache rebuilds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We added an automatic route cache rebuilding in commit 1080d709fb9d8cd43 but had to correct few bugs. One of the assumption of original patch, was that entries where kept sorted in a given way. This assumption is known to be wrong (commit 1ddbcb005c395518 gave an explanation of this and corrected a leak) and expensive to respect. Paweł Staszewski reported to me one of his machine got its routing cache disabled after few messages like : [ 2677.850065] Route hash chain too long! [ 2677.850080] Adjust your secret_interval! [82839.662993] Route hash chain too long! [82839.662996] Adjust your secret_interval! [155843.731650] Route hash chain too long! [155843.731664] Adjust your secret_interval! [155843.811881] Route hash chain too long! [155843.811891] Adjust your secret_interval! [155843.858209] vlan0811: 5 rebuilds is over limit, route caching disabled [155843.858212] Route hash chain too long! [155843.858213] Adjust your secret_interval! This is because rt_intern_hash() might be fooled when computing a chain length, because multiple entries with same keys can differ because of TOS (or mark/oif) bits. In the rare case the fast algorithm see a too long chain, and before taking expensive path, we call a helper function in order to not count duplicates of same routes, that only differ with tos/mark/oif bits. This helper works with data already in cpu cache and is not be very expensive, despite its O(N^2) implementation. Paweł Staszewski sucessfully tested this patch on his loaded router. Reported-and-tested-by: Paweł Staszewski Signed-off-by: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 50 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b2ba5581d2ae..d9b40248b97f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); static int rt_garbage_collect(struct dst_ops *ops); -static void rt_emergency_hash_rebuild(struct net *net); static struct dst_ops ipv4_dst_ops = { @@ -780,11 +779,30 @@ static void rt_do_flush(int process_context) #define FRACT_BITS 3 #define ONE (1UL << FRACT_BITS) +/* + * Given a hash chain and an item in this hash chain, + * find if a previous entry has the same hash_inputs + * (but differs on tos, mark or oif) + * Returns 0 if an alias is found. + * Returns ONE if rth has no alias before itself. + */ +static int has_noalias(const struct rtable *head, const struct rtable *rth) +{ + const struct rtable *aux = head; + + while (aux != rth) { + if (compare_hash_inputs(&aux->fl, &rth->fl)) + return 0; + aux = aux->u.dst.rt_next; + } + return ONE; +} + static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, *aux, **rthp; + struct rtable *rth, **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; unsigned long delta; @@ -835,15 +853,7 @@ nofree: * attributes don't unfairly skew * the length computation */ - for (aux = rt_hash_table[i].chain;;) { - if (aux == rth) { - length += ONE; - break; - } - if (compare_hash_inputs(&aux->fl, &rth->fl)) - break; - aux = aux->u.dst.rt_next; - } + length += has_noalias(rt_hash_table[i].chain, rth); continue; } } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) @@ -1073,6 +1083,21 @@ work_done: out: return 0; } +/* + * Returns number of entries in a hash chain that have different hash_inputs + */ +static int slow_chain_length(const struct rtable *head) +{ + int length = 0; + const struct rtable *rth = head; + + while (rth) { + length += has_noalias(head, rth); + rth = rth->u.dst.rt_next; + } + return length >> FRACT_BITS; +} + static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp, struct sk_buff *skb) { @@ -1185,7 +1210,8 @@ restart: rt_free(cand); } } else { - if (chain_length > rt_chain_length_max) { + if (chain_length > rt_chain_length_max && + slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { struct net *net = dev_net(rt->u.dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; if (!rt_caching(dev_net(rt->u.dst.dev))) { -- cgit v1.2.3 From 28b2774a0d5852236dab77a4147b8b88548110f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Mar 2010 11:32:01 -0800 Subject: tcp: Fix tcp_make_synack() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4957faad (TCPCT part 1g: Responder Cookie => Initiator), part of TCP_COOKIE_TRANSACTION implementation, forgot to correctly size synack skb in case user data must be included. Many thanks to Mika Pentillä for spotting this error. Reported-by: Penttillä Mika Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4a1605d3f909..f181b78f2385 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2395,13 +2395,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; + int s_data_desired = 0; - skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); + if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) + s_data_desired = cvp->s_data_desired; + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC); if (skb == NULL) return NULL; @@ -2457,16 +2461,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); if (OPTION_COOKIE_EXTENSION & opts.options) { - const struct tcp_cookie_values *cvp = tp->cookie_values; - - if (cvp != NULL && - cvp->s_data_constant && - cvp->s_data_desired > 0) { - u8 *buf = skb_put(skb, cvp->s_data_desired); + if (s_data_desired) { + u8 *buf = skb_put(skb, s_data_desired); /* copy data directly from the listening socket. */ - memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); - TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; + memcpy(buf, cvp->s_data_payload, s_data_desired); + TCP_SKB_CB(skb)->end_seq += s_data_desired; } if (opts.hash_size > 0) { -- cgit v1.2.3 From fc0b579168cbe737c83c6b9bbfe265d3ae6baca6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 8 Mar 2010 12:15:28 -0800 Subject: net/sunrpc: Remove uses of NIPQUAD, use %pI4 Originally submitted Jan 1, 2010 http://patchwork.kernel.org/patch/71221/ Convert NIPQUAD to the %pI4 format extension where possible Convert %02x%02x%02x%02x/NIPQUAD to %08x/ntohl Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/sunrpc/xprtrdma/transport.c | 3 +-- net/sunrpc/xprtsock.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 7018eef1dcdd..83d339fd66d5 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -165,8 +165,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); + (void)snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3d739e5d15d8..86234bcf2e89 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -297,12 +297,11 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) switch (sap->sa_family) { case AF_INET: sin = xs_addr_in(xprt); - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", - NIPQUAD(sin->sin_addr.s_addr)); + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: sin6 = xs_addr_in6(xprt); - (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); + snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); -- cgit v1.2.3 From 81160e66cca3d3a16b7d88e0e2dccfc5c76f36f9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 8 Mar 2010 12:15:59 -0800 Subject: net/sunrpc: Convert (void)snprintf to snprintf (Applies on top of "Remove uses of NIPQUAD, use %pI4") Casts to void of snprintf are most uncommon in kernel source. 9 use casts, 1301 do not. Remove the remaining uses in net/sunrpc/ Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/sunrpc/xprtrdma/transport.c | 6 +++--- net/sunrpc/xprtsock.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 83d339fd66d5..f96c2fe6137b 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -160,15 +160,15 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) (void)rpc_ntop(sap, buf, sizeof(buf)); xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - (void)snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); /* netid */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 86234bcf2e89..4f55ab7ec1b1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -314,10 +314,10 @@ static void xs_format_common_peer_ports(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); char buf[128]; - (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); } -- cgit v1.2.3 From f5c445ed4148434f142be0263a8ad7cb58503e8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Mar 2010 12:17:04 -0800 Subject: ethtool: Use noinline_for_stack Use self documenting noinline_for_stack instead of duplicated comments. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/ethtool.c | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 33d2ded50f84..f4cb6b6299d9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -200,10 +200,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_settings(dev, &cmd); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) { struct ethtool_drvinfo info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -242,10 +239,7 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use return 0; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_sset_info(struct net_device *dev, +static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, void __user *useraddr) { struct ethtool_sset_info info; @@ -305,10 +299,7 @@ out: return ret; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc cmd; @@ -321,10 +312,7 @@ static noinline int ethtool_set_rxnfc(struct net_device *dev, void __user *usera return dev->ethtool_ops->set_rxnfc(dev, &cmd); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc info; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -396,10 +384,7 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, list->count++; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; @@ -867,10 +852,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) return ret; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; @@ -884,10 +866,7 @@ static noinline int ethtool_get_coalesce(struct net_device *dev, void __user *us return 0; } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) +static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce; @@ -1297,10 +1276,7 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, return actor(dev, edata.data); } -/* - * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() - */ -static noinline int ethtool_flash_device(struct net_device *dev, char __user *useraddr) +static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) { struct ethtool_flash efl; -- cgit v1.2.3 From d88dca79d3852a3623f606f781e013d61486828a Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 8 Mar 2010 12:20:58 -0800 Subject: tipc: fix endianness on tipc subscriber messages Remove htohl implementation from tipc I was working on forward porting the downstream commits for TIPC and ran accross this one: http://tipc.cslab.ericsson.net/cgi-bin/gitweb.cgi?p=people/allan/tipc.git;a=commitdiff;h=894279b9437b63cbb02405ad5b8e033b51e4e31e I was going to just take it, when I looked closer and noted what it was doing. This is basically a routine to byte swap fields of data in sent/received packets for tipc, dependent upon the receivers guessed endianness of the peer when a connection is established. Asside from just seeming silly to me, it appears to violate the latest RFC draft for tipc: http://tipc.sourceforge.net/doc/draft-spec-tipc-02.txt Which, according to section 4.2 and 4.3.3, requires that all fields of all commands be sent in network byte order. So instead of just taking this patch, instead I'm removing the htohl function and replacing the calls with calls to ntohl in the rx path and htonl in the send path. As part of this fix, I'm also changing the subscr_cancel function, which searches the list of subscribers, using a memcmp of the entire subscriber list, for the entry to tear down. unfortunately it memcmps the entire tipc_subscr structure which has several bits that are private to the local side, so nothing will ever match. section 5.2 of the draft spec indicates the tuple should uniquely identify a subscriber, so convert subscr_cancel to just match on those fields (properly endian swapped). I've tested this using the tipc test suite, and its passed without issue. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/tipc/subscr.c | 57 +++++++++++++++++++++---------------------------------- net/tipc/subscr.h | 2 -- 2 files changed, 22 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index ac91f0dfa144..ff123e56114a 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -75,19 +75,6 @@ struct top_srv { static struct top_srv topsrv = { 0 }; -/** - * htohl - convert value to endianness used by destination - * @in: value to convert - * @swap: non-zero if endianness must be reversed - * - * Returns converted value - */ - -static u32 htohl(u32 in, int swap) -{ - return swap ? swab32(in) : in; -} - /** * subscr_send_event - send a message containing a tipc_event to the subscriber * @@ -107,11 +94,11 @@ static void subscr_send_event(struct subscription *sub, msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); - sub->evt.found_lower = htohl(found_lower, sub->swap); - sub->evt.found_upper = htohl(found_upper, sub->swap); - sub->evt.port.ref = htohl(port_ref, sub->swap); - sub->evt.port.node = htohl(node, sub->swap); + sub->evt.event = htonl(event); + sub->evt.found_lower = htonl(found_lower); + sub->evt.found_upper = htonl(found_upper); + sub->evt.port.ref = htonl(port_ref); + sub->evt.port.node = htonl(node); tipc_send(sub->server_ref, 1, &msg_sect); } @@ -287,16 +274,23 @@ static void subscr_cancel(struct tipc_subscr *s, { struct subscription *sub; struct subscription *sub_temp; + __u32 type, lower, upper; int found = 0; /* Find first matching subscription, exit if not found */ + type = ntohl(s->seq.type); + lower = ntohl(s->seq.lower); + upper = ntohl(s->seq.upper); + list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { - if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - found = 1; - break; - } + if ((type == sub->seq.type) && + (lower == sub->seq.lower) && + (upper == sub->seq.upper)) { + found = 1; + break; + } } if (!found) return; @@ -325,16 +319,10 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, struct subscriber *subscriber) { struct subscription *sub; - int swap; - - /* Determine subscriber's endianness */ - - swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); /* Detect & process a subscription cancellation request */ - if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { - s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); + if (ntohl(s->filter) & TIPC_SUB_CANCEL) { subscr_cancel(s, subscriber); return NULL; } @@ -359,11 +347,11 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, /* Initialize subscription object */ - sub->seq.type = htohl(s->seq.type, swap); - sub->seq.lower = htohl(s->seq.lower, swap); - sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = htohl(s->timeout, swap); - sub->filter = htohl(s->filter, swap); + sub->seq.type = ntohl(s->seq.type); + sub->seq.lower = ntohl(s->seq.lower); + sub->seq.upper = ntohl(s->seq.upper); + sub->timeout = ntohl(s->timeout); + sub->filter = ntohl(s->filter); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || (sub->seq.lower > sub->seq.upper)) { @@ -376,7 +364,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); sub->server_ref = subscriber->port_ref; - sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); atomic_inc(&topsrv.subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 45d89bf4d202..c20f496d95b2 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -53,7 +53,6 @@ typedef void (*tipc_subscr_event) (struct subscription *sub, * @nameseq_list: adjacent subscriptions in name sequence's subscription list * @subscription_list: adjacent subscriptions in subscriber's subscription list * @server_ref: object reference of server port associated with subscription - * @swap: indicates if subscriber uses opposite endianness in its messages * @evt: template for events generated by subscription */ @@ -66,7 +65,6 @@ struct subscription { struct list_head nameseq_list; struct list_head subscription_list; u32 server_ref; - int swap; struct tipc_event evt; }; -- cgit v1.2.3 From de5865714621e23d65c52955ca2125dbb074c242 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 8 Mar 2010 12:43:56 -0800 Subject: tipc: filter out messages not intended for this host Port commit 20deb48d16fdd07ce2fdc8d03ea317362217e085 from git://tipc.cslab.ericsson.net/pub/git/people/allan/tipc.git Part of the large effort I'm trying to help with getting all the downstreamed code from windriver forward ported to the upstream tree Origional commit message Restore check to filter out inadverdently received messages This patch reimplements a check that allows TIPC to discard messages that are not intended for it. This check was present in TIPC 1.5/1.6, but was removed by accident during the development of TIPC 1.7; it has now been updated to account for new features present in TIPC 1.7 and reinserted into TIPC. The main benefit of this check is to filter out messages arriving from orphaned link endpoints, which can arise when a node exits the network and then re-enters it with a different TIPC network address (i.e. value). Signed-off-by: Neil Horman Origionally-authored-by: Allan Stephens Signed-off-by: David S. Miller --- net/tipc/link.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 6f50f6423f63..1a7e4665af80 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1882,6 +1882,15 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) (msg_destnode(msg) != tipc_own_addr))) goto cont; + /* Discard non-routeable messages destined for another node */ + + if (unlikely(!msg_isdata(msg) && + (msg_destnode(msg) != tipc_own_addr))) { + if ((msg_user(msg) != CONN_MANAGER) && + (msg_user(msg) != MSG_FRAGMENTER)) + goto cont; + } + /* Locate unicast link endpoint that should handle message */ n_ptr = tipc_node_find(msg_prevnode(msg)); -- cgit v1.2.3 From bb134d5d9580fc7b945e3bca3c4b263947022966 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Mar 2010 05:55:56 +0000 Subject: tcp: Fix tcp_v4_rcv() Commit d218d111 (tcp: Generalized TTL Security Mechanism) added a bug for TIMEWAIT sockets. We should not test min_ttl for TW sockets. Reported-by: Tetsuo Handa Signed-off-by: Eric Dumazet Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8d51d39ad1bb..70df40980a87 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1651,15 +1651,15 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; +process: + if (sk->sk_state == TCP_TIME_WAIT) + goto do_time_wait; + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; } -process: - if (sk->sk_state == TCP_TIME_WAIT) - goto do_time_wait; - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset(skb); -- cgit v1.2.3 From 0a141509ede48ac33ef756ac1640f4d3f46fa2db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Mar 2010 19:40:54 +0000 Subject: net: Annotates neigh_invalidate() Annotates neigh_invalidate() with __releases() and __acquires() for sparse sake. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d102f6d9abdc..6cee6434da67 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -771,6 +771,8 @@ static __inline__ int neigh_max_probes(struct neighbour *n) } static void neigh_invalidate(struct neighbour *neigh) + __releases(neigh->lock) + __acquires(neigh->lock) { struct sk_buff *skb; -- cgit v1.2.3 From 3041f5170751e3522aa1bd6e8ca5d98e846720b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Mar 2010 19:09:08 +0000 Subject: net: Fix dev_mc_add() Commit 6e17d45a (net: add addr len check to dev_mc_add) added a bug in dev_mc_add(), since it can now exit with a lock imbalance. Signed-off-by: Eric Dumazet CC: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev_mcast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index fd91569e2394..3dc295beb483 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -97,8 +97,9 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) netif_addr_lock_bh(dev); if (alen != dev->addr_len) - return -EINVAL; - err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); + err = -EINVAL; + else + err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); -- cgit v1.2.3 From 38a679a52be13d5a0c766597ab823e06688d6e8e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 6 Mar 2010 18:35:08 +0200 Subject: mac80211: Fix sta_mtx unlocking on insert STA failure path Commit 34e895075e21be3e21e71d6317440d1ee7969ad0 introduced sta_mtx locking into sta_info_insert() (now sta_info_insert_rcu), but forgot to unlock this mutex on one of the error paths. Fix this by adding the missing mutex_unlock() call for the case where STA insert fails due to an entry existing already. This may happen at least in AP mode when a STA roams between two BSSes (vifs). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 211c475f73c6..56422d894351 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -434,6 +434,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); + mutex_unlock(&local->sta_mtx); rcu_read_lock(); err = -EEXIST; goto out_free; -- cgit v1.2.3 From 2a13052fe495948e572839e514e0e0cd236c50b0 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Tue, 9 Mar 2010 14:25:02 +0200 Subject: mac80211: Fix (dynamic) power save entry Currently hardware with !IEEE80211_HW_PS_NULLFUNC_STACK and IEEE80211_HW_REPORTS_TX_ACK_STATUS will never enter PSM due to the conditions in the power save entry functions. Fix those conditions. Signed-off-by: Juuso Oikarinen Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0ab284c32135..be5f723d643a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -436,10 +436,12 @@ static void ieee80211_enable_ps(struct ieee80211_local *local, if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) ieee80211_send_nullfunc(local, sdata, 1); - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { - conf->flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - } + if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && + (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) + return; + + conf->flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } } @@ -558,7 +560,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) ieee80211_send_nullfunc(local, sdata, 1); - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) || + if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && + (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) || (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; local->hw.conf.flags |= IEEE80211_CONF_PS; -- cgit v1.2.3 From 51f5f8ca446d4c59041b9b6995821e13208897ea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Mar 2010 17:13:36 +0100 Subject: mac80211: Fix memory leak in ieee80211_if_write() Fix memory leak and use kmalloc() instead of kzalloc() as we are going to overwrite the allocated buffer. Signed-off-by: Eric Dumazet Signed-off-by: John W. Linville --- net/mac80211/debugfs_netdev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9affe2cd185f..b4ddb2f83914 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -48,20 +48,24 @@ static ssize_t ieee80211_if_write( ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int)) { u8 *buf; - ssize_t ret = -ENODEV; + ssize_t ret; - buf = kzalloc(count, GFP_KERNEL); + buf = kmalloc(count, GFP_KERNEL); if (!buf) return -ENOMEM; + ret = -EFAULT; if (copy_from_user(buf, userbuf, count)) - return -EFAULT; + goto freebuf; + ret = -ENODEV; rtnl_lock(); if (sdata->dev->reg_state == NETREG_REGISTERED) ret = (*write)(sdata, buf, count); rtnl_unlock(); +freebuf: + kfree(buf); return ret; } -- cgit v1.2.3 From 964ad81cbd933e5fa310faeec1e923c14651284b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 12 Mar 2010 00:00:17 -0800 Subject: ipconfig: Handle devices which take some time to come up. Some network devices, particularly USB ones, take several seconds to fully init and appear in the device list. If the user turned ipconfig on, they are using it for NFS root or some other early booting purpose. So it makes no sense to just flat out fail immediately if the device isn't found. It also doesn't make sense to just jack up the initial wait to something crazy like 10 seconds. Instead, poll immediately, and then periodically once a second, waiting for a usable device to appear. Fail after 12 seconds. Signed-off-by: David S. Miller Tested-by: Christian Pellegrin --- net/ipv4/ipconfig.c | 57 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 10a6a604bf32..678909281648 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -187,6 +187,16 @@ struct ic_device { static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ static struct net_device *ic_dev __initdata = NULL; /* Selected device */ +static bool __init ic_device_match(struct net_device *dev) +{ + if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : + (!(dev->flags & IFF_LOOPBACK) && + (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && + strncmp(dev->name, "dummy", 5))) + return true; + return false; +} + static int __init ic_open_devs(void) { struct ic_device *d, **last; @@ -207,10 +217,7 @@ static int __init ic_open_devs(void) for_each_netdev(&init_net, dev) { if (dev->flags & IFF_LOOPBACK) continue; - if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : - (!(dev->flags & IFF_LOOPBACK) && - (dev->flags & (IFF_POINTOPOINT|IFF_BROADCAST)) && - strncmp(dev->name, "dummy", 5))) { + if (ic_device_match(dev)) { int able = 0; if (dev->mtu >= 364) able |= IC_BOOTP; @@ -228,7 +235,7 @@ static int __init ic_open_devs(void) } if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) { rtnl_unlock(); - return -1; + return -ENOMEM; } d->dev = dev; *last = d; @@ -253,7 +260,7 @@ static int __init ic_open_devs(void) printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); else printk(KERN_ERR "IP-Config: No network devices available.\n"); - return -1; + return -ENODEV; } return 0; } @@ -1303,6 +1310,32 @@ __be32 __init root_nfs_parse_addr(char *name) return addr; } +#define DEVICE_WAIT_MAX 12 /* 12 seconds */ + +static int __init wait_for_devices(void) +{ + int i; + + msleep(CONF_PRE_OPEN); + for (i = 0; i < DEVICE_WAIT_MAX; i++) { + struct net_device *dev; + int found = 0; + + rtnl_lock(); + for_each_netdev(&init_net, dev) { + if (ic_device_match(dev)) { + found = 1; + break; + } + } + rtnl_unlock(); + if (found) + return 0; + ssleep(1); + } + return -ENODEV; +} + /* * IP Autoconfig dispatcher. */ @@ -1313,6 +1346,7 @@ static int __init ip_auto_config(void) #ifdef IPCONFIG_DYNAMIC int retries = CONF_OPEN_RETRIES; #endif + int err; #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1325,12 +1359,15 @@ static int __init ip_auto_config(void) #ifdef IPCONFIG_DYNAMIC try_try_again: #endif - /* Give hardware a chance to settle */ - msleep(CONF_PRE_OPEN); + /* Wait for devices to appear */ + err = wait_for_devices(); + if (err) + return err; /* Setup all network devices */ - if (ic_open_devs() < 0) - return -1; + err = ic_open_devs(); + if (err) + return err; /* Give drivers a chance to settle */ ssleep(CONF_POST_OPEN); -- cgit v1.2.3 From e2577a065832f2c6d108de2e027891bdb2d78924 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 13 Mar 2010 12:23:29 -0800 Subject: ipv6: Send netlink notification when DAD fails If we are managing IPv6 addresses using DHCP, it would be nice for user-space to be notified if an address configured through DHCP fails DAD. Otherwise user-space would have to poll to see whether DAD succeeds. This patch uses the existing notification mechanism and simply hooks it into the DAD failure code path. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6cf3ee14ace3..3381b4317c27 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1380,6 +1380,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (dad_failed) ifp->flags |= IFA_F_DADFAILED; spin_unlock_bh(&ifp->lock); + if (dad_failed) + ipv6_ifa_notify(0, ifp); in6_ifa_put(ifp); #ifdef CONFIG_IPV6_PRIVACY } else if (ifp->flags&IFA_F_TEMPORARY) { -- cgit v1.2.3 From bec68ff1637ca00bb1585a03a7be8a13380084de Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sat, 13 Mar 2010 12:27:21 -0800 Subject: bridge: ensure to unlock in error path in br_multicast_query(). Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 12ce1eaa4f3e..fd96a8dc97f4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -823,6 +823,7 @@ static int br_multicast_query(struct net_bridge *br, unsigned long max_delay; unsigned long now = jiffies; __be32 group; + int err = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -841,12 +842,14 @@ static int br_multicast_query(struct net_bridge *br, group = 0; } } else { - if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) - return -EINVAL; + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { + err = -EINVAL; + goto out; + } ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) - return 0; + goto out; max_delay = ih3->code ? 1 : IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE); @@ -876,7 +879,7 @@ static int br_multicast_query(struct net_bridge *br, out: spin_unlock(&br->multicast_lock); - return 0; + return err; } static void br_multicast_leave_group(struct net_bridge *br, -- cgit v1.2.3