From de3b7a06dfe15bda3e66a52285d422b954bb4832 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 4 Dec 2014 09:46:20 +0100 Subject: xfrm6: Fix transport header offset in _decode_session6. skb->transport_header might not be valid when we do a reverse decode because the ipv6 tunnel error handlers don't update it to the inner transport header. This leads to a wrong offset calculation and to wrong layer 4 informations. We fix this by using the size of the ipv6 header as the first offset. Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 5f983644373a..aa48302f00a1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -130,8 +130,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) { struct flowi6 *fl6 = &fl->u.ip6; int onlyproto = 0; - u16 offset = skb_network_header_len(skb); const struct ipv6hdr *hdr = ipv6_hdr(skb); + u16 offset = sizeof(*hdr); struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); u8 nexthdr = nh[IP6CB(skb)->nhoff]; -- cgit v1.2.3 From f855691975bb06373a98711e4cfe2c224244b536 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 8 Dec 2014 07:56:18 +0100 Subject: xfrm6: Fix the nexthdr offset in _decode_session6. xfrm_decode_session() was originally designed for the usage in the receive path where the correct nexthdr offset is stored in IP6CB(skb)->nhoff. Over time this function spread to code that is used in the output path (netfilter, vti) where IP6CB(skb)->nhoff is not set. As a result, we get a wrong nexthdr and the upper layer flow informations are wrong. This can leed to incorrect policy lookups. Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index aa48302f00a1..48bf5a06847b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -134,8 +134,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) u16 offset = sizeof(*hdr); struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); - u8 nexthdr = nh[IP6CB(skb)->nhoff]; + u16 nhoff = IP6CB(skb)->nhoff; int oif = 0; + u8 nexthdr; + + if (!nhoff) + nhoff = offsetof(struct ipv6hdr, nexthdr); + + nexthdr = nh[nhoff]; if (skb_dst(skb)) oif = skb_dst(skb)->dev->ifindex; -- cgit v1.2.3 From cec3f0ed7de78c02ac4bc719a95915367d68c778 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Dec 2014 12:26:25 +0100 Subject: cfg80211: use __force __rcu to suppress sparse warning The code assigns a constant value (a pointer to a static variable) to an RCU pointer, which results in a sparse warning: reg.c:112:10: warning: cast adds address space to expression () Suppress this warning by using __force. 
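As an aside, the same idiom applies to any compile-time initializer of an
__rcu pointer; a minimal sketch of the pattern (the names below are invented
for illustration and assume the usual kernel sparse annotations):

	/* a plain static object used as the build-time value of an __rcu
	 * pointer; without __force, sparse warns that the cast adds an
	 * address space, with __force the conversion is marked intentional
	 */
	struct foo { int val; };

	static struct foo default_foo;
	static struct foo __rcu *current_foo =
		(void __force __rcu *)&default_foo;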
Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 47be6163381c..d83480b6efde 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -109,7 +109,7 @@ static struct regulatory_request core_request_world = { * protected by RTNL (and can be accessed with RCU protection) */ static struct regulatory_request __rcu *last_request = - (void __rcu *)&core_request_world; + (void __force __rcu *)&core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; -- cgit v1.2.3 From ba1debdfed974f25aa598c283567878657b292ee Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Wed, 10 Dec 2014 00:21:06 +0200 Subject: wireless: Support of IFLA_INFO_KIND rtnl attribute It allows to identify the wlan kind of device for the user application, e.g.: # ip -d link 1: lo: mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 promiscuity 0 2: enp0s25: mtu 1500 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 1000 link/ether XX:XX:XX:XX:XX:XX brd ff:ff:ff:ff:ff:ff promiscuity 0 3: wlp3s0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether XX:XX:XX:XX:XX:XX brd ff:ff:ff:ff:ff:ff promiscuity 0 wlan Signed-off-by: Vadim Kochan Acked-by: Marcel Holtmann [make wireless_link_ops const] Signed-off-by: Johannes Berg --- net/wireless/core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 53dda7728f86..4910758baab1 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "nl80211.h" #include "core.h" #include "sysfs.h" @@ -937,6 +938,10 @@ void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, } EXPORT_SYMBOL(cfg80211_stop_iface); +static const struct rtnl_link_ops wireless_link_ops = { + .kind = "wlan", +}; + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { @@ -954,6 +959,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, switch (state) { case NETDEV_POST_INIT: SET_NETDEV_DEVTYPE(dev, &wiphy_type); + dev->rtnl_link_ops = &wireless_link_ops; break; case NETDEV_REGISTER: /* -- cgit v1.2.3 From 6b127c71fbdd3daacfd8b9f80b8e6ebfb70a889e Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Wed, 10 Dec 2014 21:26:10 +0530 Subject: mac80211: Move IEEE80211_TX_CTL_PS_RESPONSE Move IEEE80211_TX_CTL_PS_RESPONSE to info->control.flags since this is used only in the TX path (by ath9k). This frees up a bit which can be used for other purposes. 
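For illustration only (a hedged sketch, not taken from this patch; the
handle_ps_response() helper is hypothetical), a driver that keyed off the
old info flag would now test the control flag instead:

	/* before: the indication lived in the shared info->flags bitmap */
	if (info->flags & IEEE80211_TX_CTL_PS_RESPONSE)
		handle_ps_response(skb);

	/* after: it is a TX-control flag, meaningful only on the TX path */
	if (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)
		handle_ps_response(skb);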
Signed-off-by: Sujith Manoharan Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath9k/xmit.c | 6 ++++-- include/net/mac80211.h | 6 +++--- net/mac80211/sta_info.c | 7 ++++--- 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index e9bd02c2e844..4caee66e5f79 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2259,7 +2259,7 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, struct ath_txq *txq = txctl->txq; struct ath_atx_tid *tid = NULL; struct ath_buf *bf; - bool queue, skip_uapsd = false; + bool queue, skip_uapsd = false, ps_resp; int q, ret; if (vif) @@ -2268,6 +2268,8 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, if (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) txctl->force_channel = true; + ps_resp = !!(info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE); + ret = ath_tx_prepare(hw, skb, txctl); if (ret) return ret; @@ -2310,7 +2312,7 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, if (txctl->an && queue) tid = ath_get_skb_tid(sc, txctl->an, skb); - if (!skip_uapsd && (info->flags & IEEE80211_TX_CTL_PS_RESPONSE)) { + if (!skip_uapsd && ps_resp) { ath_txq_unlock(sc, txq); txq = sc->tx.uapsdq; ath_txq_lock(sc, txq); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 58d719ddaa60..b36e60d4c518 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -505,8 +505,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_DONTFRAG: Don't fragment this packet even if it * would be fragmented by size (this is optional, only used for * monitor injection). - * @IEEE80211_TX_CTL_PS_RESPONSE: This frame is a response to a poll - * frame (PS-Poll or uAPSD). * * Note: If you have to add new flags to the enumeration, then don't * forget to update %IEEE80211_TX_TEMPORARY_FLAGS when necessary. @@ -542,7 +540,6 @@ enum mac80211_tx_info_flags { IEEE80211_TX_STATUS_EOSP = BIT(28), IEEE80211_TX_CTL_USE_MINRATE = BIT(29), IEEE80211_TX_CTL_DONTFRAG = BIT(30), - IEEE80211_TX_CTL_PS_RESPONSE = BIT(31), }; #define IEEE80211_TX_CTL_STBC_SHIFT 23 @@ -552,11 +549,14 @@ enum mac80211_tx_info_flags { * * @IEEE80211_TX_CTRL_PORT_CTRL_PROTO: this frame is a port control * protocol frame (e.g. EAP) + * @IEEE80211_TX_CTRL_PS_RESPONSE: This frame is a response to a poll + * frame (PS-Poll or uAPSD). * * These flags are used in tx_info->control.flags. */ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_PORT_CTRL_PROTO = BIT(0), + IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1), }; /* diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a42f5b2b024d..db8b07ac6b10 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1243,10 +1243,11 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, * ends the poll/service period. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_CTL_PS_RESPONSE | IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; + info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; + if (call_driver) drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); @@ -1395,8 +1396,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, * STA may still remain is PS mode after this frame * exchange. 
*/ - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_CTL_PS_RESPONSE; + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; /* * Use MoreData flag to indicate whether there are -- cgit v1.2.3 From 5cf16616e152dd5c274a65954c77f64892d025a8 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Wed, 10 Dec 2014 21:26:11 +0530 Subject: mac80211: Fix accounting of multicast frames Since multicast frames are marked as no-ack, using IEEE80211_TX_STAT_ACK to check if they have been successfully transmitted by the driver is incorrect since a driver can choose to ignore transmission status for no-ack frames. This results in incorrect accounting for such frames. To fix this issue, this patch introduces a new flag that can be used by drivers to indicate error-free transmission of no-ack frames. Signed-off-by: Sujith Manoharan [add a note about not setting the flag for non-no-ack frames] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/status.c | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b36e60d4c518..b24ef577aed9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -505,6 +505,11 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_DONTFRAG: Don't fragment this packet even if it * would be fragmented by size (this is optional, only used for * monitor injection). + * @IEEE80211_TX_STAT_NOACK_TRANSMITTED: A frame that was marked with + * IEEE80211_TX_CTL_NO_ACK has been successfully transmitted without + * any errors (like issues specific to the driver/HW). + * This flag must not be set for frames that don't request no-ack + * behaviour with IEEE80211_TX_CTL_NO_ACK. * * Note: If you have to add new flags to the enumeration, then don't * forget to update %IEEE80211_TX_TEMPORARY_FLAGS when necessary. @@ -540,6 +545,7 @@ enum mac80211_tx_info_flags { IEEE80211_TX_STATUS_EOSP = BIT(28), IEEE80211_TX_CTL_USE_MINRATE = BIT(29), IEEE80211_TX_CTL_DONTFRAG = BIT(30), + IEEE80211_TX_STAT_NOACK_TRANSMITTED = BIT(31), }; #define IEEE80211_TX_CTL_STBC_SHIFT 23 diff --git a/net/mac80211/status.c b/net/mac80211/status.c index bb146f377ee4..d64037c96729 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -664,13 +664,15 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, struct ieee80211_supported_band *sband; int retry_count; int rates_idx; - bool acked; + bool acked, noack_success; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); sband = hw->wiphy->bands[info->band]; acked = !!(info->flags & IEEE80211_TX_STAT_ACK); + noack_success = !!(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED); + if (pubsta) { struct sta_info *sta; @@ -696,7 +698,7 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, rate_control_tx_status_noskb(local, sband, sta, info); } - if (acked) { + if (acked || noack_success) { local->dot11TransmittedFrameCount++; if (!pubsta) local->dot11MulticastTransmittedFrameCount++; @@ -856,7 +858,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) * Fragments are passed to low-level drivers as separate skbs, so these * are actually fragments, not frames. Update frame counters only for * the first fragment of the frame. 
*/ - if (info->flags & IEEE80211_TX_STAT_ACK) { + if ((info->flags & IEEE80211_TX_STAT_ACK) || + (info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) { if (ieee80211_is_first_frag(hdr->seq_ctrl)) { local->dot11TransmittedFrameCount++; if (is_multicast_ether_addr(hdr->addr1)) -- cgit v1.2.3 From 4bcc56bb3a9622e877772598d1cac124266b2b8a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Nov 2014 11:23:53 +0100 Subject: mac80211: ask driver to look at power level when starting AP The power level might have been set, but as the interface was idle it might not have taken effect yet. Ask the driver to check the power level when starting up an AP so that in this case the correct power level is used in case the device/driver can only set it when the interface is actually active. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e75d5c53e97b..169665835b6c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -678,7 +678,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON | BSS_CHANGED_SSID | - BSS_CHANGED_P2P_PS; + BSS_CHANGED_P2P_PS | + BSS_CHANGED_TXPOWER; int err; old = sdata_dereference(sdata->u.ap.beacon, sdata); -- cgit v1.2.3 From 848955ccf0bdf42fff33e021a76f6daec98fe59b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Nov 2014 12:48:42 +0100 Subject: mac80211: move U-APSD enablement to vif flags In order to let drivers have more dynamic U-APSD support, move the enablement flag to the virtual interface driver flags. This lets drivers not only set it up differently for different interfaces, but also enable/disable on the fly if needed. 
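In outline, the driver-side conversion looks like the following sketch
(illustrative only; foo_add_interface() is a hypothetical driver callback):

	/* before: one global capability bit, advertised at registration */
	hw->flags |= IEEE80211_HW_SUPPORTS_UAPSD;

	/* after: a per-interface driver flag, set (or later cleared) from
	 * the add_interface callback
	 */
	static int foo_add_interface(struct ieee80211_hw *hw,
				     struct ieee80211_vif *vif)
	{
		vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD;
		return 0;
	}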
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 3 ++- drivers/net/wireless/cw1200/main.c | 1 - drivers/net/wireless/cw1200/sta.c | 1 + drivers/net/wireless/iwlwifi/mvm/mac80211.c | 13 ++++++------- drivers/net/wireless/ti/wl1251/main.c | 5 ++--- drivers/net/wireless/ti/wlcore/main.c | 2 +- include/net/mac80211.h | 15 +++++++-------- net/mac80211/debugfs.c | 2 -- net/mac80211/main.c | 4 ---- net/mac80211/mlme.c | 7 ++++++- 10 files changed, 25 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index c4005670cba2..2619db1e3e74 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -2871,6 +2871,8 @@ static int ath10k_add_interface(struct ieee80211_hw *hw, int bit; u32 vdev_param; + vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD; + mutex_lock(&ar->conf_mutex); memset(arvif, 0, sizeof(*arvif)); @@ -5024,7 +5026,6 @@ int ath10k_mac_register(struct ath10k *ar) ar->hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_SUPPORTS_DYNAMIC_PS | - IEEE80211_HW_SUPPORTS_UAPSD | IEEE80211_HW_MFP_CAPABLE | IEEE80211_HW_REPORTS_TX_ACK_STATUS | IEEE80211_HW_HAS_RATE_CONTROL | diff --git a/drivers/net/wireless/cw1200/main.c b/drivers/net/wireless/cw1200/main.c index 3e78cc3ccb78..0da6e423da63 100644 --- a/drivers/net/wireless/cw1200/main.c +++ b/drivers/net/wireless/cw1200/main.c @@ -282,7 +282,6 @@ static struct ieee80211_hw *cw1200_init_common(const u8 *macaddr, IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_SUPPORTS_DYNAMIC_PS | IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SUPPORTS_UAPSD | IEEE80211_HW_CONNECTION_MONITOR | IEEE80211_HW_AMPDU_AGGREGATION | IEEE80211_HW_TX_AMPDU_SETUP_IN_HW | diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c index 5b84664db13b..a1e3237c0be8 100644 --- a/drivers/net/wireless/cw1200/sta.c +++ b/drivers/net/wireless/cw1200/sta.c @@ -213,6 +213,7 @@ int cw1200_add_interface(struct ieee80211_hw *dev, /* __le32 auto_calibration_mode = __cpu_to_le32(1); */ vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER | + IEEE80211_VIF_SUPPORTS_UAPSD | IEEE80211_VIF_SUPPORTS_CQM_RSSI; mutex_lock(&priv->conf_mutex); diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 31a5b3f4266c..346331d3c696 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -326,6 +326,8 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->radiotap_vht_details |= IEEE80211_RADIOTAP_VHT_KNOWN_STBC | IEEE80211_RADIOTAP_VHT_KNOWN_BEAMFORMED; hw->rate_control_algorithm = "iwl-mvm-rs"; + hw->uapsd_queues = IWL_MVM_UAPSD_QUEUES; + hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; /* * Enable 11w if advertised by firmware and software crypto @@ -336,13 +338,6 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) !iwlwifi_mod_params.sw_crypto) hw->flags |= IEEE80211_HW_MFP_CAPABLE; - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT && - !iwlwifi_mod_params.uapsd_disable) { - hw->flags |= IEEE80211_HW_SUPPORTS_UAPSD; - hw->uapsd_queues = IWL_MVM_UAPSD_QUEUES; - hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; - } - if (mvm->fw->ucode_capa.api[0] & IWL_UCODE_TLV_API_LMAC_SCAN || mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_UMAC_SCAN) { hw->flags |= IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS; @@ -1147,6 +1142,10 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, mvm->bf_allowed_vif = 
mvmvif; vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER | IEEE80211_VIF_SUPPORTS_CQM_RSSI; + if (mvm->fw->ucode_capa.flags & + IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT && + !iwlwifi_mod_params.uapsd_disable) + vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD; } /* diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 0b30a7b4d663..d4ba009ac9aa 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -500,6 +500,7 @@ static int wl1251_op_add_interface(struct ieee80211_hw *hw, int ret = 0; vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER | + IEEE80211_VIF_SUPPORTS_UAPSD | IEEE80211_VIF_SUPPORTS_CQM_RSSI; wl1251_debug(DEBUG_MAC80211, "mac80211 add interface type %d mac %pM", @@ -1480,9 +1481,7 @@ int wl1251_init_ieee80211(struct wl1251 *wl) /* unit us */ /* FIXME: find a proper value */ - wl->hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_SUPPORTS_UAPSD; + wl->hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_SUPPORTS_PS; wl->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 6ad3fcedab9b..2a99456b6b8f 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -2508,6 +2508,7 @@ static int wl1271_op_add_interface(struct ieee80211_hw *hw, } vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER | + IEEE80211_VIF_SUPPORTS_UAPSD | IEEE80211_VIF_SUPPORTS_CQM_RSSI; wl1271_debug(DEBUG_MAC80211, "mac80211 add interface type %d mac %pM", @@ -5776,7 +5777,6 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) wl->hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_SUPPORTS_DYNAMIC_PS | - IEEE80211_HW_SUPPORTS_UAPSD | IEEE80211_HW_HAS_RATE_CONTROL | IEEE80211_HW_CONNECTION_MONITOR | IEEE80211_HW_REPORTS_TX_ACK_STATUS | diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b24ef577aed9..4913c00539fb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1187,10 +1187,15 @@ struct ieee80211_channel_switch { * monitoring on this virtual interface -- i.e. it can monitor * connection quality related parameters, such as the RSSI level and * provide notifications if configured trigger levels are reached. + * @IEEE80211_VIF_SUPPORTS_UAPSD: The device can do U-APSD for this + * interface. This flag should be set during interface addition, + * but may be set/cleared as late as authentication to an AP. It is + * only valid for managed/station mode interfaces. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1), + IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), }; /** @@ -1589,11 +1594,6 @@ struct ieee80211_tx_control { * @IEEE80211_HW_MFP_CAPABLE: * Hardware supports management frame protection (MFP, IEEE 802.11w). * - * @IEEE80211_HW_SUPPORTS_UAPSD: - * Hardware supports Unscheduled Automatic Power Save Delivery - * (U-APSD) in managed mode. The mode is configured with - * conf_tx() operation. - * * @IEEE80211_HW_REPORTS_TX_ACK_STATUS: * Hardware can provide ack status reports of Tx frames to * the stack. 
@@ -1679,8 +1679,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_MFP_CAPABLE = 1<<13, IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, IEEE80211_HW_NO_AUTO_VIF = 1<<15, - /* free slot */ - IEEE80211_HW_SUPPORTS_UAPSD = 1<<17, + /* free slots */ IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_QUEUE_CONTROL = 1<<20, @@ -2032,7 +2031,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * enabled whenever user has enabled powersave. * * Driver informs U-APSD client support by enabling - * %IEEE80211_HW_SUPPORTS_UAPSD flag. The mode is configured through the + * %IEEE80211_VIF_SUPPORTS_UAPSD flag. The mode is configured through the * uapsd parameter in conf_tx() operation. Hardware needs to send the QoS * Nullfunc frames and stay awake until the service period has ended. To * utilize U-APSD, dynamic powersave is disabled for voip AC and all frames diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 54a189f0393e..eeb0bbd69d98 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -303,8 +303,6 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n"); if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE) sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n"); - if (local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) - sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n"); if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) sf += scnprintf(buf + sf, mxln - sf, "REPORTS_TX_ACK_STATUS\n"); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6ab99da38db9..d9ce33663c73 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -916,10 +916,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } - WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) - && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), - "U-APSD not supported with HW_PS_NULLFUNC_STACK\n"); - /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. It diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 75a9bf50207e..f495b800b92c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4667,8 +4667,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; rcu_read_unlock(); + if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) && + (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), + "U-APSD not supported with HW_PS_NULLFUNC_STACK\n")) + sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD; + if (bss->wmm_used && bss->uapsd_supported && - (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { + (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD)) { assoc_data->uapsd = true; ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; } else { -- cgit v1.2.3 From d29544571507336811b81ca631baf6159cc8146a Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Wed, 17 Dec 2014 10:00:52 +0100 Subject: mac80211: notify NSS changed when IBSS and HT When using IBSS in HT mode, we always get NSS=1 in rc_update callback. Force NSS recalculation when rates updated and notify driver that NSS changed. 
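On the driver side the new bit arrives through the existing rate-control
update callback; a minimal sketch (foo_sta_rc_update() and
foo_update_peer_nss() are hypothetical names):

	static void foo_sta_rc_update(struct ieee80211_hw *hw,
				      struct ieee80211_vif *vif,
				      struct ieee80211_sta *sta, u32 changed)
	{
		if (changed & IEEE80211_RC_NSS_CHANGED)
			foo_update_peer_nss(sta->rx_nss);
	}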
Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 509bc157ce55..b606b53a49a7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1069,9 +1069,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } if (sta && rates_updated) { - drv_sta_rc_update(local, sdata, &sta->sta, - IEEE80211_RC_SUPP_RATES_CHANGED); + u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED; + u8 rx_nss = sta->sta.rx_nss; + + /* Force rx_nss recalculation */ + sta->sta.rx_nss = 0; rate_control_rate_init(sta); + if (sta->sta.rx_nss != rx_nss) + changed |= IEEE80211_RC_NSS_CHANGED; + + drv_sta_rc_update(local, sdata, &sta->sta, changed); } rcu_read_unlock(); -- cgit v1.2.3 From 688b1ecfb9ed0484754d2653386e3c44c58ede5c Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 16 Dec 2014 16:01:39 +0200 Subject: mac80211: notify channel switch at the end of ieee80211_chswitch_post_beacon() The call to cfg80211_ch_switch_notify() should be at the end of the ieee80211_chswitch_post_beacon() function, because it should only be sent if everything succeeded. Fixes: d04b5ac9e70b ("cfg80211/mac80211: allow any interface to send channel switch notifications") Signed-off-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f495b800b92c..ea7c714ba936 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1053,8 +1053,6 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) sdata->csa_block_tx = false; } - cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef); - sdata->vif.csa_active = false; ifmgd->csa_waiting_bcn = false; @@ -1066,6 +1064,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) &ifmgd->csa_connection_drop_work); return; } + + cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) -- cgit v1.2.3 From 2ae70efcea7a695a62bb47170d3fb16674b8dbea Mon Sep 17 00:00:00 2001 From: "Nishikawa, Kenzoh" Date: Tue, 16 Dec 2014 01:41:06 +0000 Subject: mac80211: keep sending peer candidate events while in listen state Instead of sending peer candidate events just once, send them as long as the peer remains in the LISTEN state in the peering state machine, when userspace is implementing the peering manager. Userspace may silence the events from a peer by progressing the state machine or by setting the link state to BLOCKED. Fixes the problem that a mesh peering process won't be fired again after the previous first peering trial fails due to like air propagation error if the peering is managed by user space such as wpa_supplicant. This patch works with another patch for wpa_supplicant described here which fires a peering process again triggered by the notice from kernel. 
http://lists.shmoo.com/pipermail/hostap/2014-November/031235.html Signed-off-by: Kenzoh Nishikawa Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index b488e1859b18..fa94ca15ba95 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -523,6 +523,13 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.mshcfg.auto_open_plinks && rssi_threshold_check(sdata, sta)) changed = mesh_plink_open(sta); + else if (sta->plink_state == NL80211_PLINK_LISTEN && + (sdata->u.mesh.user_mpm || + sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)) + cfg80211_notify_new_peer_candidate(sdata->dev, hw_addr, + elems->ie_start, + elems->total_len, + GFP_ATOMIC); ieee80211_mps_frame_release(sta, elems); out: -- cgit v1.2.3 From ad30ca2c03cecfb1b0749874bdceead269542de6 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 15 Dec 2014 19:25:59 +0200 Subject: cfg80211: allow usermode to query wiphy specific regdom If a wiphy-idx is specified, the kernel will return the wiphy specific regdomain, if such exists. Otherwise return the global regdom. When no wiphy-idx is specified, return the global regdomain as well as all wiphy-specific regulatory domains in the system, via a new nested list of attributes. Add a new attribute for each wiphy-specific regdomain, for usermode to identify it as such. Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 ++- net/wireless/nl80211.c | 173 ++++++++++++++++++++++++++++++++++--------- net/wireless/reg.c | 2 +- net/wireless/reg.h | 1 + 4 files changed, 151 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b37bd5a1cb82..2d384d041224 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -252,7 +252,15 @@ * %NL80211_ATTR_IFINDEX. * * @NL80211_CMD_GET_REG: ask the wireless core to send us its currently set - * regulatory domain. + * regulatory domain. If %NL80211_ATTR_WIPHY is specified and the device + * has a private regulatory domain, it will be returned. Otherwise, the + * global regdomain will be returned. + * A device will have a private regulatory domain if it uses the + * regulatory_hint() API. Even when a private regdomain is used the channel + * information will still be mended according to further hints from + * the regulatory core to help with compliance. A dump version of this API + * is now available which will returns the global regdomain as well as + * all private regdomains of present wiphys (for those that have it). * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command * after being queried by the kernel. 
CRDA replies by sending a regulatory * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a17d6bc6b22c..2d5dc428c5ab 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5327,42 +5327,20 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, return err; } -static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) +static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, + struct sk_buff *msg) { - const struct ieee80211_regdomain *regdom; - struct sk_buff *msg; - void *hdr = NULL; struct nlattr *nl_reg_rules; unsigned int i; - if (!cfg80211_regdomain) - return -EINVAL; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOBUFS; - - hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, - NL80211_CMD_GET_REG); - if (!hdr) - goto put_failure; - - if (reg_last_request_cell_base() && - nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, - NL80211_USER_REG_HINT_CELL_BASE)) - goto nla_put_failure; - - rcu_read_lock(); - regdom = rcu_dereference(cfg80211_regdomain); - if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, regdom->alpha2) || (regdom->dfs_region && nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) - goto nla_put_failure_rcu; + goto nla_put_failure; nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) - goto nla_put_failure_rcu; + goto nla_put_failure; for (i = 0; i < regdom->n_reg_rules; i++) { struct nlattr *nl_reg_rule; @@ -5377,7 +5355,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nl_reg_rule = nla_nest_start(msg, i); if (!nl_reg_rule) - goto nla_put_failure_rcu; + goto nla_put_failure; max_bandwidth_khz = freq_range->max_bandwidth_khz; if (!max_bandwidth_khz) @@ -5398,13 +5376,64 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) power_rule->max_eirp) || nla_put_u32(msg, NL80211_ATTR_DFS_CAC_TIME, reg_rule->dfs_cac_ms)) - goto nla_put_failure_rcu; + goto nla_put_failure; nla_nest_end(msg, nl_reg_rule); } - rcu_read_unlock(); nla_nest_end(msg, nl_reg_rules); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) +{ + const struct ieee80211_regdomain *regdom = NULL; + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy = NULL; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOBUFS; + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_GET_REG); + if (!hdr) + goto put_failure; + + if (info->attrs[NL80211_ATTR_WIPHY]) { + rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); + if (IS_ERR(rdev)) { + nlmsg_free(msg); + return PTR_ERR(rdev); + } + + wiphy = &rdev->wiphy; + regdom = get_wiphy_regdom(wiphy); + + if (regdom && + nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) + goto nla_put_failure; + } + + if (!wiphy && reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + + rcu_read_lock(); + + if (!regdom) + regdom = rcu_dereference(cfg80211_regdomain); + + if (nl80211_put_regdom(regdom, msg)) + goto nla_put_failure_rcu; + + rcu_read_unlock(); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); @@ -5418,6 +5447,79 @@ put_failure: return -EMSGSIZE; } +static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, + u32 seq, int 
flags, struct wiphy *wiphy, + const struct ieee80211_regdomain *regdom) +{ + void *hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, + NL80211_CMD_GET_REG); + + if (!hdr) + return -1; + + genl_dump_check_consistent(cb, hdr, &nl80211_fam); + + if (nl80211_put_regdom(regdom, msg)) + goto nla_put_failure; + + if (!wiphy && reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + + if (wiphy && + nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) + goto nla_put_failure; + + return genlmsg_end(msg, hdr); + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nl80211_get_reg_dump(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct ieee80211_regdomain *regdom = NULL; + struct cfg80211_registered_device *rdev; + int err, reg_idx, start = cb->args[2]; + + rtnl_lock(); + + if (cfg80211_regdomain && start == 0) { + err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, NULL, + rtnl_dereference(cfg80211_regdomain)); + if (err < 0) + goto out_err; + } + + /* the global regdom is idx 0 */ + reg_idx = 1; + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + regdom = get_wiphy_regdom(&rdev->wiphy); + if (!regdom) + continue; + + if (++reg_idx <= start) + continue; + + err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, &rdev->wiphy, regdom); + if (err < 0) { + reg_idx--; + break; + } + } + + cb->args[2] = reg_idx; + err = skb->len; +out_err: + rtnl_unlock(); + return err; +} + static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; @@ -10225,7 +10327,8 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_REG, - .doit = nl80211_get_reg, + .doit = nl80211_get_reg_do, + .dumpit = nl80211_get_reg_dump, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ @@ -10983,9 +11086,13 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } - if (request->wiphy_idx != WIPHY_IDX_INVALID && - nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) - goto nla_put_failure; + if (request->wiphy_idx != WIPHY_IDX_INVALID) { + struct wiphy *wiphy = wiphy_idx_to_wiphy(request->wiphy_idx); + + if (wiphy && + nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) + goto nla_put_failure; + } genlmsg_end(msg, hdr); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d83480b6efde..2d9760084b74 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -142,7 +142,7 @@ static const struct ieee80211_regdomain *get_cfg80211_regdom(void) return rtnl_dereference(cfg80211_regdomain); } -static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { return rtnl_dereference(wiphy->regd); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 5e48031ccb9a..4b45d6e61d24 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -38,6 +38,7 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, const struct ieee80211_reg_rule *rule); bool reg_last_request_cell_base(void); +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy); /** * regulatory_hint_found_beacon - hints a beacon was found on a channel -- cgit v1.2.3 From b0d7aa59592b4270531de5ce65dcf18338a2d98c Mon Sep 17 00:00:00 2001 From: Jonathan Doron 
Date: Mon, 15 Dec 2014 19:26:00 +0200 Subject: cfg80211: allow wiphy specific regdomain management Add a new regulatory flag that allows a driver to manage regdomain changes/updates for its own wiphy. A self-managed wiphys only employs regulatory information obtained from the FW and driver and does not use other cfg80211 sources like beacon-hints, country-code IEs and hints from other devices on the same system. Conversely, a self-managed wiphy does not share its regulatory hints with other devices in the system. If a system contains several devices, one or more of which are self-managed, there might be contradictory regulatory settings between them. Usage of flag is generally discouraged. Only use it if the FW/driver is incompatible with non-locally originated hints. A new API lets the driver send a complete regdomain, to be applied on its wiphy only. After a wiphy-specific regdomain change takes place, usermode will get a new type of change notification. The regulatory core also takes care enforce regulatory restrictions, in case some interfaces are on forbidden channels. Signed-off-by: Jonathan Doron Signed-off-by: Arik Nemtsov Reviewed-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 +++++++ include/net/regulatory.h | 19 ++++++++++ include/uapi/linux/nl80211.h | 6 +++ net/wireless/core.c | 8 ++++ net/wireless/core.h | 7 ++++ net/wireless/nl80211.c | 49 +++++++++++++++--------- net/wireless/nl80211.h | 16 +++++++- net/wireless/reg.c | 88 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 188 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4ebb816241fa..4bc1fc9971a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3807,6 +3807,20 @@ const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2); +/** + * regulatory_set_wiphy_regd - set regdom info for self managed drivers + * @wiphy: the wireless device we want to process the regulatory domain on + * @rd: the regulatory domain informatoin to use for this wiphy + * + * Set the regulatory domain information for self-managed wiphys, only they + * may use this function. See %REGULATORY_WIPHY_SELF_MANAGED for more + * information. + * + * Return: 0 on success. -EINVAL, -EPERM + */ +int regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd); + /** * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain * @wiphy: the wireless device we want to process the regulatory domain on diff --git a/include/net/regulatory.h b/include/net/regulatory.h index b776d72d84be..ebc5a2ed8631 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -147,6 +147,24 @@ struct regulatory_request { * NL80211_IFTYPE_P2P_CLIENT, NL80211_IFTYPE_P2P_GO, * NL80211_IFTYPE_P2P_DEVICE. The flag will be set by default if a device * includes any modes unsupported for enforcement checking. + * @REGULATORY_WIPHY_SELF_MANAGED: for devices that employ wiphy-specific + * regdom management. These devices will ignore all regdom changes not + * originating from their own wiphy. + * A self-managed wiphys only employs regulatory information obtained from + * the FW and driver and does not use other cfg80211 sources like + * beacon-hints, country-code IEs and hints from other devices on the same + * system. Conversely, a self-managed wiphy does not share its regulatory + * hints with other devices in the system. 
If a system contains several + * devices, one or more of which are self-managed, there might be + * contradictory regulatory settings between them. Usage of flag is + * generally discouraged. Only use it if the FW/driver is incompatible + * with non-locally originated hints. + * This flag is incompatible with the flags: %REGULATORY_CUSTOM_REG, + * %REGULATORY_STRICT_REG, %REGULATORY_COUNTRY_IE_FOLLOW_POWER, + * %REGULATORY_COUNTRY_IE_IGNORE and %REGULATORY_DISABLE_BEACON_HINTS. + * Mixing any of the above flags with this flag will result in a failure + * to register the wiphy. This flag implies + * %REGULATORY_DISABLE_BEACON_HINTS and %REGULATORY_COUNTRY_IE_IGNORE. */ enum ieee80211_regulatory_flags { REGULATORY_CUSTOM_REG = BIT(0), @@ -156,6 +174,7 @@ enum ieee80211_regulatory_flags { REGULATORY_COUNTRY_IE_IGNORE = BIT(4), REGULATORY_ENABLE_RELAX_NO_IR = BIT(5), REGULATORY_IGNORE_STALE_KICKOFF = BIT(6), + REGULATORY_WIPHY_SELF_MANAGED = BIT(7), }; struct ieee80211_freq_range { diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2d384d041224..fb58e654f523 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -782,6 +782,10 @@ * peer given by %NL80211_ATTR_MAC. Both peers must be on the base channel * when this command completes. * + * @NL80211_CMD_WIPHY_REG_CHANGE: Similar to %NL80211_CMD_REG_CHANGE, but used + * as an event to indicate changes for devices with wiphy-specific regdom + * management. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -966,6 +970,8 @@ enum nl80211_commands { NL80211_CMD_TDLS_CHANNEL_SWITCH, NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, + NL80211_CMD_WIPHY_REG_CHANGE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 4910758baab1..b661fcce7e3c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -561,6 +561,14 @@ int wiphy_register(struct wiphy *wiphy) BIT(NL80211_IFTYPE_MONITOR))) wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF; + if (WARN_ON((wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) && + (wiphy->regulatory_flags & + (REGULATORY_CUSTOM_REG | + REGULATORY_STRICT_REG | + REGULATORY_COUNTRY_IE_FOLLOW_POWER | + REGULATORY_COUNTRY_IE_IGNORE)))) + return -EINVAL; + if (WARN_ON(wiphy->coalesce && (!wiphy->coalesce->n_rules || !wiphy->coalesce->n_patterns) && diff --git a/net/wireless/core.h b/net/wireless/core.h index faa5b1609aae..e87cae57a83b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -36,6 +36,13 @@ struct cfg80211_registered_device { * the country on the country IE changed. */ char country_ie_alpha2[2]; + /* + * the driver requests the regulatory core to set this regulatory + * domain as the wiphy's. Only used for %REGULATORY_WIPHY_SELF_MANAGED + * devices using the regulatory_set_wiphy_regd() API + */ + const struct ieee80211_regdomain *requested_regd; + /* If a Country IE has been received this tells us the environment * which its telling us its in. This defaults to ENVIRON_ANY */ enum environment_cap env; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2d5dc428c5ab..eebb7e422989 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11042,25 +11042,9 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, NL80211_MCGRP_SCAN, GFP_KERNEL); } -/* - * This can happen on global regulatory changes or device specific settings - * based on custom world regulatory domains. 
- */ -void nl80211_send_reg_change_event(struct regulatory_request *request) +static bool nl80211_reg_change_event_fill(struct sk_buff *msg, + struct regulatory_request *request) { - struct sk_buff *msg; - void *hdr; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return; - - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_CHANGE); - if (!hdr) { - nlmsg_free(msg); - return; - } - /* Userspace can always count this one always being set */ if (nla_put_u8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator)) goto nla_put_failure; @@ -11094,6 +11078,35 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } + return true; + +nla_put_failure: + return false; +} + +/* + * This can happen on global regulatory changes or device specific settings + * based on custom regulatory domains. + */ +void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, + struct regulatory_request *request) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nl80211_reg_change_event_fill(msg, request) == false) + goto nla_put_failure; + genlmsg_end(msg, hdr); rcu_read_lock(); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 7ad70d6f0cc6..84d4edf1d545 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -17,7 +17,21 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, struct net_device *netdev, u32 cmd); void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, struct net_device *netdev); -void nl80211_send_reg_change_event(struct regulatory_request *request); +void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, + struct regulatory_request *request); + +static inline void +nl80211_send_reg_change_event(struct regulatory_request *request) +{ + nl80211_common_reg_change_event(NL80211_CMD_REG_CHANGE, request); +} + +static inline void +nl80211_send_wiphy_reg_change_event(struct regulatory_request *request) +{ + nl80211_common_reg_change_event(NL80211_CMD_WIPHY_REG_CHANGE, request); +} + void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2d9760084b74..c040f8a0f1ed 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1307,6 +1307,9 @@ static bool ignore_reg_update(struct wiphy *wiphy, { struct regulatory_request *lr = get_last_request(); + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + return true; + if (!lr) { REG_DBG_PRINT("Ignoring regulatory request set by %s " "since last_request is not set\n", @@ -2147,11 +2150,52 @@ static void reg_process_pending_beacon_hints(void) spin_unlock_bh(®_pending_beacons_lock); } +static void reg_process_self_managed_hints(void) +{ + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy; + const struct ieee80211_regdomain *tmp; + const struct ieee80211_regdomain *regd; + enum ieee80211_band band; + struct regulatory_request request = {}; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + wiphy = &rdev->wiphy; + + spin_lock(®_requests_lock); + regd = rdev->requested_regd; + rdev->requested_regd = NULL; + spin_unlock(®_requests_lock); + + if (regd == NULL) + continue; + + tmp = get_wiphy_regdom(wiphy); + rcu_assign_pointer(wiphy->regd, regd); + rcu_free_regdom(tmp); + + for (band = 0; band < 
IEEE80211_NUM_BANDS; band++) + handle_band_custom(wiphy, wiphy->bands[band], regd); + + reg_process_ht_flags(wiphy); + + request.wiphy_idx = get_wiphy_idx(wiphy); + request.alpha2[0] = regd->alpha2[0]; + request.alpha2[1] = regd->alpha2[1]; + request.initiator = NL80211_REGDOM_SET_BY_DRIVER; + + nl80211_send_wiphy_reg_change_event(&request); + } + + reg_check_channels(); +} + static void reg_todo(struct work_struct *work) { rtnl_lock(); reg_process_pending_hints(); reg_process_pending_beacon_hints(); + reg_process_self_managed_hints(); rtnl_unlock(); } @@ -2432,6 +2476,8 @@ static void restore_regulatory_settings(bool reset_user) world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + continue; if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG) restore_custom_reg_settings(&rdev->wiphy); } @@ -2835,10 +2881,52 @@ int set_regdom(const struct ieee80211_regdomain *rd) return 0; } +int regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + const struct ieee80211_regdomain *regd; + const struct ieee80211_regdomain *prev_regd; + struct cfg80211_registered_device *rdev; + + if (WARN_ON(!wiphy || !rd)) + return -EINVAL; + + if (WARN(!(wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED), + "wiphy should have REGULATORY_WIPHY_SELF_MANAGED\n")) + return -EPERM; + + if (WARN(!is_valid_rd(rd), "Invalid regulatory domain detected\n")) { + print_regdomain_info(rd); + return -EINVAL; + } + + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); + + rdev = wiphy_to_rdev(wiphy); + + spin_lock(®_requests_lock); + prev_regd = rdev->requested_regd; + rdev->requested_regd = regd; + spin_unlock(®_requests_lock); + + kfree(prev_regd); + + schedule_work(®_work); + return 0; +} +EXPORT_SYMBOL(regulatory_set_wiphy_regd); + void wiphy_regulatory_register(struct wiphy *wiphy) { struct regulatory_request *lr; + /* self-managed devices ignore external hints */ + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS | + REGULATORY_COUNTRY_IE_IGNORE; + if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; -- cgit v1.2.3 From 1bdd716cbccabc8127fbbaaa663c3090302ef78b Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 15 Dec 2014 19:26:01 +0200 Subject: cfg80211: return private regdom for self-managed devices If a device has self-managed regulatory, insist on returning the wiphy specific regdomain if a wiphy-idx is specified. The global regdomain is meaningless for such devices. Also add an attribute for self-managed devices, so usermode can distinguish them as such. Signed-off-by: Arik Nemtsov Reviewed-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 ++++++++++ net/wireless/nl80211.c | 24 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'net') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index fb58e654f523..b3ada0b3a276 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -261,6 +261,9 @@ * the regulatory core to help with compliance. A dump version of this API * is now available which will returns the global regdomain as well as * all private regdomains of present wiphys (for those that have it). + * If a wiphy is self-managed (%NL80211_ATTR_WIPHY_SELF_MANAGED_REG), then + * its private regdomain is the only valid one for it. 
The regulatory + * core is not used to help with compliance in this case. * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command * after being queried by the kernel. CRDA replies by sending a regulatory * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our @@ -1702,6 +1705,11 @@ enum nl80211_commands { * * @NL80211_ATTR_MAC_MASK: MAC address mask * + * @NL80211_ATTR_WIPHY_SELF_MANAGED_REG: flag attribute indicating this device + * is self-managing its regulatory information and any regulatory domain + * obtained from it is coming from the device's wiphy and not the global + * cfg80211 regdomain. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2059,6 +2067,8 @@ enum nl80211_attrs { NL80211_ATTR_MAC_MASK, + NL80211_ATTR_WIPHY_SELF_MANAGED_REG, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index eebb7e422989..5b1907f4c181 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -396,6 +396,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, + [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -1701,6 +1702,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.max_num_csa_counters)) goto nla_put_failure; + if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -5406,6 +5411,8 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) goto put_failure; if (info->attrs[NL80211_ATTR_WIPHY]) { + bool self_managed; + rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { nlmsg_free(msg); @@ -5413,8 +5420,16 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) } wiphy = &rdev->wiphy; + self_managed = wiphy->regulatory_flags & + REGULATORY_WIPHY_SELF_MANAGED; regdom = get_wiphy_regdom(wiphy); + /* a self-managed-reg device must have a private regdom */ + if (WARN_ON(!regdom && self_managed)) { + nlmsg_free(msg); + return -EINVAL; + } + if (regdom && nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) goto nla_put_failure; @@ -5471,6 +5486,10 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) goto nla_put_failure; + if (wiphy && wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: @@ -11076,6 +11095,11 @@ static bool nl80211_reg_change_event_fill(struct sk_buff *msg, if (wiphy && nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) goto nla_put_failure; + + if (wiphy && + wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; } return true; -- cgit v1.2.3 From db8dfee57d37d2cd14a750477babbae52b69a494 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 15 Dec 2014 19:26:02 +0200 Subject: cfg80211: avoid intersection when applying 
self-managed reg The custom-reg handling function can currently only add flags to a given channel. This results in stale flags being left applied. In some cases a channel was disabled and even the orig_flags were changed to reflect this. Previously the API was designed for a single invocation before wiphy registration, so this didn't matter. The previous approach doesn't scale well to self-managed regulatory devices, particularly when a more permissive regdom is applied after a restrictive one. Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- net/wireless/reg.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c040f8a0f1ed..9a5411cdf5e8 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1680,8 +1680,12 @@ static void handle_channel_custom(struct wiphy *wiphy, if (IS_ERR(reg_rule)) { REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); - chan->orig_flags |= IEEE80211_CHAN_DISABLED; - chan->flags = chan->orig_flags; + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { + chan->flags |= IEEE80211_CHAN_DISABLED; + } else { + chan->orig_flags |= IEEE80211_CHAN_DISABLED; + chan->flags = chan->orig_flags; + } return; } @@ -1706,7 +1710,13 @@ static void handle_channel_custom(struct wiphy *wiphy, chan->dfs_state = NL80211_DFS_USABLE; chan->beacon_found = false; - chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; + + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + chan->flags = chan->orig_flags | bw_flags | + map_regdom_flags(reg_rule->flags); + else + chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; + chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain); chan->max_reg_power = chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); -- cgit v1.2.3 From 179b8fc7fb59ec69835f225e6c813040bbf52ba8 Mon Sep 17 00:00:00 2001 From: Chaya Rachel Ivgi Date: Sun, 14 Dec 2014 10:38:59 +0200 Subject: mac80211: Fix ignored HT override configurations HT override configurations was ignored when choosing the channel (until now, the override configuration affected only the capabilities shown in the IEs). The override configurations received only on association time, so in this case we should determine the channel again. 
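In outline, the approach is to apply the user overrides to a local copy of
the band capabilities and base the channel decision on that copy (an
editorial sketch mirroring the change below; allow_40mhz is a placeholder):

	struct ieee80211_sta_ht_cap sta_ht_cap;

	memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
	ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);

	/* e.g. 40 MHz is only considered if still allowed after overrides */
	if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)
		allow_40mhz = true;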
Signed-off-by: Chaya Rachel Ivgi Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ea7c714ba936..877b647d3e5c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -157,14 +157,18 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_chan_def vht_chandef; + struct ieee80211_sta_ht_cap sta_ht_cap; u32 ht_cfreq, ret; + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + chandef->chan = channel; chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = channel->center_freq; chandef->center_freq2 = 0; - if (!ht_cap || !ht_oper || !sband->ht_cap.ht_supported) { + if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; goto out; } @@ -197,7 +201,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } /* check 40 MHz support, if we have it */ - if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { + if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: chandef->width = NL80211_CHAN_WIDTH_40; @@ -4196,9 +4200,13 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; - bool have_sta = false; + struct ieee80211_supported_band *sband; + struct ieee80211_sta_ht_cap sta_ht_cap; + bool have_sta = false, is_override = false; int err; + sband = local->hw.wiphy->bands[cbss->channel->band]; + if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) return -EINVAL; @@ -4213,25 +4221,32 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, if (!new_sta) return -ENOMEM; } + + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + + is_override = (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) != + (sband->ht_cap.cap & + IEEE80211_HT_CAP_SUP_WIDTH_20_40); + + if (new_sta || is_override) { + err = ieee80211_prep_channel(sdata, cbss); + if (err) { + if (new_sta) + sta_info_free(local, new_sta); + return -EINVAL; + } + } + if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_supported_band *sband; const struct cfg80211_bss_ies *ies; - int shift; + int shift = ieee80211_vif_get_shift(&sdata->vif); u32 rate_flags; - sband = local->hw.wiphy->bands[cbss->channel->band]; - - err = ieee80211_prep_channel(sdata, cbss); - if (err) { - sta_info_free(local, new_sta); - return -EINVAL; - } - shift = ieee80211_vif_get_shift(&sdata->vif); - rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (WARN_ON(!chanctx_conf)) { -- cgit v1.2.3 From a5fee9cb6255b9bba5a977f92cb4807eafb89db0 Mon Sep 17 00:00:00 2001 From: Moshe Benji Date: Sun, 14 Dec 2014 11:05:50 +0200 Subject: mac80211: handle power constraint and country IEs in RRM In beacons, handle the Country IE even if no Power Constraint IE is present, and, capability wise, also in case that the Radio Measurements capability is 
enabled. In cases where the Country IE should be handled and that the Power Constraint IE is not present, the Country IE alone will set the power limit (and not both Country and Power Constraint IEs). Signed-off-by: Moshe Benji Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 877b647d3e5c..38129dc4e7ca 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1287,8 +1287,11 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, country_ie_len -= 3; } - if (have_chan_pwr) + if (have_chan_pwr && pwr_constr_elem) *pwr_reduction = *pwr_constr_elem; + else + *pwr_reduction = 0; + return have_chan_pwr; } @@ -1317,10 +1320,11 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, int chan_pwr = 0, pwr_reduction_80211h = 0; int pwr_level_cisco, pwr_level_80211h; int new_ap_level; + __le16 capab = mgmt->u.probe_resp.capab_info; - if (country_ie && pwr_constr_ie && - mgmt->u.probe_resp.capab_info & - cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT)) { + if (country_ie && + (capab & cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT) || + capab & cpu_to_le16(WLAN_CAPABILITY_RADIO_MEASURE))) { has_80211h_pwr = ieee80211_find_80211h_pwr_constr( sdata, channel, country_ie, country_ie_len, pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h); -- cgit v1.2.3 From 1c45c5ce324fec967dca5993f79b54769da410dc Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 14 Dec 2014 11:05:51 +0200 Subject: mac80211: update sta bw on ht chanwidth action frame Commit e1a0c6b ("mac80211: stop toggling IEEE80211_HT_CAP_SUP_WIDTH_20_40") mistakenly removed the actual update of sta->sta.bandwidth. Refactor ieee80211_sta_cur_vht_bw() into multiple functions (calculate caps-bw and chandef-bw separately, and min them with cur_max_bandwidth). On ht chanwidth action frame set only cur_max_bandwidth (according to the sta capabilities) and recalc the sta bw. Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/rx.c | 11 +++++-- net/mac80211/vht.c | 73 ++++++++++++++++++++++++---------------------- 3 files changed, 47 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cc6e964d9837..4f45cab8b7f1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1704,6 +1704,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_vht_cap *vht_cap_ie, struct sta_info *sta); +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 49c23bdf08bb..444ebff955c1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2597,7 +2597,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { struct ieee80211_supported_band *sband; u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; - enum ieee80211_sta_rx_bandwidth new_bw; + enum ieee80211_sta_rx_bandwidth max_bw, new_bw; /* If it doesn't support 40 MHz it can't change ... 
*/ if (!(rx->sta->sta.ht_cap.cap & @@ -2605,13 +2605,18 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ) - new_bw = IEEE80211_STA_RX_BW_20; + max_bw = IEEE80211_STA_RX_BW_20; else - new_bw = ieee80211_sta_cur_vht_bw(rx->sta); + max_bw = ieee80211_sta_cap_rx_bw(rx->sta); + + /* set cur_max_bandwidth and recalc sta bw */ + rx->sta->cur_max_bandwidth = max_bw; + new_bw = ieee80211_sta_cur_vht_bw(rx->sta); if (rx->sta->sta.bandwidth == new_bw) goto handled; + rx->sta->sta.bandwidth = new_bw; sband = rx->local->hw.wiphy->bands[status->band]; rate_control_rate_update(local, sband, rx->sta, diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index bc9e8fc48785..85f9596da07b 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -269,51 +269,54 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); } -enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) { - struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 cap = sta->sta.vht_cap.cap; - enum ieee80211_sta_rx_bandwidth bw; + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + u32 cap_width; - if (!sta->sta.vht_cap.vht_supported) { - bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? - IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; - goto check_max; - } + if (!vht_cap->vht_supported) + return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_STA_RX_BW_40 : + IEEE80211_STA_RX_BW_20; - switch (sdata->vif.bss_conf.chandef.width) { - default: - WARN_ON_ONCE(1); - /* fall through */ + cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + + if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ || + cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) + return IEEE80211_STA_RX_BW_160; + + return IEEE80211_STA_RX_BW_80; +} + +static enum ieee80211_sta_rx_bandwidth +ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) +{ + switch (width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: - bw = IEEE80211_STA_RX_BW_20; - break; + return IEEE80211_STA_RX_BW_20; case NL80211_CHAN_WIDTH_40: - bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? 
- IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; - break; + return IEEE80211_STA_RX_BW_40; + case NL80211_CHAN_WIDTH_80: + return IEEE80211_STA_RX_BW_80; case NL80211_CHAN_WIDTH_160: - if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) { - bw = IEEE80211_STA_RX_BW_160; - break; - } - /* fall through */ case NL80211_CHAN_WIDTH_80P80: - if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) { - bw = IEEE80211_STA_RX_BW_160; - break; - } - /* fall through */ - case NL80211_CHAN_WIDTH_80: - bw = IEEE80211_STA_RX_BW_80; + return IEEE80211_STA_RX_BW_160; + default: + WARN_ON_ONCE(1); + return IEEE80211_STA_RX_BW_20; } +} + +enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + enum ieee80211_sta_rx_bandwidth bw; + + bw = ieee80211_chan_width_to_rx_bw(sdata->vif.bss_conf.chandef.width); + bw = min(bw, ieee80211_sta_cap_rx_bw(sta)); + bw = min(bw, sta->cur_max_bandwidth); - check_max: - if (bw > sta->cur_max_bandwidth) - bw = sta->cur_max_bandwidth; return bw; } -- cgit v1.2.3 From 1a952c94b544cbf01291186b8c6ca25fbb5873f2 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Sun, 14 Dec 2014 11:05:52 +0200 Subject: mac80211: remove unused variable in ieee80211_parse_ch_switch_ie() The ht_oper variable is assigned a value, but never used in ieee80211_parse_ch_switch_ie(). Remove it. Signed-off-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/spectmgmt.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index efeba56c913b..06e6ac8cc693 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -34,19 +34,15 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def new_vht_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; - const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; - ht_oper = elems->ht_operation; if (sta_flags & (IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_40MHZ)) { sec_chan_offs = NULL; wide_bw_chansw_ie = NULL; - /* only used for bandwidth here */ - ht_oper = NULL; } if (sta_flags & IEEE80211_STA_DISABLE_VHT) -- cgit v1.2.3 From 0f8b82456178d558f14011e06ebf9af937c4b197 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 14 Dec 2014 11:05:53 +0200 Subject: mac80211: avoid reconfig if no interfaces are up If there are no interfaces up, there is no reason to continue the reconfig flow. The current code might end up calling driver callbacks (e.g. resume(), reconfig_complete()) while the driver is already stopped. 
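The guard added by this change has a common shape: bail out of the resume/restart path before touching the hardware when nothing is open, and gate the completion callback on the same condition. A minimal stand-alone model of that control flow follows; open_count, suspended and drv_reconfig_complete() are stand-ins for the mac80211 names, not real kernel code.

#include <stdbool.h>
#include <stdio.h>

struct local {
	int open_count;     /* number of interfaces that are up */
	bool suspended;
};

static void drv_reconfig_complete(const char *reason)
{
	/* stands in for the driver callback; must not run on stopped hw */
	printf("reconfig_complete: %s\n", reason);
}

static int reconfig(struct local *local)
{
	/* nothing to do if the hardware shouldn't be running */
	if (!local->open_count)
		goto wake_up;

	printf("restarting hardware, reprogramming interfaces...\n");

	if (!local->suspended)
		drv_reconfig_complete("restart");

wake_up:
	local->suspended = false;
	return 0;
}

int main(void)
{
	struct local stopped = { .open_count = 0, .suspended = true };
	struct local running = { .open_count = 1, .suspended = false };

	reconfig(&stopped);   /* skips straight to wake_up, no driver calls */
	reconfig(&running);
	return 0;
}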
Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/util.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 974ebe70f5b0..0f9bf479952e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1735,6 +1735,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct cfg80211_sched_scan_request *sched_scan_req; bool sched_scan_stopped = false; + /* nothing to do if HW shouldn't run */ + if (!local->open_count) + goto wake_up; + #ifdef CONFIG_PM if (local->suspended) local->resuming = true; @@ -1756,9 +1760,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) reconfig_due_to_wowlan = true; } #endif - /* everything else happens only if HW was up & running */ - if (!local->open_count) - goto wake_up; /* * Upon resume hardware can sometimes be goofy due to @@ -2042,7 +2043,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) * If this is for hw restart things are still running. * We may want to change that later, however. */ - if (!local->suspended || reconfig_due_to_wowlan) + if (local->open_count && (!local->suspended || reconfig_due_to_wowlan)) drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); if (!local->suspended) @@ -2054,7 +2055,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) mb(); local->resuming = false; - if (!reconfig_due_to_wowlan) + if (local->open_count && !reconfig_due_to_wowlan) drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_SUSPEND); list_for_each_entry(sdata, &local->interfaces, list) { -- cgit v1.2.3 From 31a60ed1e95ab8afbadb65599bef12b195080a0c Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Mon, 15 Dec 2014 13:25:38 +0200 Subject: nl80211: Convert sched_scan_req pointer to RCU pointer Because of possible races when accessing sched_scan_req pointer in rdev, the sched_scan_req is converted to RCU pointer. 
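The conversion follows the usual RCU pointer life cycle visible in the diff below: publish with rcu_assign_pointer(), read under RTNL with rtnl_dereference() or peek with rcu_access_pointer(), and retire with RCU_INIT_POINTER(..., NULL) followed by kfree_rcu(). Outside the kernel the publish/read ordering can be modelled with C11 release/acquire atomics; the sketch below does exactly that and only hints at the grace-period part in a comment, so it is an illustration of the ordering, not of RCU itself, and all names are made up.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sched_scan_req {
	int interval_msecs;
};

/* rdev->sched_scan_req modelled as an atomic pointer */
static _Atomic(struct sched_scan_req *) sched_scan_req;

/* ~ rcu_assign_pointer(): release store so readers see an initialised object */
static void publish(struct sched_scan_req *req)
{
	atomic_store_explicit(&sched_scan_req, req, memory_order_release);
}

/* ~ rcu_dereference()/rtnl_dereference(): acquire load of the current pointer */
static struct sched_scan_req *current_req(void)
{
	return atomic_load_explicit(&sched_scan_req, memory_order_acquire);
}

/* ~ RCU_INIT_POINTER(..., NULL) + kfree_rcu(): real code must wait for a
 * grace period before freeing; this single-threaded model frees directly. */
static void retire(void)
{
	struct sched_scan_req *old =
		atomic_exchange_explicit(&sched_scan_req, NULL, memory_order_acq_rel);
	free(old);
}

int main(void)
{
	struct sched_scan_req *req = malloc(sizeof(*req));

	req->interval_msecs = 10000;
	publish(req);
	printf("interval: %d ms\n", current_req()->interval_msecs);
	retire();
	return 0;
}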
Signed-off-by: Jukka Rissanen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/core.c | 10 +++++++--- net/wireless/core.h | 2 +- net/wireless/nl80211.c | 19 +++++++++++-------- net/wireless/scan.c | 13 ++++++++----- 5 files changed, 29 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4bc1fc9971a5..45d4d7292e53 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1516,6 +1516,7 @@ struct cfg80211_match_set { * @mac_addr_mask: MAC address mask used with randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr + * @rcu_head: RCU callback used to free the struct */ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1537,6 +1538,7 @@ struct cfg80211_sched_scan_request { struct wiphy *wiphy; struct net_device *dev; unsigned long scan_start; + struct rcu_head rcu_head; /* keep last */ struct ieee80211_channel *channels[0]; diff --git a/net/wireless/core.c b/net/wireless/core.c index b661fcce7e3c..0743449405ca 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -867,6 +867,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; + struct cfg80211_sched_scan_request *sched_scan_req; ASSERT_RTNL(); ASSERT_WDEV_LOCK(wdev); @@ -877,7 +878,8 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + if (sched_scan_req && dev == sched_scan_req->dev) __cfg80211_stop_sched_scan(rdev, false); #ifdef CONFIG_CFG80211_WEXT @@ -956,6 +958,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; + struct cfg80211_sched_scan_request *sched_scan_req; if (!wdev) return NOTIFY_DONE; @@ -1021,8 +1024,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, ___cfg80211_scan_done(rdev, false); } - if (WARN_ON(rdev->sched_scan_req && - rdev->sched_scan_req->dev == wdev->netdev)) { + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + if (WARN_ON(sched_scan_req && + sched_scan_req->dev == wdev->netdev)) { __cfg80211_stop_sched_scan(rdev, false); } diff --git a/net/wireless/core.h b/net/wireless/core.h index e87cae57a83b..e82030c32311 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -70,7 +70,7 @@ struct cfg80211_registered_device { u32 bss_generation; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; - struct cfg80211_sched_scan_request *sched_scan_req; + struct cfg80211_sched_scan_request __rcu *sched_scan_req; unsigned long suspend_at; struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5b1907f4c181..bacdf22fa472 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6190,6 +6190,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_sched_scan_request *sched_scan_req; int err; if (!(rdev->wiphy.flags & 
WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || @@ -6199,27 +6200,29 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (rdev->sched_scan_req) return -EINPROGRESS; - rdev->sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, - info->attrs); - err = PTR_ERR_OR_ZERO(rdev->sched_scan_req); + sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, + info->attrs); + + err = PTR_ERR_OR_ZERO(sched_scan_req); if (err) goto out_err; - err = rdev_sched_scan_start(rdev, dev, rdev->sched_scan_req); + err = rdev_sched_scan_start(rdev, dev, sched_scan_req); if (err) goto out_free; - rdev->sched_scan_req->dev = dev; - rdev->sched_scan_req->wiphy = &rdev->wiphy; + sched_scan_req->dev = dev; + sched_scan_req->wiphy = &rdev->wiphy; + + rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req); nl80211_send_sched_scan(rdev, dev, NL80211_CMD_START_SCHED_SCAN); return 0; out_free: - kfree(rdev->sched_scan_req); + kfree(sched_scan_req); out_err: - rdev->sched_scan_req = NULL; return err; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index bda39f149810..c705c3e2b751 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -257,7 +257,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) rtnl_lock(); - request = rdev->sched_scan_req; + request = rtnl_dereference(rdev->sched_scan_req); /* we don't have sched_scan_req anymore if the scan is stopping */ if (request) { @@ -279,7 +279,8 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) { trace_cfg80211_sched_scan_results(wiphy); /* ignore if we're not scanning */ - if (wiphy_to_rdev(wiphy)->sched_scan_req) + + if (rcu_access_pointer(wiphy_to_rdev(wiphy)->sched_scan_req)) queue_work(cfg80211_wq, &wiphy_to_rdev(wiphy)->sched_scan_results_wk); } @@ -308,6 +309,7 @@ EXPORT_SYMBOL(cfg80211_sched_scan_stopped); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, bool driver_initiated) { + struct cfg80211_sched_scan_request *sched_scan_req; struct net_device *dev; ASSERT_RTNL(); @@ -315,7 +317,8 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, if (!rdev->sched_scan_req) return -ENOENT; - dev = rdev->sched_scan_req->dev; + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + dev = sched_scan_req->dev; if (!driver_initiated) { int err = rdev_sched_scan_stop(rdev, dev); @@ -325,8 +328,8 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED); - kfree(rdev->sched_scan_req); - rdev->sched_scan_req = NULL; + RCU_INIT_POINTER(rdev->sched_scan_req, NULL); + kfree_rcu(sched_scan_req, rcu_head); return 0; } -- cgit v1.2.3 From 93a1e86ce10e4898f9ca9cd09d659a8a7780ee5e Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Mon, 15 Dec 2014 13:25:39 +0200 Subject: nl80211: Stop scheduled scan if netlink client disappears An attribute NL80211_ATTR_SOCKET_OWNER can be set by the scan initiator. If present, the attribute will cause the scan to be stopped if the client dies. 
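From userspace the new behaviour is opted into by adding the NL80211_ATTR_SOCKET_OWNER flag to the NL80211_CMD_START_SCHED_SCAN request. A rough libnl-3 outline of such a request is below; error handling is dropped, "wlan0" is a placeholder, and the scan parameters are reduced to a bare minimum (real requests also carry SSIDs/match sets), so treat it as a sketch rather than a working scanner.

/* build with: gcc owner_scan.c $(pkg-config --cflags --libs libnl-genl-3.0) */
#include <net/if.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, "nl80211");

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NL80211_CMD_START_SCHED_SCAN, 0);

	nla_put_u32(msg, NL80211_ATTR_IFINDEX, if_nametoindex("wlan0"));
	nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, 10000); /* ms */

	/* tie the scheduled scan to this socket's lifetime */
	nla_put_flag(msg, NL80211_ATTR_SOCKET_OWNER);

	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);

	/* when this process exits and the socket closes, the kernel side
	 * (sched_scan_stop_wk) tears the scan down */
	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}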
Signed-off-by: Jukka Rissanen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/core.c | 16 ++++++++++++++++ net/wireless/core.h | 2 ++ net/wireless/nl80211.c | 16 ++++++++++++++++ 5 files changed, 40 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 45d4d7292e53..bd672ea08c9a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1517,6 +1517,8 @@ struct cfg80211_match_set { * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @rcu_head: RCU callback used to free the struct + * @owner_nlportid: netlink portid of owner (if this should is a request + * owned by a particular socket) */ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1539,6 +1541,7 @@ struct cfg80211_sched_scan_request { struct net_device *dev; unsigned long scan_start; struct rcu_head rcu_head; + u32 owner_nlportid; /* keep last */ struct ieee80211_channel *channels[0]; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b3ada0b3a276..c0383e983544 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1672,6 +1672,9 @@ enum nl80211_commands { * @NL80211_ATTR_SOCKET_OWNER: Flag attribute, if set during interface * creation then the new interface will be owned by the netlink socket * that created it and will be destroyed when the socket is closed. + * If set during scheduled scan start then the new scan req will be + * owned by the netlink socket that created it and the scheduled scan will + * be stopped when the socket is closed. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. diff --git a/net/wireless/core.c b/net/wireless/core.c index 0743449405ca..456e4c38c279 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -321,6 +321,20 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work) rtnl_unlock(); } +static void cfg80211_sched_scan_stop_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + sched_scan_stop_wk); + + rtnl_lock(); + + __cfg80211_stop_sched_scan(rdev, false); + + rtnl_unlock(); +} + /* exported functions */ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, @@ -407,6 +421,7 @@ use_default_name: INIT_LIST_HEAD(&rdev->destroy_list); spin_lock_init(&rdev->destroy_list_lock); INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); + INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -787,6 +802,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); flush_work(&rdev->destroy_work); + flush_work(&rdev->sched_scan_stop_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) diff --git a/net/wireless/core.h b/net/wireless/core.h index e82030c32311..801cd49c5a0c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -91,6 +91,8 @@ struct cfg80211_registered_device { struct list_head destroy_list; struct work_struct destroy_work; + struct work_struct sched_scan_stop_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); diff --git a/net/wireless/nl80211.c 
b/net/wireless/nl80211.c index bacdf22fa472..702920134b34 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6214,6 +6214,9 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, sched_scan_req->dev = dev; sched_scan_req->wiphy = &rdev->wiphy; + if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) + sched_scan_req->owner_nlportid = info->snd_portid; + rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req); nl80211_send_sched_scan(rdev, dev, @@ -12618,6 +12621,13 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { bool schedule_destroy_work = false; + bool schedule_scan_stop = false; + struct cfg80211_sched_scan_request *sched_scan_req = + rcu_dereference(rdev->sched_scan_req); + + if (sched_scan_req && notify->portid && + sched_scan_req->owner_nlportid == notify->portid) + schedule_scan_stop = true; list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); @@ -12648,6 +12658,12 @@ static int nl80211_netlink_notify(struct notifier_block * nb, spin_unlock(&rdev->destroy_list_lock); schedule_work(&rdev->destroy_work); } + } else if (schedule_scan_stop) { + sched_scan_req->owner_nlportid = 0; + + if (rdev->ops->sched_scan_stop && + rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + schedule_work(&rdev->sched_scan_stop_wk); } } -- cgit v1.2.3 From 7fe9a3882bb37195c41ab125a0f2852398d2646a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 10 Dec 2014 15:33:12 +0100 Subject: ieee802154: rework cca setting The current cca setting handling is a driver-specific call. We need to introduce an 802.15.4-specific layer that maps the 802.15.4 cca modes to driver-specific ones inside the 802.15.4 driver. This patch adds such a layer and maps the cca settings to driver-specific ones.
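The driver side of this rework is essentially a translation table: the generic CCA mode (plus the AND/OR option for the combined mode) is mapped onto whatever encoding the transceiver's CCA_MODE register expects. The stand-alone sketch below uses the same register values as the at86rf230 hunk further down; the enum and function names are illustrative, not the nl802154 ones.

#include <stdio.h>

/* mirrors the nl802154 CCA mode/option enums in spirit; values illustrative */
enum cca_mode { CCA_ENERGY, CCA_CARRIER, CCA_ENERGY_CARRIER };
enum cca_opt  { CCA_OPT_AND, CCA_OPT_OR };

struct phy_cca {
	enum cca_mode mode;
	enum cca_opt opt;   /* only meaningful for CCA_ENERGY_CARRIER */
};

/* translate the generic setting into the at86rf230 SR_CCA_MODE encoding */
static int cca_to_reg(const struct phy_cca *cca)
{
	switch (cca->mode) {
	case CCA_ENERGY:
		return 1;
	case CCA_CARRIER:
		return 2;
	case CCA_ENERGY_CARRIER:
		return cca->opt == CCA_OPT_AND ? 3 : 0;
	default:
		return -1;   /* -EINVAL in the kernel */
	}
}

int main(void)
{
	struct phy_cca cca = { .mode = CCA_ENERGY_CARRIER, .opt = CCA_OPT_OR };

	printf("SR_CCA_MODE = %d\n", cca_to_reg(&cca));
	return 0;
}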
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 30 ++++++++++++++++++++++++++++-- include/net/cfg802154.h | 7 ++++++- include/net/ieee802154_netdev.h | 4 +++- include/net/mac802154.h | 5 ++++- net/ieee802154/nl-mac.c | 4 ++-- net/ieee802154/nl802154.c | 2 +- net/ieee802154/sysfs.c | 2 +- net/mac802154/driver-ops.h | 5 +++-- net/mac802154/mac_cmd.c | 6 +++--- 9 files changed, 51 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 1c0135620c62..1ac46ba41fd8 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1146,11 +1146,37 @@ at86rf230_set_lbt(struct ieee802154_hw *hw, bool on) } static int -at86rf230_set_cca_mode(struct ieee802154_hw *hw, u8 mode) +at86rf230_set_cca_mode(struct ieee802154_hw *hw, + const struct wpan_phy_cca *cca) { struct at86rf230_local *lp = hw->priv; + u8 val; - return at86rf230_write_subreg(lp, SR_CCA_MODE, mode); + /* mapping 802.15.4 to driver spec */ + switch (cca->mode) { + case NL802154_CCA_ENERGY: + val = 1; + break; + case NL802154_CCA_CARRIER: + val = 2; + break; + case NL802154_CCA_ENERGY_CARRIER: + switch (cca->opt) { + case NL802154_CCA_OPT_ENERGY_CARRIER_AND: + val = 3; + break; + case NL802154_CCA_OPT_ENERGY_CARRIER_OR: + val = 0; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + return at86rf230_write_subreg(lp, SR_CCA_MODE, val); } static int diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 7f713acfa106..6ee2618ac78a 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -56,6 +56,11 @@ struct cfg802154_ops { struct wpan_dev *wpan_dev, bool mode); }; +struct wpan_phy_cca { + enum nl802154_cca_modes mode; + enum nl802154_cca_opts opt; +}; + struct wpan_phy { struct mutex pib_lock; @@ -76,7 +81,7 @@ struct wpan_phy { u8 current_page; u32 channels_supported[IEEE802154_MAX_PAGE + 1]; s8 transmit_power; - u8 cca_mode; + struct wpan_phy_cca cca; __le64 perm_extended_addr; diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 83bb8a73d23c..94a297052442 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -28,6 +28,8 @@ #include #include +#include + struct ieee802154_sechdr { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 level:3, @@ -337,7 +339,7 @@ struct ieee802154_mac_params { s8 frame_retries; bool lbt; - u8 cca_mode; + struct wpan_phy_cca cca; s32 cca_ed_level; }; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index c823d910b46c..850647811749 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -20,6 +20,8 @@ #include #include +#include + /* General MAC frame format: * 2 bytes: Frame Control * 1 byte: Sequence Number @@ -212,7 +214,8 @@ struct ieee802154_ops { unsigned long changed); int (*set_txpower)(struct ieee802154_hw *hw, int db); int (*set_lbt)(struct ieee802154_hw *hw, bool on); - int (*set_cca_mode)(struct ieee802154_hw *hw, u8 mode); + int (*set_cca_mode)(struct ieee802154_hw *hw, + const struct wpan_phy_cca *cca); int (*set_cca_ed_level)(struct ieee802154_hw *hw, s32 level); int (*set_csma_params)(struct ieee802154_hw *hw, diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index cd919493c976..3c902e9516fb 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -121,7 +121,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, params.transmit_power) || 
nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) || nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, - params.cca_mode) || + params.cca.mode) || nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, params.cca_ed_level) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, @@ -516,7 +516,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); if (info->attrs[IEEE802154_ATTR_CCA_MODE]) - params.cca_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); + params.cca.mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 889647744697..1efbe4250024 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -291,7 +291,7 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, /* cca mode */ if (nla_put_u8(msg, NL802154_ATTR_CCA_MODE, - rdev->wpan_phy.cca_mode)) + rdev->wpan_phy.cca.mode)) goto nla_put_failure; if (nla_put_s8(msg, NL802154_ATTR_TX_POWER, diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c index 1613b9c65dfa..dff55c2d87f3 100644 --- a/net/ieee802154/sysfs.c +++ b/net/ieee802154/sysfs.c @@ -68,7 +68,7 @@ static DEVICE_ATTR_RO(name) MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); MASTER_SHOW(transmit_power, "%d +- 1 dB"); -MASTER_SHOW(cca_mode, "%d"); +MASTER_SHOW_COMPLEX(cca_mode, "%d", phy->cca.mode); static ssize_t channels_supported_show(struct device *dev, struct device_attribute *attr, diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index f21e864613d0..98180a9fff4a 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -70,7 +70,8 @@ static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm) return local->ops->set_txpower(&local->hw, dbm); } -static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) +static inline int drv_set_cca_mode(struct ieee802154_local *local, + const struct wpan_phy_cca *cca) { might_sleep(); @@ -79,7 +80,7 @@ static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) return -EOPNOTSUPP; } - return local->ops->set_cca_mode(&local->hw, cca_mode); + return local->ops->set_cca_mode(&local->hw, cca); } static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 6aacb1816889..bdccb4ecd30f 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -81,7 +81,7 @@ static int mac802154_set_mac_params(struct net_device *dev, /* PHY */ wpan_dev->wpan_phy->transmit_power = params->transmit_power; - wpan_dev->wpan_phy->cca_mode = params->cca_mode; + wpan_dev->wpan_phy->cca = params->cca; wpan_dev->wpan_phy->cca_ed_level = params->cca_ed_level; /* MAC */ @@ -98,7 +98,7 @@ static int mac802154_set_mac_params(struct net_device *dev, } if (local->hw.flags & IEEE802154_HW_CCA_MODE) { - ret = drv_set_cca_mode(local, params->cca_mode); + ret = drv_set_cca_mode(local, ¶ms->cca); if (ret < 0) return ret; } @@ -122,7 +122,7 @@ static void mac802154_get_mac_params(struct net_device *dev, /* PHY */ params->transmit_power = wpan_dev->wpan_phy->transmit_power; - params->cca_mode = wpan_dev->wpan_phy->cca_mode; + params->cca = wpan_dev->wpan_phy->cca; params->cca_ed_level = wpan_dev->wpan_phy->cca_ed_level; /* MAC */ -- cgit v1.2.3 From 
ba2a9506a76450568cbc0d51626d94cf8528c0c7 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 10 Dec 2014 15:33:13 +0100 Subject: nl802154: introduce support for cca settings This patch adds support for setting cca parameters via nl802154. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 3 +++ include/net/nl802154.h | 2 +- net/ieee802154/nl802154.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- net/ieee802154/rdev-ops.h | 7 +++++++ net/mac802154/cfg.c | 21 +++++++++++++++++++++ 5 files changed, 75 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 6ee2618ac78a..eeda67652766 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -25,6 +25,7 @@ #include struct wpan_phy; +struct wpan_phy_cca; struct cfg802154_ops { struct net_device * (*add_virtual_intf_deprecated)(struct wpan_phy *wpan_phy, @@ -39,6 +40,8 @@ struct cfg802154_ops { int (*del_virtual_intf)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev); int (*set_channel)(struct wpan_phy *wpan_phy, u8 page, u8 channel); + int (*set_cca_mode)(struct wpan_phy *wpan_phy, + const struct wpan_phy_cca *cca); int (*set_pan_id)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id); int (*set_short_addr)(struct wpan_phy *wpan_phy, diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 86c1b2f15b57..f8b5bc997959 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -82,7 +82,7 @@ enum nl802154_attrs { NL802154_ATTR_TX_POWER, NL802154_ATTR_CCA_MODE, - NL802154_ATTR_CCA_MODE3_AND, + NL802154_ATTR_CCA_OPT, NL802154_ATTR_CCA_ED_LEVEL, NL802154_ATTR_MAX_FRAME_RETRIES, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 1efbe4250024..a25b9bbd077b 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -209,7 +209,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_TX_POWER] = { .type = NLA_S8, }, - [NL802154_ATTR_CCA_MODE] = { .type = NLA_U8, }, + [NL802154_ATTR_CCA_MODE] = { .type = NLA_U32, }, + [NL802154_ATTR_CCA_OPT] = { .type = NLA_U32, }, [NL802154_ATTR_SUPPORTED_CHANNEL] = { .type = NLA_U32, }, @@ -290,10 +291,16 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; /* cca mode */ - if (nla_put_u8(msg, NL802154_ATTR_CCA_MODE, - rdev->wpan_phy.cca.mode)) + if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE, + rdev->wpan_phy.cca.mode)) goto nla_put_failure; + if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT, + rdev->wpan_phy.cca.opt)) + goto nla_put_failure; + } + if (nla_put_s8(msg, NL802154_ATTR_TX_POWER, rdev->wpan_phy.transmit_power)) goto nla_put_failure; @@ -622,6 +629,31 @@ static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info) return rdev_set_channel(rdev, page, channel); } +static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct wpan_phy_cca cca; + + if (!info->attrs[NL802154_ATTR_CCA_MODE]) + return -EINVAL; + + cca.mode = nla_get_u32(info->attrs[NL802154_ATTR_CCA_MODE]); + /* checking 802.15.4 constraints */ + if (cca.mode < NL802154_CCA_ENERGY || cca.mode > NL802154_CCA_ATTR_MAX) + return -EINVAL; + + if (cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (!info->attrs[NL802154_ATTR_CCA_OPT]) + return -EINVAL; + + cca.opt = nla_get_u32(info->attrs[NL802154_ATTR_CCA_OPT]); + 
if (cca.opt > NL802154_CCA_OPT_ATTR_MAX) + return -EINVAL; + } + + return rdev_set_cca_mode(rdev, &cca); +} + static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; @@ -894,6 +926,14 @@ static const struct genl_ops nl802154_ops[] = { .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, + { + .cmd = NL802154_CMD_SET_CCA_MODE, + .doit = nl802154_set_cca_mode, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | + NL802154_FLAG_NEED_RTNL, + }, { .cmd = NL802154_CMD_SET_PAN_ID, .doit = nl802154_set_pan_id, diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index aff54fbd9264..7c46732fad2b 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -41,6 +41,13 @@ rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel) return rdev->ops->set_channel(&rdev->wpan_phy, page, channel); } +static inline int +rdev_set_cca_mode(struct cfg802154_registered_device *rdev, + const struct wpan_phy_cca *cca) +{ + return rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); +} + static inline int rdev_set_pan_id(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 pan_id) diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index c035708ada16..7d31da503dcf 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -86,6 +86,26 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel) return ret; } +static int +ieee802154_set_cca_mode(struct wpan_phy *wpan_phy, + const struct wpan_phy_cca *cca) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + int ret; + + ASSERT_RTNL(); + + /* check if phy support this setting */ + if (!(local->hw.flags & IEEE802154_HW_CCA_MODE)) + return -EOPNOTSUPP; + + ret = drv_set_cca_mode(local, cca); + if (!ret) + wpan_phy->cca = *cca; + + return ret; +} + static int ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id) @@ -201,6 +221,7 @@ const struct cfg802154_ops mac802154_config_ops = { .add_virtual_intf = ieee802154_add_iface, .del_virtual_intf = ieee802154_del_iface, .set_channel = ieee802154_set_channel, + .set_cca_mode = ieee802154_set_cca_mode, .set_pan_id = ieee802154_set_pan_id, .set_short_addr = ieee802154_set_short_addr, .set_backoff_exponent = ieee802154_set_backoff_exponent, -- cgit v1.2.3 From 1d2dc5b7b32393bb2d818e0de82a66b1a654d329 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 13:40:19 +0200 Subject: Bluetooth: Split hci_update_page_scan into two functions To keep the parameter list and its semantics clear it makes sense to split the hci_update_page_scan function into two separate functions: one taking a hci_dev and another taking a hci_request. The one taking a hci_dev constructs its own hci_request and then calls the other function. 
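The split described here is the classic two-level helper: a double-underscore variant that only appends commands to a caller-owned request (so it can be batched with other updates), and a plain variant that wraps it for "just do it now" callers by building and running a one-off request. In miniature, with made-up types standing in for hci_dev/hci_request:

#include <stdio.h>

struct dev {
	const char *name;
};

struct request {
	struct dev *dev;
	int ncmds;
};

static void req_init(struct request *req, struct dev *dev)
{
	req->dev = dev;
	req->ncmds = 0;
}

static void req_add(struct request *req, const char *cmd)
{
	printf("%s: queue %s\n", req->dev->name, cmd);
	req->ncmds++;
}

static void req_run(struct request *req)
{
	printf("%s: run %d command(s)\n", req->dev->name, req->ncmds);
}

/* __update_page_scan(): only adds to an existing request */
static void __update_page_scan(struct request *req)
{
	req_add(req, "WRITE_SCAN_ENABLE");
}

/* update_page_scan(): convenience wrapper building and running its own request */
static void update_page_scan(struct dev *dev)
{
	struct request req;

	req_init(&req, dev);
	__update_page_scan(&req);
	req_run(&req);
}

int main(void)
{
	struct dev hdev = { .name = "hci0" };
	struct request batch;

	update_page_scan(&hdev);          /* stand-alone call site */

	req_init(&batch, &hdev);          /* call site that batches several updates */
	__update_page_scan(&batch);
	req_add(&batch, "WRITE_CLASS_OF_DEV");
	req_run(&batch);
	return 0;
}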
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 ++- net/bluetooth/hci_core.c | 17 ++++++++++++----- net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/mgmt.c | 16 ++++++++-------- 4 files changed, 24 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3c7827005c25..664451a64d72 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1306,7 +1306,8 @@ bool hci_req_pending(struct hci_dev *hdev); void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); -void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req); +void hci_update_page_scan(struct hci_dev *hdev); +void __hci_update_page_scan(struct hci_request *req); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5dcacf9607e4..3aa9015a8858 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -5787,8 +5787,9 @@ static bool disconnected_whitelist_entries(struct hci_dev *hdev) return false; } -void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req) +void __hci_update_page_scan(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; u8 scan; if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) @@ -5812,8 +5813,14 @@ void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req) if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) scan |= SCAN_INQUIRY; - if (req) - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); - else - hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +void hci_update_page_scan(struct hci_dev *hdev) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_update_page_scan(&req); + hci_req_run(&req, NULL); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 39a5c8a01726..65f4ec8945a4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2127,7 +2127,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } /* Set packet type for incoming connection */ @@ -2308,7 +2308,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 693ce8bcd06e..f86f3ec684ba 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1566,7 +1566,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status) * entries. 
*/ hci_req_init(&req, hdev); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); hci_req_run(&req, NULL); @@ -1813,7 +1813,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status) if (conn_changed || discov_changed) { new_settings(hdev, cmd->sk); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); if (discov_changed) mgmt_update_adv_data(hdev); hci_update_background_scan(hdev); @@ -1847,7 +1847,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev, return err; if (changed) { - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); hci_update_background_scan(hdev); return new_settings(hdev, sk); } @@ -4697,7 +4697,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); /* Since only the advertising data flags will change, there * is no need to update the scan response data. @@ -5473,7 +5473,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); goto added; } @@ -5556,7 +5556,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -5607,7 +5607,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -6139,7 +6139,7 @@ static int powered_update_hci(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); update_name(&req); update_eir(&req); -- cgit v1.2.3 From 0857dd3bed62d8f905f61a399d1ed76464b5270f Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 13:40:20 +0200 Subject: Bluetooth: Split hci_request helpers to hci_request.[ch] None of the hci_request related things in net/bluetooth/hci_core.h are needed anywhere outside of the core bluetooth module. This patch creates a new net/bluetooth/hci_request.c file with its corresponding h-file and moves the functionality there from hci_core.c and hci_core.h. 
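For readers new to the hci_request machinery being moved here: the request object simply accumulates command skbs and latches the first build error, and hci_req_run() refuses to run a failed or empty request (purging queued commands and returning the error, or -ENODATA). The toy version below captures that contract with a fixed-size queue and plain strings instead of skbs; it is a simplified model, not the Bluetooth code.

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define MAX_CMDS 8

struct request {
	const char *cmds[MAX_CMDS];
	int ncmds;
	int err;     /* first error seen while building, 0 otherwise */
};

static void req_init(struct request *req)
{
	memset(req, 0, sizeof(*req));
}

static void req_add(struct request *req, const char *cmd)
{
	/* once building has failed, adding more commands is pointless */
	if (req->err)
		return;

	if (req->ncmds == MAX_CMDS) {     /* ~ allocation failure */
		req->err = -ENOMEM;
		return;
	}

	req->cmds[req->ncmds++] = cmd;
}

static int req_run(struct request *req)
{
	if (req->err) {
		req->ncmds = 0;           /* ~ skb_queue_purge() */
		return req->err;
	}

	if (!req->ncmds)
		return -ENODATA;          /* do not allow empty requests */

	for (int i = 0; i < req->ncmds; i++)
		printf("send %s\n", req->cmds[i]);

	return 0;
}

int main(void)
{
	struct request req;

	req_init(&req);
	req_add(&req, "LE_SET_SCAN_PARAM");
	req_add(&req, "LE_SET_SCAN_ENABLE");
	printf("run -> %d\n", req_run(&req));

	req_init(&req);
	printf("empty run -> %d\n", req_run(&req));   /* -ENODATA */
	return 0;
}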
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 25 --- net/bluetooth/Makefile | 2 +- net/bluetooth/hci_conn.c | 1 + net/bluetooth/hci_core.c | 370 +----------------------------------- net/bluetooth/hci_event.c | 1 + net/bluetooth/hci_request.c | 397 +++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 51 +++++ net/bluetooth/mgmt.c | 1 + 8 files changed, 453 insertions(+), 395 deletions(-) create mode 100644 net/bluetooth/hci_request.c create mode 100644 net/bluetooth/hci_request.h (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 664451a64d72..93066f70f8ab 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1284,31 +1284,8 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); -struct hci_request { - struct hci_dev *hdev; - struct sk_buff_head cmd_q; - - /* If something goes wrong when building the HCI request, the error - * value is stored in this field. - */ - int err; -}; - -void hci_req_init(struct hci_request *req, struct hci_dev *hdev); -int hci_req_run(struct hci_request *req, hci_req_complete_t complete); -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param); -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event); -void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); bool hci_req_pending(struct hci_dev *hdev); -void hci_req_add_le_scan_disable(struct hci_request *req); -void hci_req_add_le_passive_scan(struct hci_request *req); - -void hci_update_page_scan(struct hci_dev *hdev); -void __hci_update_page_scan(struct hci_request *req); - struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, @@ -1418,8 +1395,6 @@ u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16]); -int hci_update_random_address(struct hci_request *req, bool require_privacy, - u8 *own_addr_type); void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type); diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index a5432a6a0ae6..0a176fc9e291 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -13,6 +13,6 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o ecc.o + a2mp.o amp.o ecc.o hci_request.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fe18825cc8a4..4405fb352c70 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -30,6 +30,7 @@ #include #include +#include "hci_request.h" #include "smp.h" #include "a2mp.h" diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3aa9015a8858..2cfaaa6acb04 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -37,6 +37,7 @@ #include #include +#include "hci_request.h" #include "smp.h" static void hci_rx_work(struct work_struct *work); @@ -3901,112 +3902,6 @@ static void le_scan_disable_work(struct work_struct *work) BT_ERR("Disable LE scanning request failed: err %d", err); } -static void 
set_random_addr(struct hci_request *req, bdaddr_t *rpa) -{ - struct hci_dev *hdev = req->hdev; - - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. - */ - if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || - hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { - BT_DBG("Deferring random address update"); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); - return; - } - - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - -int hci_update_random_address(struct hci_request *req, bool require_privacy, - u8 *own_addr_type) -{ - struct hci_dev *hdev = req->hdev; - int err; - - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired or there is something else than - * the current RPA in use, then generate a new one. - */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { - int to; - - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && - !bacmp(&hdev->random_addr, &hdev->rpa)) - return 0; - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); - return err; - } - - set_random_addr(req, &hdev->rpa); - - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); - - return 0; - } - - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for active - * scanning and non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; - - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. - */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; - - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } - - *own_addr_type = ADDR_LE_DEV_RANDOM; - set_random_addr(req, &nrpa); - return 0; - } - - /* If forcing static address is in use or there is no public - * address use the static address as random address (but skip - * the HCI command if the current random address is already the - * static one. - */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { - *own_addr_type = ADDR_LE_DEV_RANDOM; - if (bacmp(&hdev->static_addr, &hdev->random_addr)) - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, - &hdev->static_addr); - return 0; - } - - /* Neither privacy nor static address is being used so use a - * public address. - */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; - - return 0; -} - /* Copy the Identity Address of the controller. * * If the controller has a public BD_ADDR, then by default use that one. 
@@ -4539,76 +4434,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) } } -void hci_req_init(struct hci_request *req, struct hci_dev *hdev) -{ - skb_queue_head_init(&req->cmd_q); - req->hdev = hdev; - req->err = 0; -} - -int hci_req_run(struct hci_request *req, hci_req_complete_t complete) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - unsigned long flags; - - BT_DBG("length %u", skb_queue_len(&req->cmd_q)); - - /* If an error occurred during request building, remove all HCI - * commands queued on the HCI request queue. - */ - if (req->err) { - skb_queue_purge(&req->cmd_q); - return req->err; - } - - /* Do not allow empty requests */ - if (skb_queue_empty(&req->cmd_q)) - return -ENODATA; - - skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->req.complete = complete; - - spin_lock_irqsave(&hdev->cmd_q.lock, flags); - skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); - spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); - - queue_work(hdev->workqueue, &hdev->cmd_work); - - return 0; -} - bool hci_req_pending(struct hci_dev *hdev) { return (hdev->req_status == HCI_REQ_PEND); } -static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, - u32 plen, const void *param) -{ - int len = HCI_COMMAND_HDR_SIZE + plen; - struct hci_command_hdr *hdr; - struct sk_buff *skb; - - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) - return NULL; - - hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); - hdr->opcode = cpu_to_le16(opcode); - hdr->plen = plen; - - if (plen) - memcpy(skb_put(skb, plen), param, plen); - - BT_DBG("skb len %d", skb->len); - - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - bt_cb(skb)->opcode = opcode; - - return skb; -} - /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param) @@ -4634,43 +4464,6 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, return 0; } -/* Queue a command to an asynchronous HCI request */ -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - - BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); - - /* If an error occurred during request building, there is no point in - * queueing the HCI command. We can simply return. 
- */ - if (req->err) - return; - - skb = hci_prepare_cmd(hdev, opcode, plen, param); - if (!skb) { - BT_ERR("%s no memory for command (opcode 0x%4.4x)", - hdev->name, opcode); - req->err = -ENOMEM; - return; - } - - if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->req.start = true; - - bt_cb(skb)->req.event = event; - - skb_queue_tail(&req->cmd_q, skb); -} - -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param) -{ - hci_req_add_ev(req, opcode, plen, param, 0); -} - /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { @@ -5519,167 +5312,6 @@ static void hci_cmd_work(struct work_struct *work) } } -void hci_req_add_le_scan_disable(struct hci_request *req) -{ - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_DISABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); -} - -static void add_to_white_list(struct hci_request *req, - struct hci_conn_params *params) -{ - struct hci_cp_le_add_to_white_list cp; - - cp.bdaddr_type = params->addr_type; - bacpy(&cp.bdaddr, ¶ms->addr); - - hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); -} - -static u8 update_white_list(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_conn_params *params; - struct bdaddr_list *b; - uint8_t white_list_entries = 0; - - /* Go through the current white list programmed into the - * controller one by one and check if that address is still - * in the list of pending connections or list of devices to - * report. If not present in either list, then queue the - * command to remove it from the controller. - */ - list_for_each_entry(b, &hdev->le_white_list, list) { - struct hci_cp_le_del_from_white_list cp; - - if (hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) || - hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - white_list_entries++; - continue; - } - - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); - } - - /* Since all no longer valid white list entries have been - * removed, walk through the list of pending connections - * and ensure that any new device gets programmed into - * the controller. - * - * If the list of the devices is larger than the list of - * available white list entries in the controller, then - * just abort and return filer policy value to not use the - * white list. - */ - list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* After adding all new pending connections, walk through - * the list of pending reports and also add these to the - * white list if there is still space. 
- */ - list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* Select filter policy to use white list */ - return 0x01; -} - -void hci_req_add_le_passive_scan(struct hci_request *req) -{ - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - struct hci_dev *hdev = req->hdev; - u8 own_addr_type; - u8 filter_policy; - - /* Set require_privacy to false since no SCAN_REQ are send - * during passive scanning. Not using an non-resolvable address - * here is important so that peer devices using direct - * advertising with our address will be correctly reported - * by the controller. - */ - if (hci_update_random_address(req, false, &own_addr_type)) - return; - - /* Adding or removing entries from the white list must - * happen before enabling scanning. The controller does - * not allow white list modification while scanning. - */ - filter_policy = update_white_list(req); - - /* When the controller is using random resolvable addresses and - * with that having LE privacy enabled, then controllers with - * Extended Scanner Filter Policies support can now enable support - * for handling directed advertising. - * - * So instead of using filter polices 0x00 (no whitelist) - * and 0x01 (whitelist enabled) use the new filter policies - * 0x02 (no whitelist) and 0x03 (whitelist enabled). - */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && - (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) - filter_policy |= 0x02; - - memset(¶m_cp, 0, sizeof(param_cp)); - param_cp.type = LE_SCAN_PASSIVE; - param_cp.interval = cpu_to_le16(hdev->le_scan_interval); - param_cp.window = cpu_to_le16(hdev->le_scan_window); - param_cp.own_address_type = own_addr_type; - param_cp.filter_policy = filter_policy; - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); -} - static void update_background_scan_complete(struct hci_dev *hdev, u8 status) { if (status) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 65f4ec8945a4..a412eb1e1f61 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -30,6 +30,7 @@ #include #include +#include "hci_request.h" #include "a2mp.h" #include "amp.h" #include "smp.h" diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c new file mode 100644 index 000000000000..eba83a2a6556 --- /dev/null +++ b/net/bluetooth/hci_request.c @@ -0,0 +1,397 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include +#include + +#include "smp.h" +#include "hci_request.h" + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; + req->err = 0; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + + /* If an error occurred during request building, remove all HCI + * commands queued on the HCI request queue. + */ + if (req->err) { + skb_queue_purge(&req->cmd_q); + return req->err; + } + + /* Do not allow empty requests */ + if (skb_queue_empty(&req->cmd_q)) + return -ENODATA; + + skb = skb_peek_tail(&req->cmd_q); + bt_cb(skb)->req.complete = complete; + + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + + queue_work(hdev->workqueue, &hdev->cmd_work); + + return 0; +} + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param) +{ + int len = HCI_COMMAND_HDR_SIZE + plen; + struct hci_command_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(len, GFP_ATOMIC); + if (!skb) + return NULL; + + hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); + hdr->opcode = cpu_to_le16(opcode); + hdr->plen = plen; + + if (plen) + memcpy(skb_put(skb, plen), param, plen); + + BT_DBG("skb len %d", skb->len); + + bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; + bt_cb(skb)->opcode = opcode; + + return skb; +} + +/* Queue a command to an asynchronous HCI request */ +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + /* If an error occurred during request building, there is no point in + * queueing the HCI command. We can simply return. 
+ */ + if (req->err) + return; + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command (opcode 0x%4.4x)", + hdev->name, opcode); + req->err = -ENOMEM; + return; + } + + if (skb_queue_empty(&req->cmd_q)) + bt_cb(skb)->req.start = true; + + bt_cb(skb)->req.event = event; + + skb_queue_tail(&req->cmd_q, skb); +} + +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param) +{ + hci_req_add_ev(req, opcode, plen, param, 0); +} + +void hci_req_add_le_scan_disable(struct hci_request *req) +{ + struct hci_cp_le_set_scan_enable cp; + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_DISABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); +} + +static void add_to_white_list(struct hci_request *req, + struct hci_conn_params *params) +{ + struct hci_cp_le_add_to_white_list cp; + + cp.bdaddr_type = params->addr_type; + bacpy(&cp.bdaddr, ¶ms->addr); + + hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); +} + +static u8 update_white_list(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + struct bdaddr_list *b; + uint8_t white_list_entries = 0; + + /* Go through the current white list programmed into the + * controller one by one and check if that address is still + * in the list of pending connections or list of devices to + * report. If not present in either list, then queue the + * command to remove it from the controller. + */ + list_for_each_entry(b, &hdev->le_white_list, list) { + struct hci_cp_le_del_from_white_list cp; + + if (hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, b->bdaddr_type) || + hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, b->bdaddr_type)) { + white_list_entries++; + continue; + } + + cp.bdaddr_type = b->bdaddr_type; + bacpy(&cp.bdaddr, &b->bdaddr); + + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, + sizeof(cp), &cp); + } + + /* Since all no longer valid white list entries have been + * removed, walk through the list of pending connections + * and ensure that any new device gets programmed into + * the controller. + * + * If the list of the devices is larger than the list of + * available white list entries in the controller, then + * just abort and return filer policy value to not use the + * white list. + */ + list_for_each_entry(params, &hdev->pend_le_conns, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + ¶ms->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* After adding all new pending connections, walk through + * the list of pending reports and also add these to the + * white list if there is still space. 
+ */ + list_for_each_entry(params, &hdev->pend_le_reports, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + ¶ms->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* Select filter policy to use white list */ + return 0x01; +} + +void hci_req_add_le_passive_scan(struct hci_request *req) +{ + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + struct hci_dev *hdev = req->hdev; + u8 own_addr_type; + u8 filter_policy; + + /* Set require_privacy to false since no SCAN_REQ are send + * during passive scanning. Not using an non-resolvable address + * here is important so that peer devices using direct + * advertising with our address will be correctly reported + * by the controller. + */ + if (hci_update_random_address(req, false, &own_addr_type)) + return; + + /* Adding or removing entries from the white list must + * happen before enabling scanning. The controller does + * not allow white list modification while scanning. + */ + filter_policy = update_white_list(req); + + /* When the controller is using random resolvable addresses and + * with that having LE privacy enabled, then controllers with + * Extended Scanner Filter Policies support can now enable support + * for handling directed advertising. + * + * So instead of using filter polices 0x00 (no whitelist) + * and 0x01 (whitelist enabled) use the new filter policies + * 0x02 (no whitelist) and 0x03 (whitelist enabled). + */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && + (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) + filter_policy |= 0x02; + + memset(¶m_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_PASSIVE; + param_cp.interval = cpu_to_le16(hdev->le_scan_interval); + param_cp.window = cpu_to_le16(hdev->le_scan_window); + param_cp.own_address_type = own_addr_type; + param_cp.filter_policy = filter_policy; + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + ¶m_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); +} + +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +{ + struct hci_dev *hdev = req->hdev; + + /* If we're advertising or initiating an LE connection we can't + * go ahead and change the random address at this time. This is + * because the eventual initiator address used for the + * subsequently created connection will be undefined (some + * controllers use the new address and others the one we had + * when the operation started). + * + * In this kind of scenario skip the update and let the random + * address be updated at the next cycle. + */ + if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || + hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { + BT_DBG("Deferring random address update"); + set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + return; + } + + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); +} + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type) +{ + struct hci_dev *hdev = req->hdev; + int err; + + /* If privacy is enabled use a resolvable private address. 
If + * current RPA has expired or there is something else than + * the current RPA in use, then generate a new one. + */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { + int to; + + *own_addr_type = ADDR_LE_DEV_RANDOM; + + if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && + !bacmp(&hdev->random_addr, &hdev->rpa)) + return 0; + + err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); + if (err < 0) { + BT_ERR("%s failed to generate new RPA", hdev->name); + return err; + } + + set_random_addr(req, &hdev->rpa); + + to = msecs_to_jiffies(hdev->rpa_timeout * 1000); + queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); + + return 0; + } + + /* In case of required privacy without resolvable private address, + * use an non-resolvable private address. This is useful for active + * scanning and non-connectable advertising. + */ + if (require_privacy) { + bdaddr_t nrpa; + + while (true) { + /* The non-resolvable private address is generated + * from random six bytes with the two most significant + * bits cleared. + */ + get_random_bytes(&nrpa, 6); + nrpa.b[5] &= 0x3f; + + /* The non-resolvable private address shall not be + * equal to the public address. + */ + if (bacmp(&hdev->bdaddr, &nrpa)) + break; + } + + *own_addr_type = ADDR_LE_DEV_RANDOM; + set_random_addr(req, &nrpa); + return 0; + } + + /* If forcing static address is in use or there is no public + * address use the static address as random address (but skip + * the HCI command if the current random address is already the + * static one. + */ + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + !bacmp(&hdev->bdaddr, BDADDR_ANY)) { + *own_addr_type = ADDR_LE_DEV_RANDOM; + if (bacmp(&hdev->static_addr, &hdev->random_addr)) + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, + &hdev->static_addr); + return 0; + } + + /* Neither privacy nor static address is being used so use a + * public address. + */ + *own_addr_type = ADDR_LE_DEV_PUBLIC; + + return 0; +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h new file mode 100644 index 000000000000..1793a46fea65 --- /dev/null +++ b/net/bluetooth/hci_request.h @@ -0,0 +1,51 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. 
+ */ + int err; +}; + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev); +int hci_req_run(struct hci_request *req, hci_req_complete_t complete); +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param); +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param); + +void hci_req_add_le_scan_disable(struct hci_request *req); +void hci_req_add_le_passive_scan(struct hci_request *req); + +void hci_update_page_scan(struct hci_dev *hdev); +void __hci_update_page_scan(struct hci_request *req); + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f86f3ec684ba..95473e966703 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -32,6 +32,7 @@ #include #include +#include "hci_request.h" #include "smp.h" #define MGMT_VERSION 1 -- cgit v1.2.3 From 8d819a92cc7fef4294dd11faa60050fd3c5460e0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 17 Dec 2014 13:38:34 +0100 Subject: mac80211: minstrel: reduce size of struct minstrel_rate_stats On minstrel_ht, the size of the per-sta struct is almost 18k, making it an order-3 allocation. A few fields inside the per-rate statistics are bigger than they need to be. This patch reduces the size enough to cut down the per-sta struct to about 13k (order-2 allocation). Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel.c | 6 +++--- net/mac80211/rc80211_minstrel.h | 15 ++++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index d51f6b1c549b..7c86a002df95 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -263,12 +263,12 @@ static inline unsigned int minstrel_get_retry_count(struct minstrel_rate *mr, struct ieee80211_tx_info *info) { - unsigned int retry = mr->adjusted_retry_count; + u8 retry = mr->adjusted_retry_count; if (info->control.use_rts) - retry = max(2U, min(mr->stats.retry_count_rtscts, retry)); + retry = max_t(u8, 2, min(mr->stats.retry_count_rtscts, retry)); else if (info->control.use_cts_prot) - retry = max(2U, min(mr->retry_count_cts, retry)); + retry = max_t(u8, 2, min(mr->retry_count_cts, retry)); return retry; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 97eca86a4af0..410efe620c57 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -33,8 +33,8 @@ minstrel_ewma(int old, int new, int weight) struct minstrel_rate_stats { /* current / last sampling period attempts/success counters */ - unsigned int attempts, last_attempts; - unsigned int success, last_success; + u16 attempts, last_attempts; + u16 success, last_success; /* total attempts/success counters */ u64 att_hist, succ_hist; @@ -46,8 +46,8 @@ struct minstrel_rate_stats { unsigned int cur_prob, probability; /* maximum retry counts */ - unsigned int retry_count; - unsigned int retry_count_rtscts; + u8 retry_count; + u8 retry_count_rtscts; u8 sample_skipped; bool retry_updated; @@ -55,14 +55,15 @@ struct minstrel_rate_stats { struct minstrel_rate { int bitrate; - int rix; + + s8 rix; + u8 retry_count_cts; + u8 adjusted_retry_count; unsigned int perfect_tx_time; 
unsigned int ack_time; int sample_limit; - unsigned int retry_count_cts; - unsigned int adjusted_retry_count; struct minstrel_rate_stats stats; }; -- cgit v1.2.3 From 2cf22218b00f46f93b39a9355b830e9e8e4fd077 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 22:26:00 +0200 Subject: Bluetooth: Add hci_request support for hci_update_background_scan Many places using hci_update_background_scan() try to synchronize whatever they're doing with the help of hci_request callbacks. However, since the hci_update_background_scan() function hasn't so far accepted a hci_request pointer any commands triggered by it have been left out by the synchronization. This patch modifies the API in a similar way as was done for hci_update_page_scan, i.e. there's a variant that takes a hci_request and another one that takes a hci_dev. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 - net/bluetooth/hci_core.c | 89 ------------------------------------- net/bluetooth/hci_request.c | 96 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 3 ++ net/bluetooth/mgmt.c | 12 ++--- 5 files changed, 105 insertions(+), 97 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 93066f70f8ab..8eccdf029500 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -930,8 +930,6 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type); -void hci_update_background_scan(struct hci_dev *hdev); - void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2cfaaa6acb04..def6fba01b45 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -5312,95 +5312,6 @@ static void hci_cmd_work(struct work_struct *work) } } -static void update_background_scan_complete(struct hci_dev *hdev, u8 status) -{ - if (status) - BT_DBG("HCI request failed to update background scanning: " - "status 0x%2.2x", status); -} - -/* This function controls the background scanning based on hdev->pend_le_conns - * list. If there are pending LE connection we start the background scanning, - * otherwise we stop it. - * - * This function requires the caller holds hdev->lock. - */ -void hci_update_background_scan(struct hci_dev *hdev) -{ - struct hci_request req; - struct hci_conn *conn; - int err; - - if (!test_bit(HCI_UP, &hdev->flags) || - test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags) || - test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || - test_bit(HCI_UNREGISTER, &hdev->dev_flags)) - return; - - /* No point in doing scanning if LE support hasn't been enabled */ - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - return; - - /* If discovery is active don't interfere with it */ - if (hdev->discovery.state != DISCOVERY_STOPPED) - return; - - /* Reset RSSI and UUID filters when starting background scanning - * since these filters are meant for service discovery only. - * - * The Start Discovery and Start Service Discovery operations - * ensure to set proper values for RSSI threshold and UUID - * filter list. So it is safe to just reset them here. 
- */ - hci_discovery_filter_clear(hdev); - - hci_req_init(&req, hdev); - - if (list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) { - /* If there is no pending LE connections or devices - * to be scanned for, we should stop the background - * scanning. - */ - - /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return; - - hci_req_add_le_scan_disable(&req); - - BT_DBG("%s stopping background scanning", hdev->name); - } else { - /* If there is at least one pending LE connection, we should - * keep the background scan running. - */ - - /* If controller is connecting, we should not start scanning - * since some controllers are not able to scan and connect at - * the same time. - */ - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (conn) - return; - - /* If controller is currently scanning, we stop it to ensure we - * don't miss any advertising (due to duplicates filter). - */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - hci_req_add_le_scan_disable(&req); - - hci_req_add_le_passive_scan(&req); - - BT_DBG("%s starting background scanning", hdev->name); - } - - err = hci_req_run(&req, update_background_scan_complete); - if (err) - BT_ERR("Failed to run HCI request: err %d", err); -} - static bool disconnected_whitelist_entries(struct hci_dev *hdev) { struct bdaddr_list *b; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index eba83a2a6556..e49f682f1550 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -395,3 +395,99 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, return 0; } + +/* This function controls the background scanning based on hdev->pend_le_conns + * list. If there are pending LE connection we start the background scanning, + * otherwise we stop it. + * + * This function requires the caller holds hdev->lock. + */ +void __hci_update_background_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn *conn; + + if (!test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags) || + test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || + test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + return; + + /* No point in doing scanning if LE support hasn't been enabled */ + if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + return; + + /* If discovery is active don't interfere with it */ + if (hdev->discovery.state != DISCOVERY_STOPPED) + return; + + /* Reset RSSI and UUID filters when starting background scanning + * since these filters are meant for service discovery only. + * + * The Start Discovery and Start Service Discovery operations + * ensure to set proper values for RSSI threshold and UUID + * filter list. So it is safe to just reset them here. + */ + hci_discovery_filter_clear(hdev); + + if (list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports)) { + /* If there is no pending LE connections or devices + * to be scanned for, we should stop the background + * scanning. + */ + + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + hci_req_add_le_scan_disable(req); + + BT_DBG("%s stopping background scanning", hdev->name); + } else { + /* If there is at least one pending LE connection, we should + * keep the background scan running. 
+ */ + + /* If controller is connecting, we should not start scanning + * since some controllers are not able to scan and connect at + * the same time. + */ + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + return; + + /* If controller is currently scanning, we stop it to ensure we + * don't miss any advertising (due to duplicates filter). + */ + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + hci_req_add_le_scan_disable(req); + + hci_req_add_le_passive_scan(req); + + BT_DBG("%s starting background scanning", hdev->name); + } +} + +static void update_background_scan_complete(struct hci_dev *hdev, u8 status) +{ + if (status) + BT_DBG("HCI request failed to update background scanning: " + "status 0x%2.2x", status); +} + +void hci_update_background_scan(struct hci_dev *hdev) +{ + int err; + struct hci_request req; + + hci_req_init(&req, hdev); + + __hci_update_background_scan(&req); + + err = hci_req_run(&req, update_background_scan_complete); + if (err && err != -ENODATA) + BT_ERR("Failed to run HCI request: err %d", err); +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 1793a46fea65..adf074d33544 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -49,3 +49,6 @@ void __hci_update_page_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); + +void hci_update_background_scan(struct hci_dev *hdev); +void __hci_update_background_scan(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 95473e966703..3afe1e175eb8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2228,9 +2228,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status) hci_req_init(&req, hdev); update_adv_data(&req); update_scan_rsp_data(&req); + __hci_update_background_scan(&req); hci_req_run(&req, NULL); - - hci_update_background_scan(hdev); } unlock: @@ -6038,8 +6037,9 @@ void mgmt_index_removed(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -static void restart_le_actions(struct hci_dev *hdev) +static void restart_le_actions(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { @@ -6061,7 +6061,7 @@ static void restart_le_actions(struct hci_dev *hdev) } } - hci_update_background_scan(hdev); + __hci_update_background_scan(req); } static void powered_complete(struct hci_dev *hdev, u8 status) @@ -6072,8 +6072,6 @@ static void powered_complete(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); - restart_le_actions(hdev); - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); new_settings(hdev, match.sk); @@ -6131,6 +6129,8 @@ static int powered_update_hci(struct hci_dev *hdev) if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) enable_advertising(&req); + + restart_le_actions(&req); } link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); -- cgit v1.2.3 From 51ef3ebe7bcc212f4cbbeac48bda26ee90a6edbe Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 22:26:01 +0200 Subject: Bluetooth: Fix Remove Device to wait for HCI before sending cmd_complete This patch updates the Remove Device mgmt command handler to use a hci_request to wait for HCI command completion before notifying user space of the mgmt command completion. This way we ensure that once the mgmt command returns all HCI commands triggered by it have also completed. 
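To make the new flow easier to follow, here is a condensed sketch of the pattern (assuming the mgmt/hci_request helpers already present in this series; locking and the handler's parameter validation are elided, and remove_device_sketch is an illustrative name rather than a function added by the patch):

/* Completion callback: user space is only answered once the HCI
 * commands queued by the request have finished.
 */
static void remove_device_complete(struct hci_dev *hdev, u8 status)
{
	struct pending_cmd *cmd;

	hci_dev_lock(hdev);

	cmd = mgmt_pending_find(MGMT_OP_REMOVE_DEVICE, hdev);
	if (cmd) {
		cmd->cmd_complete(cmd, mgmt_status(status));
		mgmt_pending_remove(cmd);
	}

	hci_dev_unlock(hdev);
}

static int remove_device_sketch(struct sock *sk, struct hci_dev *hdev,
				void *data, u16 len)
{
	struct hci_request req;
	struct pending_cmd *cmd;
	int err;

	hci_req_init(&req, hdev);

	cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_DEVICE, hdev, data, len);
	if (!cmd)
		return -ENOMEM;
	cmd->cmd_complete = addr_cmd_complete;

	/* ... queue whatever HCI traffic the removal needs, e.g.
	 * __hci_update_page_scan(&req) or __hci_update_background_scan(&req) ...
	 */

	err = hci_req_run(&req, remove_device_complete);
	if (err < 0) {
		/* -ENODATA means no HCI commands were needed (e.g. the
		 * adapter is powered off), so report success right away.
		 */
		if (err == -ENODATA) {
			cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS);
			err = 0;
		}
		mgmt_pending_remove(cmd);
	}

	return err;
}

Treating -ENODATA as success matters here because hci_req_run() rejects empty requests, which is exactly the situation when the adapter is powered off and no HCI commands had to be sent.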
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 84 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3afe1e175eb8..6b925733c6f8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5523,24 +5523,55 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } +static void remove_device_complete(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_REMOVE_DEVICE, hdev); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_device *cp = data; + struct pending_cmd *cmd; + struct hci_request req; int err; BT_DBG("%s", hdev->name); + hci_req_init(&req, hdev); + hci_dev_lock(hdev); + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + cmd->cmd_complete = addr_cmd_complete; + if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { struct hci_conn_params *params; u8 addr_type; if (!bdaddr_type_is_valid(cp->addr.type)) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = 0; + cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5549,14 +5580,14 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, &cp->addr.bdaddr, cp->addr.type); if (err) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = 0; + cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } - hci_update_page_scan(hdev); + __hci_update_page_scan(&req); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -5571,23 +5602,23 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (!params) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = 0; + cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = 0; + cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } list_del(¶ms->action); list_del(¶ms->list); kfree(params); - hci_update_background_scan(hdev); + __hci_update_background_scan(&req); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); } else { @@ -5595,9 +5626,9 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, struct bdaddr_list *b, *btmp; if (cp->addr.type) { - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = 0; + cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5607,7 +5638,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - hci_update_page_scan(hdev); + 
__hci_update_page_scan(&req); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -5620,12 +5651,21 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, BT_DBG("All LE connection parameters were removed"); - hci_update_background_scan(hdev); + __hci_update_background_scan(&req); } complete: - err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, - MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + err = hci_req_run(&req, remove_device_complete); + if (err < 0) { + /* ENODATA means no HCI commands were needed (e.g. if + * the adapter is powered off). + */ + if (err == -ENODATA) { + cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); + err = 0; + } + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); -- cgit v1.2.3 From 5a154e6f71dfd41c7b5cf96a13c83fca91e7df7f Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 22:26:02 +0200 Subject: Bluetooth: Fix Add Device to wait for HCI before sending cmd_complete This patch updates the Add Device mgmt command handler to use a hci_request to wait for HCI command completion before notifying user space of the mgmt command completion. To do this we need to add an extra hci_request parameter to the hci_conn_params_set function. Since this function has no other users besides mgmt.c it's moved there as a static function. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 - net/bluetooth/hci_core.c | 58 ------------------- net/bluetooth/mgmt.c | 119 +++++++++++++++++++++++++++++++++++---- 3 files changed, 109 insertions(+), 70 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 8eccdf029500..79724c87ab00 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -920,8 +920,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); -int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, - u8 auto_connect); void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_clear_all(struct hci_dev *hdev); void hci_conn_params_clear_disabled(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index def6fba01b45..ee2096c7ec2c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3660,23 +3660,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, return NULL; } -static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) -{ - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); - if (!conn) - return false; - - if (conn->dst_type != type) - return false; - - if (conn->state != BT_CONNECTED) - return false; - - return true; -} - /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type) @@ -3732,47 +3715,6 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, return params; } -/* This function requires the caller holds hdev->lock */ -int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, - u8 auto_connect) -{ - struct hci_conn_params *params; - - params = hci_conn_params_add(hdev, addr, addr_type); - if (!params) - return -EIO; - - if (params->auto_connect == auto_connect) - return 0; - 
- list_del_init(¶ms->action); - - switch (auto_connect) { - case HCI_AUTO_CONN_DISABLED: - case HCI_AUTO_CONN_LINK_LOSS: - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_REPORT: - list_add(¶ms->action, &hdev->pend_le_reports); - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - if (!is_connected(hdev, addr, addr_type)) { - list_add(¶ms->action, &hdev->pend_le_conns); - hci_update_background_scan(hdev); - } - break; - } - - params->auto_connect = auto_connect; - - BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, - auto_connect); - - return 0; -} - static void hci_conn_params_free(struct hci_conn_params *params) { if (params->conn) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6b925733c6f8..ec7c0ec3d8d3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5425,6 +5425,65 @@ unlock: return err; } +static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) +{ + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + if (!conn) + return false; + + if (conn->dst_type != type) + return false; + + if (conn->state != BT_CONNECTED) + return false; + + return true; +} + +/* This function requires the caller holds hdev->lock */ +static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, + u8 addr_type, u8 auto_connect) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + + params = hci_conn_params_add(hdev, addr, addr_type); + if (!params) + return -EIO; + + if (params->auto_connect == auto_connect) + return 0; + + list_del_init(¶ms->action); + + switch (auto_connect) { + case HCI_AUTO_CONN_DISABLED: + case HCI_AUTO_CONN_LINK_LOSS: + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_REPORT: + list_add(¶ms->action, &hdev->pend_le_reports); + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + if (!is_connected(hdev, addr, addr_type)) { + list_add(¶ms->action, &hdev->pend_le_conns); + __hci_update_background_scan(req); + } + break; + } + + params->auto_connect = auto_connect; + + BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, + auto_connect); + + return 0; +} + static void device_added(struct sock *sk, struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type, u8 action) { @@ -5437,10 +5496,31 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } +static void add_device_complete(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_ADD_DEVICE, hdev); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; + struct pending_cmd *cmd; + struct hci_request req; u8 auto_conn, addr_type; int err; @@ -5457,14 +5537,24 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); + hci_req_init(&req, hdev); + hci_dev_lock(hdev); + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + cmd->cmd_complete = addr_cmd_complete; + if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { - 
err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = 0; + cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5473,7 +5563,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev); + __hci_update_page_scan(&req); goto added; } @@ -5493,19 +5583,28 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, /* If the connection parameters don't exist for this device, * they will be created and configured with defaults. */ - if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type, + if (hci_conn_params_set(&req, &cp->addr.bdaddr, addr_type, auto_conn) < 0) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_FAILED, - &cp->addr, sizeof(cp->addr)); + err = 0; + cmd->cmd_complete(cmd, MGMT_STATUS_FAILED); + mgmt_pending_remove(cmd); goto unlock; } added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + err = hci_req_run(&req, add_device_complete); + if (err < 0) { + /* ENODATA means no HCI commands were needed (e.g. if + * the adapter is powered off). + */ + if (err == -ENODATA) { + cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); + err = 0; + } + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); -- cgit v1.2.3 From 9df7465351c00707d7d63a52e01187a6e169fe4a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 22:26:03 +0200 Subject: Bluetooth: Add return parameter to cmd_complete callbacks The cmd_complete callbacks for pending mgmt commands may fail e.g. in the case of memory allocation. Previously this error would be caught and returned to user space in the form of a failed write on the mgmt socket (when the error happened in the mgmt command handler) but with the introduction of the generic cmd_complete callback this information was lost. This patch returns the feature by making cmd_complete callbacks return int instead of void. 
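The change reduces to the callback's signature and its call sites; a minimal sketch based on the structures shown earlier in this series (only the relevant member of struct pending_cmd is spelled out):

struct pending_cmd {
	/* was: void (*cmd_complete)(struct pending_cmd *cmd, u8 status); */
	int (*cmd_complete)(struct pending_cmd *cmd, u8 status);
	/* remaining members unchanged */
};

static int generic_cmd_complete(struct pending_cmd *cmd, u8 status)
{
	/* cmd_complete() can fail (e.g. on allocation), so hand the
	 * result back instead of swallowing it.
	 */
	return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status,
			    cmd->param, cmd->param_len);
}

/* Call sites then forward the callback's result, for example: */
if (err == -ENODATA)
	err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS);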
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 85 +++++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ec7c0ec3d8d3..3d2f7ad1e655 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -139,7 +139,7 @@ struct pending_cmd { size_t param_len; struct sock *sk; void *user_data; - void (*cmd_complete)(struct pending_cmd *cmd, u8 status); + int (*cmd_complete)(struct pending_cmd *cmd, u8 status); }; /* HCI to MGMT error code conversion table */ @@ -1487,16 +1487,16 @@ static void cmd_complete_rsp(struct pending_cmd *cmd, void *data) cmd_status_rsp(cmd, data); } -static void generic_cmd_complete(struct pending_cmd *cmd, u8 status) +static int generic_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - cmd->param_len); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, cmd->param_len); } -static void addr_cmd_complete(struct pending_cmd *cmd, u8 status) +static int addr_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, - sizeof(struct mgmt_addr_info)); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, + sizeof(struct mgmt_addr_info)); } static u8 mgmt_bredr_support(struct hci_dev *hdev) @@ -3098,16 +3098,17 @@ static struct pending_cmd *find_pairing(struct hci_conn *conn) return NULL; } -static void pairing_complete(struct pending_cmd *cmd, u8 status) +static int pairing_complete(struct pending_cmd *cmd, u8 status) { struct mgmt_rp_pair_device rp; struct hci_conn *conn = cmd->user_data; + int err; bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, status, + &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ conn->connect_cfm_cb = NULL; @@ -3122,6 +3123,8 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status) clear_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); hci_conn_put(conn); + + return err; } void mgmt_smp_complete(struct hci_conn *conn, bool complete) @@ -3947,9 +3950,10 @@ failed: return err; } -static void service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) +static int service_discovery_cmd_complete(struct pending_cmd *cmd, u8 status) { - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, cmd->param, 1); + return cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + cmd->param, 1); } static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, @@ -5091,10 +5095,11 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, return err; } -static void conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_conn_info rp; + int err; memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); @@ -5108,11 +5113,13 @@ static void conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) rp.max_tx_power = HCI_TX_POWER_INVALID; } - cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, + &rp, sizeof(rp)); hci_conn_drop(conn); 
hci_conn_put(conn); + + return err; } static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status) @@ -5286,11 +5293,12 @@ unlock: return err; } -static void clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) +static int clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) { struct hci_conn *conn = cmd->user_data; struct mgmt_rp_get_clock_info rp; struct hci_dev *hdev; + int err; memset(&rp, 0, sizeof(rp)); memcpy(&rp.addr, &cmd->param, sizeof(rp.addr)); @@ -5310,12 +5318,15 @@ static void clock_info_cmd_complete(struct pending_cmd *cmd, u8 status) } complete: - cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + sizeof(rp)); if (conn) { hci_conn_drop(conn); hci_conn_put(conn); } + + return err; } static void get_clock_info_complete(struct hci_dev *hdev, u8 status) @@ -5552,8 +5563,8 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { - err = 0; - cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); mgmt_pending_remove(cmd); goto unlock; } @@ -5585,8 +5596,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, */ if (hci_conn_params_set(&req, &cp->addr.bdaddr, addr_type, auto_conn) < 0) { - err = 0; - cmd->cmd_complete(cmd, MGMT_STATUS_FAILED); + err = cmd->cmd_complete(cmd, MGMT_STATUS_FAILED); mgmt_pending_remove(cmd); goto unlock; } @@ -5599,10 +5609,8 @@ added: /* ENODATA means no HCI commands were needed (e.g. if * the adapter is powered off). */ - if (err == -ENODATA) { - cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); - err = 0; - } + if (err == -ENODATA) + err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); mgmt_pending_remove(cmd); } @@ -5668,8 +5676,8 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, u8 addr_type; if (!bdaddr_type_is_valid(cp->addr.type)) { - err = 0; - cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); mgmt_pending_remove(cmd); goto unlock; } @@ -5679,9 +5687,8 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, &cp->addr.bdaddr, cp->addr.type); if (err) { - err = 0; - cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); mgmt_pending_remove(cmd); goto unlock; } @@ -5701,15 +5708,15 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (!params) { - err = 0; - cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); mgmt_pending_remove(cmd); goto unlock; } if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { - err = 0; - cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); mgmt_pending_remove(cmd); goto unlock; } @@ -5725,8 +5732,8 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, struct bdaddr_list *b, *btmp; if (cp->addr.type) { - err = 0; - cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + err = cmd->cmd_complete(cmd, + MGMT_STATUS_INVALID_PARAMS); mgmt_pending_remove(cmd); goto unlock; } @@ -5759,10 +5766,8 @@ complete: /* ENODATA means no HCI commands were needed (e.g. if * the adapter is powered off). 
*/ - if (err == -ENODATA) { - cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); - err = 0; - } + if (err == -ENODATA) + err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); mgmt_pending_remove(cmd); } -- cgit v1.2.3 From 405a26110a735b88963875459efe63f4d274f49d Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 23:18:22 +0200 Subject: Bluetooth: Move hci_update_page_scan to hci_request.c This is a left-over from the patch that created hci_request.c. The hci_update_page_scan functions should have been moved from hci_core.c there. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 56 --------------------------------------------- net/bluetooth/hci_request.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ee2096c7ec2c..9790a0108e69 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -5253,59 +5253,3 @@ static void hci_cmd_work(struct work_struct *work) } } } - -static bool disconnected_whitelist_entries(struct hci_dev *hdev) -{ - struct bdaddr_list *b; - - list_for_each_entry(b, &hdev->whitelist, list) { - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); - if (!conn) - return true; - - if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) - return true; - } - - return false; -} - -void __hci_update_page_scan(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - u8 scan; - - if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) - return; - - if (!hdev_is_powered(hdev)) - return; - - if (mgmt_powering_down(hdev)) - return; - - if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || - disconnected_whitelist_entries(hdev)) - scan = SCAN_PAGE; - else - scan = SCAN_DISABLED; - - if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) - return; - - if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) - scan |= SCAN_INQUIRY; - - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); -} - -void hci_update_page_scan(struct hci_dev *hdev) -{ - struct hci_request req; - - hci_req_init(&req, hdev); - __hci_update_page_scan(&req); - hci_req_run(&req, NULL); -} diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e49f682f1550..5e64ea70a106 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -396,6 +396,62 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, return 0; } +static bool disconnected_whitelist_entries(struct hci_dev *hdev) +{ + struct bdaddr_list *b; + + list_for_each_entry(b, &hdev->whitelist, list) { + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); + if (!conn) + return true; + + if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) + return true; + } + + return false; +} + +void __hci_update_page_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 scan; + + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + return; + + if (!hdev_is_powered(hdev)) + return; + + if (mgmt_powering_down(hdev)) + return; + + if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) || + disconnected_whitelist_entries(hdev)) + scan = SCAN_PAGE; + else + scan = SCAN_DISABLED; + + if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) + return; + + if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) + scan |= SCAN_INQUIRY; + + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +void 
hci_update_page_scan(struct hci_dev *hdev) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_update_page_scan(&req); + hci_req_run(&req, NULL); +} + /* This function controls the background scanning based on hdev->pend_le_conns * list. If there are pending LE connection we start the background scanning, * otherwise we stop it. -- cgit v1.2.3 From bd37a78de91e1b67b540b43f10bbb2d552348cbd Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 19 Dec 2014 23:45:58 +0100 Subject: mac802154: iface: check concurrent ifaces This patch adds a check for concurrent interfaces while calling interface up. This avoids to have different mac parameters on one phy. Otherwise it could be that a interface can overwrite current phy mac settings which is set by an another interface. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/iface.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'net') diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 9ae893057dd7..61f3ff00a508 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -175,6 +175,79 @@ err: return res; } +static int +ieee802154_check_mac_settings(struct ieee802154_local *local, + struct wpan_dev *wpan_dev, + struct wpan_dev *nwpan_dev) +{ + ASSERT_RTNL(); + + if (local->hw.flags & IEEE802154_HW_PROMISCUOUS) { + if (wpan_dev->promiscuous_mode != nwpan_dev->promiscuous_mode) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_AFILT) { + if (wpan_dev->pan_id != nwpan_dev->pan_id) + return -EBUSY; + + if (wpan_dev->short_addr != nwpan_dev->short_addr) + return -EBUSY; + + if (wpan_dev->extended_addr != nwpan_dev->extended_addr) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { + if (wpan_dev->min_be != nwpan_dev->min_be) + return -EBUSY; + + if (wpan_dev->max_be != nwpan_dev->max_be) + return -EBUSY; + + if (wpan_dev->csma_retries != nwpan_dev->csma_retries) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) { + if (wpan_dev->frame_retries != nwpan_dev->frame_retries) + return -EBUSY; + } + + if (local->hw.flags & IEEE802154_HW_LBT) { + if (wpan_dev->lbt != nwpan_dev->lbt) + return -EBUSY; + } + + return 0; +} + +static int +ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, + enum nl802154_iftype iftype) +{ + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_sub_if_data *nsdata; + + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(nsdata, &local->interfaces, list) { + if (nsdata != sdata && ieee802154_sdata_running(nsdata)) { + int ret; + + /* check all phy mac sublayer settings are the same. + * We have only one phy, different values makes trouble. 
+ */ + ret = ieee802154_check_mac_settings(local, wpan_dev, + &nsdata->wpan_dev); + if (ret < 0) + return ret; + } + } + + return 0; +} + static int mac802154_wpan_open(struct net_device *dev) { int rc; @@ -183,6 +256,10 @@ static int mac802154_wpan_open(struct net_device *dev) struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct wpan_phy *phy = sdata->local->phy; + rc = ieee802154_check_concurrent_iface(sdata, sdata->vif.type); + if (rc < 0) + return rc; + rc = mac802154_slave_open(dev); if (rc < 0) return rc; -- cgit v1.2.3 From cd1c56653a65e5559cf48effe8864ff6df4b4430 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 19 Dec 2014 23:45:59 +0100 Subject: ieee802154: iface: move multiple node type check This patch moves the handling for checking on multiple node type interface to the corresponding concurrent iface check function. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/iface.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 61f3ff00a508..6fb6bdf9868c 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -137,25 +137,11 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) static int mac802154_slave_open(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); - struct ieee802154_sub_if_data *subif; struct ieee802154_local *local = sdata->local; int res = 0; ASSERT_RTNL(); - if (sdata->vif.type == NL802154_IFTYPE_NODE) { - mutex_lock(&sdata->local->iflist_mtx); - list_for_each_entry(subif, &sdata->local->interfaces, list) { - if (subif != sdata && - subif->vif.type == sdata->vif.type && - ieee802154_sdata_running(subif)) { - mutex_unlock(&sdata->local->iflist_mtx); - return -EBUSY; - } - } - mutex_unlock(&sdata->local->iflist_mtx); - } - set_bit(SDATA_STATE_RUNNING, &sdata->state); if (!local->open_count) { @@ -235,6 +221,15 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, if (nsdata != sdata && ieee802154_sdata_running(nsdata)) { int ret; + /* TODO currently we don't support multiple node types + * we need to run skb_clone at rx path. Check if there + * exist really an use case if we need to support + * multiple node types at the same time. + */ + if (sdata->vif.type == NL802154_IFTYPE_NODE && + nsdata->vif.type == NL802154_IFTYPE_NODE) + return -EBUSY; + /* check all phy mac sublayer settings are the same. * We have only one phy, different values makes trouble. */ -- cgit v1.2.3 From 50b5b952b7c2bf2c75c257a62a6c456a0bbfdfa3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 19 Dec 2014 23:05:35 +0100 Subject: Bluetooth: Support static address when BR/EDR has been disabled Every BR/EDR/LE dual-mode controller requires to have a public address and so far that has become the identity address and own address. The only way to change that behavior was with a force_static_address debugfs option. However the host can actually disable the BR/EDR part of a dual-mode controller and turn into a single mode LE controller. In that case it makes perfect sense for a host to use a chosen static address instead of the public address. So if the host disables BR/EDR and configures a static address, then that static address is used as identity address and own address. If the host does not configure a static address, then the public address is used as before. 
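Written out as a standalone predicate, the identity-address rule looks as follows; use_static_identity() is purely illustrative, since the patch open-codes this condition in hci_copy_identity_address() and hci_update_random_address(), as the diff below shows:

/* Prefer the configured static random address whenever the public
 * BR/EDR address must not or cannot be used as identity address.
 */
static bool use_static_identity(struct hci_dev *hdev)
{
	/* debugfs override forcing the static address */
	if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags))
		return true;

	/* LE-only controller without any public address */
	if (!bacmp(&hdev->bdaddr, BDADDR_ANY))
		return true;

	/* dual-mode controller with BR/EDR disabled by the host and a
	 * static address configured by the host
	 */
	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) &&
	    bacmp(&hdev->static_addr, BDADDR_ANY))
		return true;

	return false;
}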
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 8 +++++++- net/bluetooth/hci_request.c | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9790a0108e69..91dca121dbb6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3852,12 +3852,18 @@ static void le_scan_disable_work(struct work_struct *work) * * For debugging purposes it is possible to force controllers with a * public address to use the static random address instead. + * + * In case BR/EDR has been disabled on a dual-mode controller and + * userspace has configured a static address, then that address + * becomes the identity address instead of the public BR/EDR address. */ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type) { if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { bacpy(bdaddr, &hdev->static_addr); *bdaddr_type = ADDR_LE_DEV_RANDOM; } else { diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 5e64ea70a106..324c6418b17c 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -378,9 +378,15 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * address use the static address as random address (but skip * the HCI command if the current random address is already the * static one. + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configured, then use that + * address instead of the public BR/EDR address. */ if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { *own_addr_type = ADDR_LE_DEV_RANDOM; if (bacmp(&hdev->static_addr, &hdev->random_addr)) hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, -- cgit v1.2.3 From 60c5f5fb1f8640cb050822512246b79a68914145 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 16:05:13 +0100 Subject: Bluetooth: Add skeleton functions for debugfs creation The debugfs file creation has been part of the core initialization handling of controllers. With the introduction of Bluetooth 4.2 core specification, the number of debugfs files is increasing even further. To avoid cluttering the core controller handling, create a separate file hci_debugfs.c to centralize all debugfs file creation. For now leave the current files in the core, but in the future all debugfs file creation will be moved. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/Makefile | 2 +- net/bluetooth/hci_core.c | 7 +++++++ net/bluetooth/hci_debugfs.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_debugfs.h | 25 +++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 net/bluetooth/hci_debugfs.c create mode 100644 net/bluetooth/hci_debugfs.h (limited to 'net') diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 0a176fc9e291..364cff1ad4b1 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -13,6 +13,6 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o ecc.o hci_request.o + a2mp.o amp.o ecc.o hci_request.o hci_debugfs.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 91dca121dbb6..b3ab2bfb3f86 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -38,6 +38,7 @@ #include #include "hci_request.h" +#include "hci_debugfs.h" #include "smp.h" static void hci_rx_work(struct work_struct *work); @@ -1865,6 +1866,8 @@ static int __hci_init(struct hci_dev *hdev) debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, &conn_info_max_age_fops); + hci_debugfs_create_common(hdev); + if (lmp_bredr_capable(hdev)) { debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, &inquiry_cache_fops); @@ -1874,6 +1877,8 @@ static int __hci_init(struct hci_dev *hdev) hdev, &dev_class_fops); debugfs_create_file("voice_setting", 0444, hdev->debugfs, hdev, &voice_setting_fops); + + hci_debugfs_create_bredr(hdev); } if (lmp_ssp_capable(hdev)) { @@ -1944,6 +1949,8 @@ static int __hci_init(struct hci_dev *hdev) hdev->debugfs, &hdev->discov_interleaved_timeout); + hci_debugfs_create_le(hdev); + smp_register(hdev); } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c new file mode 100644 index 000000000000..b8cf0734cc3d --- /dev/null +++ b/net/bluetooth/hci_debugfs.c @@ -0,0 +1,41 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. 
+*/ + +#include + +#include +#include + +#include "hci_debugfs.h" + +void hci_debugfs_create_common(struct hci_dev *hdev) +{ +} + +void hci_debugfs_create_bredr(struct hci_dev *hdev) +{ +} + +void hci_debugfs_create_le(struct hci_dev *hdev) +{ +} diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h new file mode 100644 index 000000000000..f191100b50c5 --- /dev/null +++ b/net/bluetooth/hci_debugfs.h @@ -0,0 +1,25 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +void hci_debugfs_create_common(struct hci_dev *hdev); +void hci_debugfs_create_bredr(struct hci_dev *hdev); +void hci_debugfs_create_le(struct hci_dev *hdev); -- cgit v1.2.3 From 40ce72b1951c594c7f9f5f5df4aeebed6a07af92 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 16:05:14 +0100 Subject: Bluetooth: Move common debugfs file creation into hci_debugfs.c This patch moves the creation of the debugs files common for all controllers into hci_debugfs.c file. 
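All of the entries moved by this and the following patches use one of two stock debugfs idioms: a read-only seq_file (single_open() plus a file_operations table) or a numeric attribute built with DEFINE_SIMPLE_ATTRIBUTE(). A minimal sketch of the numeric idiom follows; the example_value state, the helpers and the file name are made up purely to illustrate the pattern that conn_info_min_age and friends rely on:

/* Stand-in for a real struct hci_dev field. */
static u64 example_value;

static int example_value_set(void *data, u64 val)
{
	struct hci_dev *hdev = data;

	hci_dev_lock(hdev);
	example_value = val;
	hci_dev_unlock(hdev);

	return 0;
}

static int example_value_get(void *data, u64 *val)
{
	struct hci_dev *hdev = data;

	hci_dev_lock(hdev);
	*val = example_value;
	hci_dev_unlock(hdev);

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(example_value_fops, example_value_get,
			example_value_set, "%llu\n");

static void example_debugfs_create(struct hci_dev *hdev)
{
	/* Appears as /sys/kernel/debug/bluetooth/<hciX>/example_value,
	 * assuming debugfs is mounted at the usual location.
	 */
	debugfs_create_file("example_value", 0644, hdev->debugfs, hdev,
			    &example_value_fops);
}

Userspace then reads and writes the value with plain file I/O; no mgmt or ioctl interface is involved.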
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 201 -------------------------------------------- net/bluetooth/hci_debugfs.c | 200 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 201 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b3ab2bfb3f86..dde29f4bfaa9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -139,104 +139,6 @@ static const struct file_operations dut_mode_fops = { .llseek = default_llseek, }; -static int features_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - u8 p; - - hci_dev_lock(hdev); - for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { - seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p, - hdev->features[p][0], hdev->features[p][1], - hdev->features[p][2], hdev->features[p][3], - hdev->features[p][4], hdev->features[p][5], - hdev->features[p][6], hdev->features[p][7]); - } - if (lmp_le_capable(hdev)) - seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " - "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", - hdev->le_features[0], hdev->le_features[1], - hdev->le_features[2], hdev->le_features[3], - hdev->le_features[4], hdev->le_features[5], - hdev->le_features[6], hdev->le_features[7]); - hci_dev_unlock(hdev); - - return 0; -} - -static int features_open(struct inode *inode, struct file *file) -{ - return single_open(file, features_show, inode->i_private); -} - -static const struct file_operations features_fops = { - .open = features_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int blacklist_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->blacklist, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - hci_dev_unlock(hdev); - - return 0; -} - -static int blacklist_open(struct inode *inode, struct file *file) -{ - return single_open(file, blacklist_show, inode->i_private); -} - -static const struct file_operations blacklist_fops = { - .open = blacklist_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int uuids_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct bt_uuid *uuid; - - hci_dev_lock(hdev); - list_for_each_entry(uuid, &hdev->uuids, list) { - u8 i, val[16]; - - /* The Bluetooth UUID values are stored in big endian, - * but with reversed byte order. So convert them into - * the right order for the %pUb modifier. 
- */ - for (i = 0; i < 16; i++) - val[i] = uuid->uuid[15 - i]; - - seq_printf(f, "%pUb\n", val); - } - hci_dev_unlock(hdev); - - return 0; -} - -static int uuids_open(struct inode *inode, struct file *file) -{ - return single_open(file, uuids_show, inode->i_private); -} - -static const struct file_operations uuids_fops = { - .open = uuids_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int inquiry_cache_show(struct seq_file *f, void *p) { struct hci_dev *hdev = f->private; @@ -584,62 +486,6 @@ static int sniff_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, sniff_max_interval_set, "%llu\n"); -static int conn_info_min_age_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val > hdev->conn_info_max_age) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->conn_info_min_age = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_info_min_age_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->conn_info_min_age; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, - conn_info_min_age_set, "%llu\n"); - -static int conn_info_max_age_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val < hdev->conn_info_min_age) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->conn_info_max_age = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_info_max_age_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->conn_info_max_age; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, - conn_info_max_age_set, "%llu\n"); - static int identity_show(struct seq_file *f, void *p) { struct hci_dev *hdev = f->private; @@ -1041,36 +887,6 @@ static int adv_max_interval_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, adv_max_interval_set, "%llu\n"); -static int device_list_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct hci_conn_params *p; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->whitelist, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - list_for_each_entry(p, &hdev->le_conn_params, list) { - seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type, - p->auto_connect); - } - hci_dev_unlock(hdev); - - return 0; -} - -static int device_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, device_list_show, inode->i_private); -} - -static const struct file_operations device_list_fops = { - .open = device_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* ---- HCI requests ---- */ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) @@ -1849,23 +1665,6 @@ static int __hci_init(struct hci_dev *hdev) if (!test_bit(HCI_SETUP, &hdev->dev_flags)) return 0; - debugfs_create_file("features", 0444, hdev->debugfs, hdev, - &features_fops); - debugfs_create_u16("manufacturer", 0444, hdev->debugfs, - &hdev->manufacturer); - debugfs_create_u8("hci_version", 0444, hdev->debugfs, &hdev->hci_ver); - debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); - debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, - &device_list_fops); - debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, - 
&blacklist_fops); - debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); - - debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, - &conn_info_min_age_fops); - debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, - &conn_info_max_age_fops); - hci_debugfs_create_common(hdev); if (lmp_bredr_capable(hdev)) { diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index b8cf0734cc3d..4d06beef8c53 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -28,8 +28,208 @@ #include "hci_debugfs.h" +static int features_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + u8 p; + + hci_dev_lock(hdev); + for (p = 0; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { + seq_printf(f, "%2u: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " + "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", p, + hdev->features[p][0], hdev->features[p][1], + hdev->features[p][2], hdev->features[p][3], + hdev->features[p][4], hdev->features[p][5], + hdev->features[p][6], hdev->features[p][7]); + } + if (lmp_le_capable(hdev)) + seq_printf(f, "LE: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x " + "0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", + hdev->le_features[0], hdev->le_features[1], + hdev->le_features[2], hdev->le_features[3], + hdev->le_features[4], hdev->le_features[5], + hdev->le_features[6], hdev->le_features[7]); + hci_dev_unlock(hdev); + + return 0; +} + +static int features_open(struct inode *inode, struct file *file) +{ + return single_open(file, features_show, inode->i_private); +} + +static const struct file_operations features_fops = { + .open = features_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int device_list_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct hci_conn_params *p; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->whitelist, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + list_for_each_entry(p, &hdev->le_conn_params, list) { + seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type, + p->auto_connect); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int device_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, device_list_show, inode->i_private); +} + +static const struct file_operations device_list_fops = { + .open = device_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int blacklist_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->blacklist, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + hci_dev_unlock(hdev); + + return 0; +} + +static int blacklist_open(struct inode *inode, struct file *file) +{ + return single_open(file, blacklist_show, inode->i_private); +} + +static const struct file_operations blacklist_fops = { + .open = blacklist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int uuids_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bt_uuid *uuid; + + hci_dev_lock(hdev); + list_for_each_entry(uuid, &hdev->uuids, list) { + u8 i, val[16]; + + /* The Bluetooth UUID values are stored in big endian, + * but with reversed byte order. So convert them into + * the right order for the %pUb modifier. 
+ */ + for (i = 0; i < 16; i++) + val[i] = uuid->uuid[15 - i]; + + seq_printf(f, "%pUb\n", val); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int uuids_open(struct inode *inode, struct file *file) +{ + return single_open(file, uuids_show, inode->i_private); +} + +static const struct file_operations uuids_fops = { + .open = uuids_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int conn_info_min_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val > hdev->conn_info_max_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_min_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_min_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_min_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, + conn_info_min_age_set, "%llu\n"); + +static int conn_info_max_age_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val < hdev->conn_info_min_age) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->conn_info_max_age = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_info_max_age_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->conn_info_max_age; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, + conn_info_max_age_set, "%llu\n"); + void hci_debugfs_create_common(struct hci_dev *hdev) { + debugfs_create_file("features", 0444, hdev->debugfs, hdev, + &features_fops); + debugfs_create_u16("manufacturer", 0444, hdev->debugfs, + &hdev->manufacturer); + debugfs_create_u8("hci_version", 0444, hdev->debugfs, &hdev->hci_ver); + debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); + debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, + &device_list_fops); + debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, + &blacklist_fops); + debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + + debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, + &conn_info_min_age_fops); + debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, + &conn_info_max_age_fops); } void hci_debugfs_create_bredr(struct hci_dev *hdev) -- cgit v1.2.3 From 71c3b60ec6d288f2551b517186b025da4cbb18b5 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 16:05:15 +0100 Subject: Bluetooth: Move BR/EDR debugfs file creation into hci_debugfs.c This patch moves the creation of the debugs files for BR/EDR controllers into hci_debugfs.c file. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 350 +------------------------------------------- net/bluetooth/hci_debugfs.c | 349 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 350 insertions(+), 349 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index dde29f4bfaa9..d8976cb01b89 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -139,266 +139,6 @@ static const struct file_operations dut_mode_fops = { .llseek = default_llseek, }; -static int inquiry_cache_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - struct discovery_state *cache = &hdev->discovery; - struct inquiry_entry *e; - - hci_dev_lock(hdev); - - list_for_each_entry(e, &cache->all, all) { - struct inquiry_data *data = &e->data; - seq_printf(f, "%pMR %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", - &data->bdaddr, - data->pscan_rep_mode, data->pscan_period_mode, - data->pscan_mode, data->dev_class[2], - data->dev_class[1], data->dev_class[0], - __le16_to_cpu(data->clock_offset), - data->rssi, data->ssp_mode, e->timestamp); - } - - hci_dev_unlock(hdev); - - return 0; -} - -static int inquiry_cache_open(struct inode *inode, struct file *file) -{ - return single_open(file, inquiry_cache_show, inode->i_private); -} - -static const struct file_operations inquiry_cache_fops = { - .open = inquiry_cache_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int link_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct link_key *key; - - rcu_read_lock(); - list_for_each_entry_rcu(key, &hdev->link_keys, list) - seq_printf(f, "%pMR %u %*phN %u\n", &key->bdaddr, key->type, - HCI_LINK_KEY_SIZE, key->val, key->pin_len); - rcu_read_unlock(); - - return 0; -} - -static int link_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, link_keys_show, inode->i_private); -} - -static const struct file_operations link_keys_fops = { - .open = link_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int dev_class_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "0x%.2x%.2x%.2x\n", hdev->dev_class[2], - hdev->dev_class[1], hdev->dev_class[0]); - hci_dev_unlock(hdev); - - return 0; -} - -static int dev_class_open(struct inode *inode, struct file *file) -{ - return single_open(file, dev_class_show, inode->i_private); -} - -static const struct file_operations dev_class_fops = { - .open = dev_class_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int voice_setting_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->voice_setting; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, - NULL, "0x%4.4llx\n"); - -static int auto_accept_delay_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - hdev->auto_accept_delay = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int auto_accept_delay_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->auto_accept_delay; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, - auto_accept_delay_set, "%llu\n"); - -static ssize_t force_sc_support_read(struct file *file, char __user 
*user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_SC, &hdev->dbg_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_sc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_SC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_sc_support_fops = { - .open = simple_open, - .read = force_sc_support_read, - .write = force_sc_support_write, - .llseek = default_llseek, -}; - -static ssize_t force_lesc_support_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_LESC, &hdev->dbg_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_lesc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_LESC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_lesc_support_fops = { - .open = simple_open, - .read = force_lesc_support_read, - .write = force_lesc_support_write, - .llseek = default_llseek, -}; - -static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static const struct file_operations sc_only_mode_fops = { - .open = simple_open, - .read = sc_only_mode_read, - .llseek = default_llseek, -}; - -static int idle_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val != 0 && (val < 500 || val > 3600000)) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->idle_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int idle_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->idle_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, - idle_timeout_set, "%llu\n"); - static int rpa_timeout_set(void *data, u64 val) { struct hci_dev *hdev = data; @@ -430,62 +170,6 @@ static int rpa_timeout_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, rpa_timeout_set, "%llu\n"); -static int sniff_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->sniff_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int sniff_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->sniff_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get, - sniff_min_interval_set, "%llu\n"); - -static int sniff_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->sniff_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int sniff_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->sniff_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, - sniff_max_interval_set, "%llu\n"); - static int identity_show(struct seq_file *f, void *p) { struct hci_dev *hdev = f->private; @@ -1667,40 +1351,8 @@ static int __hci_init(struct hci_dev *hdev) hci_debugfs_create_common(hdev); - if (lmp_bredr_capable(hdev)) { - debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, - hdev, &inquiry_cache_fops); - debugfs_create_file("link_keys", 0400, hdev->debugfs, - hdev, &link_keys_fops); - debugfs_create_file("dev_class", 0444, hdev->debugfs, - hdev, &dev_class_fops); - debugfs_create_file("voice_setting", 0444, hdev->debugfs, - hdev, &voice_setting_fops); - + if (lmp_bredr_capable(hdev)) hci_debugfs_create_bredr(hdev); - } - - if (lmp_ssp_capable(hdev)) { - debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, - hdev, &auto_accept_delay_fops); - debugfs_create_file("force_sc_support", 0644, hdev->debugfs, - hdev, &force_sc_support_fops); - debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, - hdev, &sc_only_mode_fops); - if (lmp_le_capable(hdev)) - debugfs_create_file("force_lesc_support", 0644, - hdev->debugfs, hdev, - &force_lesc_support_fops); - } - - if (lmp_sniff_capable(hdev)) { - debugfs_create_file("idle_timeout", 0644, hdev->debugfs, - hdev, &idle_timeout_fops); - debugfs_create_file("sniff_min_interval", 0644, hdev->debugfs, - hdev, &sniff_min_interval_fops); - debugfs_create_file("sniff_max_interval", 0644, hdev->debugfs, - 
hdev, &sniff_max_interval_fops); - } if (lmp_le_capable(hdev)) { debugfs_create_file("identity", 0400, hdev->debugfs, diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 4d06beef8c53..435f091301cd 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -232,8 +232,357 @@ void hci_debugfs_create_common(struct hci_dev *hdev) &conn_info_max_age_fops); } +static int inquiry_cache_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct discovery_state *cache = &hdev->discovery; + struct inquiry_entry *e; + + hci_dev_lock(hdev); + + list_for_each_entry(e, &cache->all, all) { + struct inquiry_data *data = &e->data; + seq_printf(f, "%pMR %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n", + &data->bdaddr, + data->pscan_rep_mode, data->pscan_period_mode, + data->pscan_mode, data->dev_class[2], + data->dev_class[1], data->dev_class[0], + __le16_to_cpu(data->clock_offset), + data->rssi, data->ssp_mode, e->timestamp); + } + + hci_dev_unlock(hdev); + + return 0; +} + +static int inquiry_cache_open(struct inode *inode, struct file *file) +{ + return single_open(file, inquiry_cache_show, inode->i_private); +} + +static const struct file_operations inquiry_cache_fops = { + .open = inquiry_cache_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int link_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct link_key *key; + + rcu_read_lock(); + list_for_each_entry_rcu(key, &hdev->link_keys, list) + seq_printf(f, "%pMR %u %*phN %u\n", &key->bdaddr, key->type, + HCI_LINK_KEY_SIZE, key->val, key->pin_len); + rcu_read_unlock(); + + return 0; +} + +static int link_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, link_keys_show, inode->i_private); +} + +static const struct file_operations link_keys_fops = { + .open = link_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int dev_class_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "0x%.2x%.2x%.2x\n", hdev->dev_class[2], + hdev->dev_class[1], hdev->dev_class[0]); + hci_dev_unlock(hdev); + + return 0; +} + +static int dev_class_open(struct inode *inode, struct file *file) +{ + return single_open(file, dev_class_show, inode->i_private); +} + +static const struct file_operations dev_class_fops = { + .open = dev_class_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int voice_setting_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->voice_setting; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, + NULL, "0x%4.4llx\n"); + +static int auto_accept_delay_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + hdev->auto_accept_delay = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int auto_accept_delay_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->auto_accept_delay; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, + auto_accept_delay_set, "%llu\n"); + +static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 
'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations sc_only_mode_fops = { + .open = simple_open, + .read = sc_only_mode_read, + .llseek = default_llseek, +}; + +static ssize_t force_sc_support_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_SC, &hdev->dbg_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_sc_support_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (test_bit(HCI_UP, &hdev->flags)) + return -EBUSY; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + return -EALREADY; + + change_bit(HCI_FORCE_SC, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_sc_support_fops = { + .open = simple_open, + .read = force_sc_support_read, + .write = force_sc_support_write, + .llseek = default_llseek, +}; + +static ssize_t force_lesc_support_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_LESC, &hdev->dbg_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_lesc_support_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + return -EALREADY; + + change_bit(HCI_FORCE_LESC, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_lesc_support_fops = { + .open = simple_open, + .read = force_lesc_support_read, + .write = force_lesc_support_write, + .llseek = default_llseek, +}; + +static int idle_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val != 0 && (val < 500 || val > 3600000)) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->idle_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int idle_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->idle_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, + idle_timeout_set, "%llu\n"); + +static int sniff_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->sniff_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int sniff_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->sniff_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get, + 
sniff_min_interval_set, "%llu\n"); + +static int sniff_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->sniff_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int sniff_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->sniff_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, + sniff_max_interval_set, "%llu\n"); + void hci_debugfs_create_bredr(struct hci_dev *hdev) { + debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, + &inquiry_cache_fops); + debugfs_create_file("link_keys", 0400, hdev->debugfs, hdev, + &link_keys_fops); + debugfs_create_file("dev_class", 0444, hdev->debugfs, hdev, + &dev_class_fops); + debugfs_create_file("voice_setting", 0444, hdev->debugfs, hdev, + &voice_setting_fops); + + if (lmp_ssp_capable(hdev)) { + debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, + hdev, &auto_accept_delay_fops); + debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, + hdev, &sc_only_mode_fops); + + debugfs_create_file("force_sc_support", 0644, hdev->debugfs, + hdev, &force_sc_support_fops); + + if (lmp_le_capable(hdev)) + debugfs_create_file("force_lesc_support", 0644, + hdev->debugfs, hdev, + &force_lesc_support_fops); + } + + if (lmp_sniff_capable(hdev)) { + debugfs_create_file("idle_timeout", 0644, hdev->debugfs, + hdev, &idle_timeout_fops); + debugfs_create_file("sniff_min_interval", 0644, hdev->debugfs, + hdev, &sniff_min_interval_fops); + debugfs_create_file("sniff_max_interval", 0644, hdev->debugfs, + hdev, &sniff_max_interval_fops); + } } void hci_debugfs_create_le(struct hci_dev *hdev) -- cgit v1.2.3 From 3a5c82b78fd28ef39c5d07e72c78a569a6ea658a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 16:05:16 +0100 Subject: Bluetooth: Move LE debugfs file creation into hci_debugfs.c This patch moves the creation of the debugs files for LE controllers into hci_debugfs.c file. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 478 -------------------------------------------- net/bluetooth/hci_debugfs.c | 474 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 474 insertions(+), 478 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d8976cb01b89..2db071b2f0f4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -139,438 +139,6 @@ static const struct file_operations dut_mode_fops = { .llseek = default_llseek, }; -static int rpa_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - /* Require the RPA timeout to be at least 30 seconds and at most - * 24 hours. 
- */ - if (val < 30 || val > (60 * 60 * 24)) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->rpa_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int rpa_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->rpa_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, - rpa_timeout_set, "%llu\n"); - -static int identity_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - bdaddr_t addr; - u8 addr_type; - - hci_dev_lock(hdev); - - hci_copy_identity_address(hdev, &addr, &addr_type); - - seq_printf(f, "%pMR (type %u) %*phN %pMR\n", &addr, addr_type, - 16, hdev->irk, &hdev->rpa); - - hci_dev_unlock(hdev); - - return 0; -} - -static int identity_open(struct inode *inode, struct file *file) -{ - return single_open(file, identity_show, inode->i_private); -} - -static const struct file_operations identity_fops = { - .open = identity_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int random_address_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "%pMR\n", &hdev->random_addr); - hci_dev_unlock(hdev); - - return 0; -} - -static int random_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, random_address_show, inode->i_private); -} - -static const struct file_operations random_address_fops = { - .open = random_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int static_address_show(struct seq_file *f, void *p) -{ - struct hci_dev *hdev = f->private; - - hci_dev_lock(hdev); - seq_printf(f, "%pMR\n", &hdev->static_addr); - hci_dev_unlock(hdev); - - return 0; -} - -static int static_address_open(struct inode *inode, struct file *file) -{ - return single_open(file, static_address_show, inode->i_private); -} - -static const struct file_operations static_address_fops = { - .open = static_address_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static ssize_t force_static_address_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_static_address_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_static_address_fops = { - .open = simple_open, - .read = force_static_address_read, - .write = force_static_address_write, - .llseek = default_llseek, -}; - -static int white_list_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct bdaddr_list *b; - - hci_dev_lock(hdev); - list_for_each_entry(b, &hdev->le_white_list, list) - seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); - hci_dev_unlock(hdev); - - return 0; -} - -static int white_list_open(struct inode *inode, struct file *file) -{ - return single_open(file, white_list_show, inode->i_private); -} - -static const struct file_operations white_list_fops = { - .open = white_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int identity_resolving_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct smp_irk *irk; - - rcu_read_lock(); - list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { - seq_printf(f, "%pMR (type %u) %*phN %pMR\n", - &irk->bdaddr, irk->addr_type, - 16, irk->val, &irk->rpa); - } - rcu_read_unlock(); - - return 0; -} - -static int identity_resolving_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, identity_resolving_keys_show, - inode->i_private); -} - -static const struct file_operations identity_resolving_keys_fops = { - .open = identity_resolving_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int long_term_keys_show(struct seq_file *f, void *ptr) -{ - struct hci_dev *hdev = f->private; - struct smp_ltk *ltk; - - rcu_read_lock(); - list_for_each_entry_rcu(ltk, &hdev->long_term_keys, list) - seq_printf(f, "%pMR (type %u) %u 0x%02x %u %.4x %.16llx %*phN\n", - <k->bdaddr, ltk->bdaddr_type, ltk->authenticated, - ltk->type, ltk->enc_size, __le16_to_cpu(ltk->ediv), - __le64_to_cpu(ltk->rand), 16, ltk->val); - rcu_read_unlock(); - - return 0; -} - -static int long_term_keys_open(struct inode *inode, struct file *file) -{ - return single_open(file, long_term_keys_show, inode->i_private); -} - -static const struct file_operations long_term_keys_fops = { - .open = long_term_keys_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int conn_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - 
-DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, - conn_min_interval_set, "%llu\n"); - -static int conn_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, - conn_max_interval_set, "%llu\n"); - -static int conn_latency_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val > 0x01f3) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_conn_latency = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int conn_latency_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_conn_latency; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, - conn_latency_set, "%llu\n"); - -static int supervision_timeout_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x000a || val > 0x0c80) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_supv_timeout = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int supervision_timeout_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_supv_timeout; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, - supervision_timeout_set, "%llu\n"); - -static int adv_channel_map_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x01 || val > 0x07) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_channel_map = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_channel_map_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_channel_map; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, - adv_channel_map_set, "%llu\n"); - -static int adv_min_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_min_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_min_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_min_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, - adv_min_interval_set, "%llu\n"); - -static int adv_max_interval_set(void *data, u64 val) -{ - struct hci_dev *hdev = data; - - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) - return -EINVAL; - - hci_dev_lock(hdev); - hdev->le_adv_max_interval = val; - hci_dev_unlock(hdev); - - return 0; -} - -static int adv_max_interval_get(void *data, u64 *val) -{ - struct hci_dev *hdev = data; - - hci_dev_lock(hdev); - *val = hdev->le_adv_max_interval; - hci_dev_unlock(hdev); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, - adv_max_interval_set, "%llu\n"); - /* ---- HCI requests ---- */ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) @@ -1355,53 
+923,7 @@ static int __hci_init(struct hci_dev *hdev) hci_debugfs_create_bredr(hdev); if (lmp_le_capable(hdev)) { - debugfs_create_file("identity", 0400, hdev->debugfs, - hdev, &identity_fops); - debugfs_create_file("rpa_timeout", 0644, hdev->debugfs, - hdev, &rpa_timeout_fops); - debugfs_create_file("random_address", 0444, hdev->debugfs, - hdev, &random_address_fops); - debugfs_create_file("static_address", 0444, hdev->debugfs, - hdev, &static_address_fops); - - /* For controllers with a public address, provide a debug - * option to force the usage of the configured static - * address. By default the public address is used. - */ - if (bacmp(&hdev->bdaddr, BDADDR_ANY)) - debugfs_create_file("force_static_address", 0644, - hdev->debugfs, hdev, - &force_static_address_fops); - - debugfs_create_u8("white_list_size", 0444, hdev->debugfs, - &hdev->le_white_list_size); - debugfs_create_file("white_list", 0444, hdev->debugfs, hdev, - &white_list_fops); - debugfs_create_file("identity_resolving_keys", 0400, - hdev->debugfs, hdev, - &identity_resolving_keys_fops); - debugfs_create_file("long_term_keys", 0400, hdev->debugfs, - hdev, &long_term_keys_fops); - debugfs_create_file("conn_min_interval", 0644, hdev->debugfs, - hdev, &conn_min_interval_fops); - debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, - hdev, &conn_max_interval_fops); - debugfs_create_file("conn_latency", 0644, hdev->debugfs, - hdev, &conn_latency_fops); - debugfs_create_file("supervision_timeout", 0644, hdev->debugfs, - hdev, &supervision_timeout_fops); - debugfs_create_file("adv_channel_map", 0644, hdev->debugfs, - hdev, &adv_channel_map_fops); - debugfs_create_file("adv_min_interval", 0644, hdev->debugfs, - hdev, &adv_min_interval_fops); - debugfs_create_file("adv_max_interval", 0644, hdev->debugfs, - hdev, &adv_max_interval_fops); - debugfs_create_u16("discov_interleaved_timeout", 0644, - hdev->debugfs, - &hdev->discov_interleaved_timeout); - hci_debugfs_create_le(hdev); - smp_register(hdev); } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 435f091301cd..a7a0db03b0b8 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -585,6 +585,480 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) } } +static int identity_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + bdaddr_t addr; + u8 addr_type; + + hci_dev_lock(hdev); + + hci_copy_identity_address(hdev, &addr, &addr_type); + + seq_printf(f, "%pMR (type %u) %*phN %pMR\n", &addr, addr_type, + 16, hdev->irk, &hdev->rpa); + + hci_dev_unlock(hdev); + + return 0; +} + +static int identity_open(struct inode *inode, struct file *file) +{ + return single_open(file, identity_show, inode->i_private); +} + +static const struct file_operations identity_fops = { + .open = identity_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int rpa_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + /* Require the RPA timeout to be at least 30 seconds and at most + * 24 hours. 
+ */ + if (val < 30 || val > (60 * 60 * 24)) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->rpa_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int rpa_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->rpa_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, + rpa_timeout_set, "%llu\n"); + +static int random_address_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "%pMR\n", &hdev->random_addr); + hci_dev_unlock(hdev); + + return 0; +} + +static int random_address_open(struct inode *inode, struct file *file) +{ + return single_open(file, random_address_show, inode->i_private); +} + +static const struct file_operations random_address_fops = { + .open = random_address_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int static_address_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + + hci_dev_lock(hdev); + seq_printf(f, "%pMR\n", &hdev->static_addr); + hci_dev_unlock(hdev); + + return 0; +} + +static int static_address_open(struct inode *inode, struct file *file) +{ + return single_open(file, static_address_show, inode->i_private); +} + +static const struct file_operations static_address_fops = { + .open = static_address_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t force_static_address_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_static_address_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (test_bit(HCI_UP, &hdev->flags)) + return -EBUSY; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags)) + return -EALREADY; + + change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_static_address_fops = { + .open = simple_open, + .read = force_static_address_read, + .write = force_static_address_write, + .llseek = default_llseek, +}; + +static int white_list_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *b; + + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->le_white_list, list) + seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type); + hci_dev_unlock(hdev); + + return 0; +} + +static int white_list_open(struct inode *inode, struct file *file) +{ + return single_open(file, white_list_show, inode->i_private); +} + +static const struct file_operations white_list_fops = { + .open = white_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int identity_resolving_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct smp_irk *irk; + + rcu_read_lock(); + list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { + seq_printf(f, "%pMR (type %u) %*phN %pMR\n", + 
&irk->bdaddr, irk->addr_type, + 16, irk->val, &irk->rpa); + } + rcu_read_unlock(); + + return 0; +} + +static int identity_resolving_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, identity_resolving_keys_show, + inode->i_private); +} + +static const struct file_operations identity_resolving_keys_fops = { + .open = identity_resolving_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int long_term_keys_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct smp_ltk *ltk; + + rcu_read_lock(); + list_for_each_entry_rcu(ltk, &hdev->long_term_keys, list) + seq_printf(f, "%pMR (type %u) %u 0x%02x %u %.4x %.16llx %*phN\n", + <k->bdaddr, ltk->bdaddr_type, ltk->authenticated, + ltk->type, ltk->enc_size, __le16_to_cpu(ltk->ediv), + __le64_to_cpu(ltk->rand), 16, ltk->val); + rcu_read_unlock(); + + return 0; +} + +static int long_term_keys_open(struct inode *inode, struct file *file) +{ + return single_open(file, long_term_keys_show, inode->i_private); +} + +static const struct file_operations long_term_keys_fops = { + .open = long_term_keys_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int conn_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, + conn_min_interval_set, "%llu\n"); + +static int conn_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, + conn_max_interval_set, "%llu\n"); + +static int conn_latency_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val > 0x01f3) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_conn_latency = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int conn_latency_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_conn_latency; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, + conn_latency_set, "%llu\n"); + +static int supervision_timeout_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x000a || val > 0x0c80) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_supv_timeout = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int supervision_timeout_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_supv_timeout; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, + supervision_timeout_set, "%llu\n"); + +static int adv_channel_map_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; 
+ + if (val < 0x01 || val > 0x07) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_channel_map = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_channel_map_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_channel_map; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, + adv_channel_map_set, "%llu\n"); + +static int adv_min_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_min_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_min_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_min_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, + adv_min_interval_set, "%llu\n"); + +static int adv_max_interval_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->le_adv_max_interval = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int adv_max_interval_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->le_adv_max_interval; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, + adv_max_interval_set, "%llu\n"); + void hci_debugfs_create_le(struct hci_dev *hdev) { + debugfs_create_file("identity", 0400, hdev->debugfs, hdev, + &identity_fops); + debugfs_create_file("rpa_timeout", 0644, hdev->debugfs, hdev, + &rpa_timeout_fops); + debugfs_create_file("random_address", 0444, hdev->debugfs, hdev, + &random_address_fops); + debugfs_create_file("static_address", 0444, hdev->debugfs, hdev, + &static_address_fops); + + /* For controllers with a public address, provide a debug + * option to force the usage of the configured static + * address. By default the public address is used. 
+ */ + if (bacmp(&hdev->bdaddr, BDADDR_ANY)) + debugfs_create_file("force_static_address", 0644, + hdev->debugfs, hdev, + &force_static_address_fops); + + debugfs_create_u8("white_list_size", 0444, hdev->debugfs, + &hdev->le_white_list_size); + debugfs_create_file("white_list", 0444, hdev->debugfs, hdev, + &white_list_fops); + debugfs_create_file("identity_resolving_keys", 0400, hdev->debugfs, + hdev, &identity_resolving_keys_fops); + debugfs_create_file("long_term_keys", 0400, hdev->debugfs, hdev, + &long_term_keys_fops); + debugfs_create_file("conn_min_interval", 0644, hdev->debugfs, hdev, + &conn_min_interval_fops); + debugfs_create_file("conn_max_interval", 0644, hdev->debugfs, hdev, + &conn_max_interval_fops); + debugfs_create_file("conn_latency", 0644, hdev->debugfs, hdev, + &conn_latency_fops); + debugfs_create_file("supervision_timeout", 0644, hdev->debugfs, hdev, + &supervision_timeout_fops); + debugfs_create_file("adv_channel_map", 0644, hdev->debugfs, hdev, + &adv_channel_map_fops); + debugfs_create_file("adv_min_interval", 0644, hdev->debugfs, hdev, + &adv_min_interval_fops); + debugfs_create_file("adv_max_interval", 0644, hdev->debugfs, hdev, + &adv_max_interval_fops); + debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs, + &hdev->discov_interleaved_timeout); } -- cgit v1.2.3 From a9f6068e0072839594d246089204644bffd2c988 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 16:28:39 +0100 Subject: Bluetooth: Enable basics for LE Data Length Extension feature When the controller supports the new LE Data Length Extension feature from Bluetooth 4.2 specification, enable the new events and read the values for default and maxmimum data length supported by the controller. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2db071b2f0f4..01e35ef6d201 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -805,6 +805,12 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) * Parameter Request */ + /* If the controller supports the Data Length Extension + * feature, enable the corresponding event. + */ + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) + events[0] |= 0x40; /* LE Data Length Change */ + /* If the controller supports Extended Scanner Filter * Policies, enable the correspondig event. */ @@ -835,6 +841,14 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); } + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { + /* Read LE Maximum Data Length */ + hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); + + /* Read LE Suggested Default Data Length */ + hci_req_add(req, HCI_OP_LE_READ_DEF_DATA_LEN, 0, NULL); + } + hci_set_le_support(req); } -- cgit v1.2.3 From a8e1bfaa55cf8ac4e419a09bdda5bb45bcd8f985 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 16:28:40 +0100 Subject: Bluetooth: Store default and maximum LE data length settings When the controller supports the LE Data Length Extension feature, the default and maximum data length are read and now stored. For backwards compatibility all values are initialized to the data length values from Bluetooth 4.1 and earlier specifications. 
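A note on the magic numbers used for the new defaults below (this is not part of the patch): 0x001b is 27 decimal, the largest LE payload allowed before the 4.2 Data Length Extension, and 0x0148 is 328, roughly the number of microseconds such a packet occupies on air at 1 Mbit/s once the fixed per-packet overhead is added. A minimal stand-alone check of that arithmetic, assuming the usual 14 octets of overhead (preamble 1, access address 4, PDU header 2, MIC 4, CRC 3):

#include <stdio.h>

int main(void)
{
        unsigned int tx_len   = 0x001b;  /* 27 octets of payload        */
        unsigned int tx_time  = 0x0148;  /* 328 microseconds            */
        unsigned int overhead = 14;      /* assumed per-packet overhead */

        /* at 1 Mbit/s each octet takes 8 usecs on air */
        printf("computed air time: %u usecs\n", (tx_len + overhead) * 8);
        printf("default tx_time  : %u usecs\n", tx_time);
        return 0;
}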
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 6 ++++ net/bluetooth/hci_core.c | 6 ++++ net/bluetooth/hci_event.c | 61 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 79724c87ab00..f20f6bd668bd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -220,6 +220,12 @@ struct hci_dev { __u16 le_conn_max_interval; __u16 le_conn_latency; __u16 le_supv_timeout; + __u16 le_def_tx_len; + __u16 le_def_tx_time; + __u16 le_max_tx_len; + __u16 le_max_tx_time; + __u16 le_max_rx_len; + __u16 le_max_rx_time; __u16 discov_interleaved_timeout; __u16 conn_info_min_age; __u16 conn_info_max_age; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 01e35ef6d201..47f0311d1006 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2896,6 +2896,12 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_conn_max_interval = 0x0038; hdev->le_conn_latency = 0x0000; hdev->le_supv_timeout = 0x002a; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; + hdev->le_max_tx_time = 0x0148; + hdev->le_max_rx_len = 0x001b; + hdev->le_max_rx_time = 0x0148; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a412eb1e1f61..a3055e90a5bb 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1280,6 +1280,55 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev, memcpy(hdev->le_states, rp->le_states, 8); } +static void hci_cc_le_read_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_def_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_def_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_def_tx_time = le16_to_cpu(rp->tx_time); +} + +static void hci_cc_le_write_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_cp_le_write_def_data_len *sent; + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_WRITE_DEF_DATA_LEN); + if (!sent) + return; + + hdev->le_def_tx_len = le16_to_cpu(sent->tx_len); + hdev->le_def_tx_time = le16_to_cpu(sent->tx_time); +} + +static void hci_cc_le_read_max_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_max_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_max_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_max_tx_time = le16_to_cpu(rp->tx_time); + hdev->le_max_rx_len = le16_to_cpu(rp->rx_len); + hdev->le_max_rx_time = le16_to_cpu(rp->rx_time); +} + static void hci_cc_write_le_host_supported(struct hci_dev *hdev, struct sk_buff *skb) { @@ -2847,6 +2896,18 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_supported_states(hdev, skb); break; + case HCI_OP_LE_READ_DEF_DATA_LEN: + hci_cc_le_read_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_WRITE_DEF_DATA_LEN: + hci_cc_le_write_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_READ_MAX_DATA_LEN: + hci_cc_le_read_max_data_len(hdev, skb); + break; + case HCI_OP_WRITE_LE_HOST_SUPPORTED: 
hci_cc_write_le_host_supported(hdev, skb); break; -- cgit v1.2.3 From 23b9ceb74f8e46bddd61a1e2afd9317221be74b7 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 17:13:41 +0100 Subject: Bluetooth: Create debugfs directory for each connection handle For every internal representation of a Bluetooth connection which is identified by hci_conn, create a debugfs directory with the handle number as directory name. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 3 +++ net/bluetooth/hci_debugfs.c | 12 ++++++++++++ net/bluetooth/hci_debugfs.h | 1 + net/bluetooth/hci_event.c | 5 +++++ 5 files changed, 22 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f20f6bd668bd..3e7e5110f298 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -440,6 +440,7 @@ struct hci_conn { struct delayed_work le_conn_timeout; struct device dev; + struct dentry *debugfs; struct hci_dev *hdev; void *l2cap_data; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4405fb352c70..75240aaca101 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -25,6 +25,7 @@ /* Bluetooth HCI connection handling. */ #include +#include #include #include @@ -547,6 +548,8 @@ int hci_conn_del(struct hci_conn *conn) hci_conn_del_sysfs(conn); + debugfs_remove_recursive(conn->debugfs); + if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index a7a0db03b0b8..ee33ce88d3d8 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -1062,3 +1062,15 @@ void hci_debugfs_create_le(struct hci_dev *hdev) debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs, &hdev->discov_interleaved_timeout); } + +void hci_debugfs_create_conn(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + char name[6]; + + if (IS_ERR_OR_NULL(hdev->debugfs)) + return; + + snprintf(name, sizeof(name), "%u", conn->handle); + conn->debugfs = debugfs_create_dir(name, hdev->debugfs); +} diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h index f191100b50c5..fb68efe083c5 100644 --- a/net/bluetooth/hci_debugfs.h +++ b/net/bluetooth/hci_debugfs.h @@ -23,3 +23,4 @@ void hci_debugfs_create_common(struct hci_dev *hdev); void hci_debugfs_create_bredr(struct hci_dev *hdev); void hci_debugfs_create_le(struct hci_dev *hdev); +void hci_debugfs_create_conn(struct hci_conn *conn); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a3055e90a5bb..eed44c643c0c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -31,6 +31,7 @@ #include #include "hci_request.h" +#include "hci_debugfs.h" #include "a2mp.h" #include "amp.h" #include "smp.h" @@ -2162,6 +2163,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) @@ -3638,6 +3640,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); break; @@ -4178,6 +4181,7 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hcon->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(hcon); + 
hci_debugfs_create_conn(hcon); hci_conn_add_sysfs(hcon); amp_physical_cfm(bredr_hcon, hcon); @@ -4384,6 +4388,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->le_conn_latency = le16_to_cpu(ev->latency); conn->le_supv_timeout = le16_to_cpu(ev->supervision_timeout); + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); -- cgit v1.2.3 From 72e4a6bd02204eed0464d6139439d7e89b94266e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 18:00:41 +0100 Subject: Bluetooth: Remove duplicate constant for RFCOMM PSM The RFCOMM_PSM constant is actually a duplicate. So remove it and use the L2CAP_PSM_RFCOMM constant instead. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/rfcomm.h | 2 -- net/bluetooth/rfcomm/core.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 578b83127af1..4190af53a46a 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -24,8 +24,6 @@ #ifndef __RFCOMM_H #define __RFCOMM_H -#define RFCOMM_PSM 3 - #define RFCOMM_CONN_TIMEOUT (HZ * 30) #define RFCOMM_DISC_TIMEOUT (HZ * 20) #define RFCOMM_AUTH_TIMEOUT (HZ * 25) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 73f8c75abe6e..4fea24275b17 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -771,7 +771,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bacpy(&addr.l2_bdaddr, dst); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); @@ -2038,7 +2038,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Bind socket */ bacpy(&addr.l2_bdaddr, ba); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); -- cgit v1.2.3 From 85b89af07d509531cbbb51adb9ec65f90f304473 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 21 Dec 2014 15:25:28 +0200 Subject: mac80211: fix dot11MulticastTransmittedFrameCount tested address dot11MulticastTransmittedFrameCount should be updated according to the DA, which might be different from A1. Checking A1 results in the counter being 0 in case of station, as to-DS data frames use A1 for the BSSID. This behaviour is defined in state machines, specifically in the sta_tx_dcf_3.1d(10) description of 802.11-2012. 
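For context on why addr1 is the wrong field here: in a to-DS data frame (station towards the AP) addr1 carries the BSSID and the actual destination sits in addr3. The ieee80211_get_DA() helper used by the fix below picks the right field based on the to-DS bit; a simplified sketch of that selection (the in-tree helper lives in include/linux/ieee80211.h):

#include <linux/ieee80211.h>

static u8 *example_get_da(struct ieee80211_hdr *hdr)
{
        if (ieee80211_has_tods(hdr->frame_control))
                return hdr->addr3;      /* to-DS: addr1 carries the BSSID    */
        return hdr->addr1;              /* otherwise addr1 is already the DA */
}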
Signed-off-by: Eliad Peller [rewrite commit message] Signed-off-by: Johannes Berg --- net/mac80211/status.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index d64037c96729..7d4e9307164c 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -862,7 +862,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) (info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) { if (ieee80211_is_first_frag(hdr->seq_ctrl)) { local->dot11TransmittedFrameCount++; - if (is_multicast_ether_addr(hdr->addr1)) + if (is_multicast_ether_addr(ieee80211_get_DA(hdr))) local->dot11MulticastTransmittedFrameCount++; if (retry_count > 0) local->dot11RetryCount++; -- cgit v1.2.3 From 88eab472ec21f01d3e36ff926f8bd4f742687075 Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Wed, 3 Dec 2014 17:30:19 -0200 Subject: netfilter: conntrack: adjust nf_conntrack_buckets default value Manually bumping either nf_conntrack_buckets or nf_conntrack_max has become a common task as our Linux servers tend to serve more and more clients/applications, so let's adjust nf_conntrack_buckets this to a more updated value. Now for systems with more than 4GB of memory, nf_conntrack_buckets becomes 65536 instead of 16384, resulting in nf_conntrack_max=256k entries. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.txt | 3 ++- net/netfilter/nf_conntrack_core.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 70da5086153d..f55599c62c9d 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -11,7 +11,8 @@ nf_conntrack_buckets - INTEGER (read-only) Size of hash table. If not specified as parameter during module loading, the default size is calculated by dividing total memory by 16384 to determine the number of buckets but the hash table will - never have fewer than 32 or more than 16384 buckets. + never have fewer than 32 and limited to 16384 buckets. For systems + with more than 4GB of memory it will be 65536 buckets. nf_conntrack_checksum - BOOLEAN 0 - disabled diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a11674806707..da58cd4f2cb7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1624,13 +1624,18 @@ int nf_conntrack_init_start(void) for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_conntrack_locks[i]); - /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB - * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ if (!nf_conntrack_htable_size) { + /* Idea from tcp.c: use 1/16384 of memory. + * On i386: 32MB machine has 512 buckets. + * >= 1GB machines have 16384 buckets. + * >= 4GB machines have 65536 buckets. 
+ */ nf_conntrack_htable_size = (((totalram_pages << PAGE_SHIFT) / 16384) / sizeof(struct hlist_head)); - if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) + if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) + nf_conntrack_htable_size = 65536; + else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) nf_conntrack_htable_size = 16384; if (nf_conntrack_htable_size < 32) nf_conntrack_htable_size = 32; -- cgit v1.2.3 From 372e28661a5c1cda34598314e2e341a7529d6219 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 16 Dec 2014 12:17:13 -0800 Subject: netfilter: xt_osf: Use continue to reduce indentation Invert logic in test to use continue. This routine already uses continue, use it a bit more to minimize > 80 column long lines and unnecessary indentation. No change in compiled object file. Other miscellanea: o Remove trailing whitespace o Realign arguments to multiline statement Signed-off-by: Joe Perches Acked-by: Evgeniy Polyakov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_osf.c | 169 +++++++++++++++++++++++++------------------------ 1 file changed, 85 insertions(+), 84 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index c529161cdbf8..0778855ea5e7 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -225,6 +225,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_lock(); list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { + int foptsize, optnum; + f = &kf->finger; if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre)) @@ -233,110 +235,109 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) optp = _optp; fmatch = FMATCH_WRONG; - if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) { - int foptsize, optnum; + if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl)) + continue; - /* - * Should not happen if userspace parser was written correctly. - */ - if (f->wss.wc >= OSF_WSS_MAX) - continue; + /* + * Should not happen if userspace parser was written correctly. 
+ */ + if (f->wss.wc >= OSF_WSS_MAX) + continue; - /* Check options */ + /* Check options */ - foptsize = 0; - for (optnum = 0; optnum < f->opt_num; ++optnum) - foptsize += f->opt[optnum].length; + foptsize = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) + foptsize += f->opt[optnum].length; - if (foptsize > MAX_IPOPTLEN || - optsize > MAX_IPOPTLEN || - optsize != foptsize) - continue; + if (foptsize > MAX_IPOPTLEN || + optsize > MAX_IPOPTLEN || + optsize != foptsize) + continue; - check_WSS = f->wss.wc; + check_WSS = f->wss.wc; - for (optnum = 0; optnum < f->opt_num; ++optnum) { - if (f->opt[optnum].kind == (*optp)) { - __u32 len = f->opt[optnum].length; - const __u8 *optend = optp + len; - int loop_cont = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) { + if (f->opt[optnum].kind == (*optp)) { + __u32 len = f->opt[optnum].length; + const __u8 *optend = optp + len; + int loop_cont = 0; - fmatch = FMATCH_OK; + fmatch = FMATCH_OK; - switch (*optp) { - case OSFOPT_MSS: - mss = optp[3]; - mss <<= 8; - mss |= optp[2]; + switch (*optp) { + case OSFOPT_MSS: + mss = optp[3]; + mss <<= 8; + mss |= optp[2]; - mss = ntohs((__force __be16)mss); - break; - case OSFOPT_TS: - loop_cont = 1; - break; - } + mss = ntohs((__force __be16)mss); + break; + case OSFOPT_TS: + loop_cont = 1; + break; + } - optp = optend; - } else - fmatch = FMATCH_OPT_WRONG; + optp = optend; + } else + fmatch = FMATCH_OPT_WRONG; - if (fmatch != FMATCH_OK) - break; - } + if (fmatch != FMATCH_OK) + break; + } - if (fmatch != FMATCH_OPT_WRONG) { - fmatch = FMATCH_WRONG; + if (fmatch != FMATCH_OPT_WRONG) { + fmatch = FMATCH_WRONG; - switch (check_WSS) { - case OSF_WSS_PLAIN: - if (f->wss.val == 0 || window == f->wss.val) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MSS: - /* - * Some smart modems decrease mangle MSS to - * SMART_MSS_2, so we check standard, decreased - * and the one provided in the fingerprint MSS - * values. - */ + switch (check_WSS) { + case OSF_WSS_PLAIN: + if (f->wss.val == 0 || window == f->wss.val) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MSS: + /* + * Some smart modems decrease mangle MSS to + * SMART_MSS_2, so we check standard, decreased + * and the one provided in the fingerprint MSS + * values. 
+ */ #define SMART_MSS_1 1460 #define SMART_MSS_2 1448 - if (window == f->wss.val * mss || - window == f->wss.val * SMART_MSS_1 || - window == f->wss.val * SMART_MSS_2) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MTU: - if (window == f->wss.val * (mss + 40) || - window == f->wss.val * (SMART_MSS_1 + 40) || - window == f->wss.val * (SMART_MSS_2 + 40)) - fmatch = FMATCH_OK; - break; - case OSF_WSS_MODULO: - if ((window % f->wss.val) == 0) - fmatch = FMATCH_OK; - break; - } + if (window == f->wss.val * mss || + window == f->wss.val * SMART_MSS_1 || + window == f->wss.val * SMART_MSS_2) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MTU: + if (window == f->wss.val * (mss + 40) || + window == f->wss.val * (SMART_MSS_1 + 40) || + window == f->wss.val * (SMART_MSS_2 + 40)) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MODULO: + if ((window % f->wss.val) == 0) + fmatch = FMATCH_OK; + break; } + } - if (fmatch != FMATCH_OK) - continue; + if (fmatch != FMATCH_OK) + continue; - fcount++; + fcount++; - if (info->flags & XT_OSF_LOG) - nf_log_packet(net, p->family, p->hooknum, skb, - p->in, p->out, NULL, - "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", - f->genre, f->version, f->subtype, - &ip->saddr, ntohs(tcp->source), - &ip->daddr, ntohs(tcp->dest), - f->ttl - ip->ttl); + if (info->flags & XT_OSF_LOG) + nf_log_packet(net, p->family, p->hooknum, skb, + p->in, p->out, NULL, + "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", + f->genre, f->version, f->subtype, + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest), + f->ttl - ip->ttl); - if ((info->flags & XT_OSF_LOG) && - info->loglevel == XT_OSF_LOGLEVEL_FIRST) - break; - } + if ((info->flags & XT_OSF_LOG) && + info->loglevel == XT_OSF_LOGLEVEL_FIRST) + break; } rcu_read_unlock(); -- cgit v1.2.3 From 8aefc4d1c6acb03adfada4eaf7dd66c4d91c34f6 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 22 Dec 2014 19:36:15 +0100 Subject: netfilter: log: remove unnecessary sizeof(char) sizeof(char) is always 1. Suggested-by: Joe Perches Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 43c926cae9c0..0d8448f19dfe 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -425,8 +425,7 @@ static int netfilter_log_sysctl_init(struct net *net) nf_log_sysctl_table[i].procname = nf_log_sysctl_fnames[i]; nf_log_sysctl_table[i].data = NULL; - nf_log_sysctl_table[i].maxlen = - NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN; nf_log_sysctl_table[i].mode = 0644; nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; -- cgit v1.2.3 From 0f3adeae6011fe82ccdee858dad6b7b2bd790add Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 26 Dec 2014 04:42:34 +0100 Subject: Bluetooth: Remove BlueFritz! specific check from initialization The AVM BlueFritz! USB controllers had a special handling in the Bluetooth core when it comes to reading the supported commands. Both drivers now set the HCI_QUIRK_BROKEN_LOCAL_COMMANDS and with that it is no longer needed to look for vendor specific details. 
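With this change the burden moves to the drivers: a transport that knows its controller advertises 1.2 or later but cannot handle Read Local Supported Commands simply sets the quirk before registering the device. A minimal, hypothetical driver-side sketch (the function name is illustrative only):

#include <net/bluetooth/hci_core.h>

static int example_driver_setup(struct hci_dev *hdev)
{
        /* Controller reports >= 1.2 but chokes on
         * HCI_OP_READ_LOCAL_COMMANDS, so ask the core to skip it.
         */
        set_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks);

        return 0;
}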
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 47f0311d1006..5ef5221c1813 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -623,10 +623,16 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (lmp_le_capable(hdev)) le_setup(req); - /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read - * local supported commands HCI command. + /* All Bluetooth 1.2 and later controllers should support the + * HCI command for reading the local supported commands. + * + * Unfortunately some controllers indicate Bluetooth 1.2 support, + * but do not have support for this command. If that is the case, + * the driver can quirk the behavior and skip reading the local + * supported commands. */ - if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) + if (hdev->hci_ver > BLUETOOTH_VER_1_1 && + !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks)) hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { -- cgit v1.2.3 From 4da50de895588a0268b636314d7c1d08f18b20c9 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Dec 2014 12:04:10 +0200 Subject: Bluetooth: Fix const declarations for smp_f5 and smp_f6 These SMP crypto functions should have all their input parameters declared as const. This patch fixes the parameters that were missing the const declaration. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index b67749bb55bf..9025e177d278 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -223,8 +223,9 @@ static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32], return err; } -static int smp_f5(struct crypto_hash *tfm_cmac, u8 w[32], u8 n1[16], u8 n2[16], - u8 a1[7], u8 a2[7], u8 mackey[16], u8 ltk[16]) +static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32], + const u8 n1[16], const u8 n2[16], const u8 a1[7], + const u8 a2[7], u8 mackey[16], u8 ltk[16]) { /* The btle, salt and length "magic" values are as defined in * the SMP section of the Bluetooth core specification. In ASCII @@ -276,7 +277,7 @@ static int smp_f5(struct crypto_hash *tfm_cmac, u8 w[32], u8 n1[16], u8 n2[16], } static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], - const u8 n1[16], u8 n2[16], const u8 r[16], + const u8 n1[16], const u8 n2[16], const u8 r[16], const u8 io_cap[3], const u8 a1[7], const u8 a2[7], u8 res[16]) { -- cgit v1.2.3 From ee485290c6af942f0371def632c32e747d110b1e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Dec 2014 20:48:35 -0800 Subject: Bluetooth: Add support for self testing framework This add support for the Bluetooth self testing framework that allows running certain test cases of sample data to ensure correctness of its basic functionality. With this patch only the basic framework will be added. It contains the build magic that allows running this at module loading time or at late_initcall stage when built into the kernel image. 
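The "build magic" mentioned above boils down to a small, generic pattern: when the subsystem is modular the tests run from the module init path, and when it is built in they are deferred with late_initcall() so they run after the rest of the kernel (notably the crypto layer) has initialized. A distilled sketch of that pattern, using a hypothetical CONFIG_EXAMPLE symbol; the patch itself wires the modular case into bt_init() rather than a separate module_init(), but the effect is the same:

#include <linux/init.h>
#include <linux/module.h>

static int __init example_run_selftest(void)
{
        pr_info("example: self tests passed\n");
        return 0;
}

#if IS_MODULE(CONFIG_EXAMPLE)
/* modular build: run as part of module initialization */
module_init(example_run_selftest);
#else
/* built-in build: defer until most other initcalls have run */
late_initcall(example_run_selftest);
#endif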
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/Kconfig | 13 ++++++++++ net/bluetooth/Makefile | 2 ++ net/bluetooth/af_bluetooth.c | 6 +++++ net/bluetooth/selftest.c | 62 ++++++++++++++++++++++++++++++++++++++++++++ net/bluetooth/selftest.h | 45 ++++++++++++++++++++++++++++++++ 5 files changed, 128 insertions(+) create mode 100644 net/bluetooth/selftest.c create mode 100644 net/bluetooth/selftest.h (limited to 'net') diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 29bcafc41adf..3296841531c9 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -64,4 +64,17 @@ config BT_6LOWPAN help IPv6 compression over Bluetooth Low Energy. +config BT_SELFTEST + bool "Bluetooth self testing support" + depends on BT && DEBUG_KERNEL + help + Run self tests when initializing the Bluetooth subsystem. This + is a developer option and can cause significant delay when booting + the system. + + When the Bluetooth subsystem is built as module, then the test + cases are run first thing at module load time. When the Bluetooth + subsystem is compiled into the kernel image, then the test cases + are run late in the initcall hierarchy. + source "drivers/bluetooth/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 364cff1ad4b1..8e96e3072266 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -15,4 +15,6 @@ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ a2mp.o amp.o ecc.o hci_request.o hci_debugfs.o +bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o + subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 012e3b03589d..ce22e0cfa923 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -31,6 +31,8 @@ #include #include +#include "selftest.h" + #define VERSION "2.20" /* Bluetooth sockets */ @@ -716,6 +718,10 @@ static int __init bt_init(void) BT_INFO("Core ver %s", VERSION); + err = bt_selftest(); + if (err < 0) + return err; + bt_debugfs = debugfs_create_dir("bluetooth", NULL); err = bt_sysfs_init(); diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c new file mode 100644 index 000000000000..00cac4a5e229 --- /dev/null +++ b/net/bluetooth/selftest.c @@ -0,0 +1,62 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. 
+*/ + +#include + +#include "selftest.h" + +static int __init run_selftest(void) +{ + BT_INFO("Starting self testing"); + + BT_INFO("Finished self testing"); + + return 0; +} + +#if IS_MODULE(CONFIG_BT) + +/* This is run when CONFIG_BT_SELFTEST=y and CONFIG_BT=m and is just a + * wrapper to allow running this at module init. + * + * If CONFIG_BT_SELFTEST=n, then this code is not compiled at all. + */ +int __init bt_selftest(void) +{ + return run_selftest(); +} + +#else + +/* This is run when CONFIG_BT_SELFTEST=y and CONFIG_BT=y and is run + * via late_initcall() as last item in the initialization sequence. + * + * If CONFIG_BT_SELFTEST=n, then this code is not compiled at all. + */ +static int __init bt_selftest_init(void) +{ + return run_selftest(); +} +late_initcall(bt_selftest_init); + +#endif diff --git a/net/bluetooth/selftest.h b/net/bluetooth/selftest.h new file mode 100644 index 000000000000..2aa0a346a913 --- /dev/null +++ b/net/bluetooth/selftest.h @@ -0,0 +1,45 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#if IS_ENABLED(CONFIG_BT_SELFTEST) && IS_MODULE(CONFIG_BT) + +/* When CONFIG_BT_SELFTEST=y and the CONFIG_BT=m, then the self testing + * is run at module loading time. + */ +int bt_selftest(void); + +#else + +/* When CONFIG_BT_SELFTEST=y and CONFIG_BT=y, then the self testing + * is run via late_initcall() to make sure that subsys_initcall() of + * the Bluetooth subsystem and device_initcall() of the Crypto subsystem + * do not clash. + * + * When CONFIG_BT_SELFTEST=n, then this turns into an empty call that + * has no impact. + */ +static inline int bt_selftest(void) +{ + return 0; +} + +#endif -- cgit v1.2.3 From 0b6415b65234ff723d32fd7dcdd917ba7ad5fe86 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Dec 2014 20:48:36 -0800 Subject: Bluetooth: Add support for ECDH test cases This patch adds the test cases for ECDH cryptographic functionality used by Bluetooth Low Energy Secure Connections feature. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/Kconfig | 7 ++ net/bluetooth/selftest.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 175 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 3296841531c9..8d3b607a72be 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -77,4 +77,11 @@ config BT_SELFTEST subsystem is compiled into the kernel image, then the test cases are run late in the initcall hierarchy. 
+config BT_SELFTEST_ECDH + bool "ECDH test cases" + depends on BT_LE && BT_SELFTEST + help + Run test cases for ECDH cryptographic functionality used by the + Bluetooth Low Energy Secure Connections feature. + source "drivers/bluetooth/Kconfig" diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index 00cac4a5e229..b9cb33cd45d7 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -23,15 +23,182 @@ #include +#include "ecc.h" #include "selftest.h" +#if IS_ENABLED(CONFIG_BT_SELFTEST_ECDH) + +static const u8 priv_a_1[32] __initconst = { + 0xbd, 0x1a, 0x3c, 0xcd, 0xa6, 0xb8, 0x99, 0x58, + 0x99, 0xb7, 0x40, 0xeb, 0x7b, 0x60, 0xff, 0x4a, + 0x50, 0x3f, 0x10, 0xd2, 0xe3, 0xb3, 0xc9, 0x74, + 0x38, 0x5f, 0xc5, 0xa3, 0xd4, 0xf6, 0x49, 0x3f, +}; +static const u8 priv_b_1[32] __initconst = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55, +}; +static const u8 pub_a_1[64] __initconst = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20, + + 0x8b, 0xd2, 0x89, 0x15, 0xd0, 0x8e, 0x1c, 0x74, + 0x24, 0x30, 0xed, 0x8f, 0xc2, 0x45, 0x63, 0x76, + 0x5c, 0x15, 0x52, 0x5a, 0xbf, 0x9a, 0x32, 0x63, + 0x6d, 0xeb, 0x2a, 0x65, 0x49, 0x9c, 0x80, 0xdc, +}; +static const u8 pub_b_1[64] __initconst = { + 0x90, 0xa1, 0xaa, 0x2f, 0xb2, 0x77, 0x90, 0x55, + 0x9f, 0xa6, 0x15, 0x86, 0xfd, 0x8a, 0xb5, 0x47, + 0x00, 0x4c, 0x9e, 0xf1, 0x84, 0x22, 0x59, 0x09, + 0x96, 0x1d, 0xaf, 0x1f, 0xf0, 0xf0, 0xa1, 0x1e, + + 0x4a, 0x21, 0xb1, 0x15, 0xf9, 0xaf, 0x89, 0x5f, + 0x76, 0x36, 0x8e, 0xe2, 0x30, 0x11, 0x2d, 0x47, + 0x60, 0x51, 0xb8, 0x9a, 0x3a, 0x70, 0x56, 0x73, + 0x37, 0xad, 0x9d, 0x42, 0x3e, 0xf3, 0x55, 0x4c, +}; +static const u8 dhkey_1[32] __initconst = { + 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, + 0xf1, 0x66, 0xf8, 0xb4, 0x13, 0x6b, 0x79, 0x99, + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec, +}; + +static const u8 priv_a_2[32] __initconst = { + 0x63, 0x76, 0x45, 0xd0, 0xf7, 0x73, 0xac, 0xb7, + 0xff, 0xdd, 0x03, 0x72, 0xb9, 0x72, 0x85, 0xb4, + 0x41, 0xb6, 0x5d, 0x0c, 0x5d, 0x54, 0x84, 0x60, + 0x1a, 0xa3, 0x9a, 0x3c, 0x69, 0x16, 0xa5, 0x06, +}; +static const u8 priv_b_2[32] __initconst = { + 0xba, 0x30, 0x55, 0x50, 0x19, 0xa2, 0xca, 0xa3, + 0xa5, 0x29, 0x08, 0xc6, 0xb5, 0x03, 0x88, 0x7e, + 0x03, 0x2b, 0x50, 0x73, 0xd4, 0x2e, 0x50, 0x97, + 0x64, 0xcd, 0x72, 0x0d, 0x67, 0xa0, 0x9a, 0x52, +}; +static const u8 pub_a_2[64] __initconst = { + 0xdd, 0x78, 0x5c, 0x74, 0x03, 0x9b, 0x7e, 0x98, + 0xcb, 0x94, 0x87, 0x4a, 0xad, 0xfa, 0xf8, 0xd5, + 0x43, 0x3e, 0x5c, 0xaf, 0xea, 0xb5, 0x4c, 0xf4, + 0x9e, 0x80, 0x79, 0x57, 0x7b, 0xa4, 0x31, 0x2c, + + 0x4f, 0x5d, 0x71, 0x43, 0x77, 0x43, 0xf8, 0xea, + 0xd4, 0x3e, 0xbd, 0x17, 0x91, 0x10, 0x21, 0xd0, + 0x1f, 0x87, 0x43, 0x8e, 0x40, 0xe2, 0x52, 0xcd, + 0xbe, 0xdf, 0x98, 0x38, 0x18, 0x12, 0x95, 0x91, +}; +static const u8 pub_b_2[64] __initconst = { + 0xcc, 0x00, 0x65, 0xe1, 0xf5, 0x6c, 0x0d, 0xcf, + 0xec, 0x96, 0x47, 0x20, 0x66, 0xc9, 0xdb, 0x84, + 0x81, 0x75, 0xa8, 0x4d, 0xc0, 0xdf, 0xc7, 0x9d, + 0x1b, 0x3f, 0x3d, 0xf2, 0x3f, 0xe4, 0x65, 0xf4, + + 0x79, 0xb2, 0xec, 0xd8, 0xca, 0x55, 0xa1, 0xa8, + 0x43, 0x4d, 0x6b, 0xca, 0x10, 0xb0, 0xc2, 0x01, + 0xc2, 0x33, 0x4e, 0x16, 0x24, 0xc4, 0xef, 0xee, + 0x99, 0xd8, 0xbb, 0xbc, 
0x48, 0xd0, 0x01, 0x02, +}; +static const u8 dhkey_2[32] __initconst = { + 0x69, 0xeb, 0x21, 0x32, 0xf2, 0xc6, 0x05, 0x41, + 0x60, 0x19, 0xcd, 0x5e, 0x94, 0xe1, 0xe6, 0x5f, + 0x33, 0x07, 0xe3, 0x38, 0x4b, 0x68, 0xe5, 0x62, + 0x3f, 0x88, 0x6d, 0x2f, 0x3a, 0x84, 0x85, 0xab, +}; + +static const u8 priv_a_3[32] __initconst = { + 0xbd, 0x1a, 0x3c, 0xcd, 0xa6, 0xb8, 0x99, 0x58, + 0x99, 0xb7, 0x40, 0xeb, 0x7b, 0x60, 0xff, 0x4a, + 0x50, 0x3f, 0x10, 0xd2, 0xe3, 0xb3, 0xc9, 0x74, + 0x38, 0x5f, 0xc5, 0xa3, 0xd4, 0xf6, 0x49, 0x3f, +}; +static const u8 pub_a_3[64] __initconst = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20, + + 0x8b, 0xd2, 0x89, 0x15, 0xd0, 0x8e, 0x1c, 0x74, + 0x24, 0x30, 0xed, 0x8f, 0xc2, 0x45, 0x63, 0x76, + 0x5c, 0x15, 0x52, 0x5a, 0xbf, 0x9a, 0x32, 0x63, + 0x6d, 0xeb, 0x2a, 0x65, 0x49, 0x9c, 0x80, 0xdc, +}; +static const u8 dhkey_3[32] __initconst = { + 0x2d, 0xab, 0x00, 0x48, 0xcb, 0xb3, 0x7b, 0xda, + 0x55, 0x7b, 0x8b, 0x72, 0xa8, 0x57, 0x87, 0xc3, + 0x87, 0x27, 0x99, 0x32, 0xfc, 0x79, 0x5f, 0xae, + 0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70, +}; + +static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], + const u8 pub_a[64], const u8 pub_b[64], + const u8 dhkey[32]) +{ + u8 dhkey_a[32], dhkey_b[32]; + + ecdh_shared_secret(pub_b, priv_a, dhkey_a); + ecdh_shared_secret(pub_a, priv_b, dhkey_b); + + if (memcmp(dhkey_a, dhkey, 32)) + return -EINVAL; + + if (memcmp(dhkey_b, dhkey, 32)) + return -EINVAL; + + return 0; +} + +static int __init test_ecdh(void) +{ + int err; + + err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1); + if (err) { + BT_ERR("ECDH sample 1 failed"); + return err; + } + + err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2); + if (err) { + BT_ERR("ECDH sample 2 failed"); + return err; + } + + err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3); + if (err) { + BT_ERR("ECDH sample 3 failed"); + return err; + } + + BT_INFO("ECDH test passed"); + + return 0; +} + +#else + +static inline int test_ecdh(void) +{ + return 0; +} + +#endif + static int __init run_selftest(void) { + int err; + BT_INFO("Starting self testing"); + err = test_ecdh(); + BT_INFO("Finished self testing"); - return 0; + return err; } #if IS_MODULE(CONFIG_BT) -- cgit v1.2.3 From 0a2b0f0452e2499a1037305fcfc314d0cdeb5260 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 30 Dec 2014 09:50:39 +0200 Subject: Bluetooth: Add skeleton for SMP self-tests This patch adds the initial skeleton and kernel config option for SMP self-tests. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/Kconfig | 7 +++++++ net/bluetooth/selftest.c | 7 +++++++ net/bluetooth/smp.c | 39 +++++++++++++++++++++++++++++++++++++++ net/bluetooth/smp.h | 13 +++++++++++++ 4 files changed, 66 insertions(+) (limited to 'net') diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 8d3b607a72be..7de74635a110 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -84,4 +84,11 @@ config BT_SELFTEST_ECDH Run test cases for ECDH cryptographic functionality used by the Bluetooth Low Energy Secure Connections feature. +config BT_SELFTEST_SMP + bool "SMP test cases" + depends on BT_LE && BT_SELFTEST + help + Run test cases for SMP cryptographic functionality, including both + legacy SMP as well as the Secure Connections features. 
+ source "drivers/bluetooth/Kconfig" diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index b9cb33cd45d7..a7602b3d0b0d 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -22,8 +22,10 @@ */ #include +#include #include "ecc.h" +#include "smp.h" #include "selftest.h" #if IS_ENABLED(CONFIG_BT_SELFTEST_ECDH) @@ -195,7 +197,12 @@ static int __init run_selftest(void) BT_INFO("Starting self testing"); err = test_ecdh(); + if (err) + goto done; + err = bt_selftest_smp(); + +done: BT_INFO("Finished self testing"); return err; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 9025e177d278..b47528d66a5f 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3022,3 +3022,42 @@ void smp_unregister(struct hci_dev *hdev) smp_del_chan(chan); } } + +#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) + +static int __init run_selftests(struct crypto_blkcipher *tfm_aes, + struct crypto_hash *tfm_cmac) +{ + BT_INFO("SMP test passed"); + + return 0; +} + +int __init bt_selftest_smp(void) +{ + struct crypto_blkcipher *tfm_aes; + struct crypto_hash *tfm_cmac; + int err; + + tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_aes)) { + BT_ERR("Unable to create ECB crypto context"); + return PTR_ERR(tfm_aes); + } + + tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm_cmac)) { + BT_ERR("Unable to create CMAC crypto context"); + crypto_free_blkcipher(tfm_aes); + return PTR_ERR(tfm_cmac); + } + + err = run_selftests(tfm_aes, tfm_cmac); + + crypto_free_hash(tfm_cmac); + crypto_free_blkcipher(tfm_aes); + + return err; +} + +#endif diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 3296bf42ae80..60c5b73fcb4b 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -192,4 +192,17 @@ int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa); int smp_register(struct hci_dev *hdev); void smp_unregister(struct hci_dev *hdev); +#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) + +int bt_selftest_smp(void); + +#else + +static inline int bt_selftest_smp(void) +{ + return 0; +} + +#endif + #endif /* __SMP_H */ -- cgit v1.2.3 From cfc4198e714fcee1a68c558d243e7e4f8de85577 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 30 Dec 2014 09:50:40 +0200 Subject: Bluetooth: Add legacy SMP tests This patch adds self-tests for legacy SMP crypto functions. The sample data has been taken from the core specification. 
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index b47528d66a5f..f76c6d02b5b8 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3025,9 +3025,104 @@ void smp_unregister(struct hci_dev *hdev) #if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) +static int __init test_ah(struct crypto_blkcipher *tfm_aes) +{ + const u8 irk[16] = { + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 r[3] = { 0x94, 0x81, 0x70 }; + const u8 exp[3] = { 0xaa, 0xfb, 0x0d }; + u8 res[3]; + int err; + + err = smp_ah(tfm_aes, irk, r, res); + if (err) + return err; + + if (memcmp(res, exp, 3)) + return -EINVAL; + + return 0; +} + +static int __init test_c1(struct crypto_blkcipher *tfm_aes) +{ + const u8 k[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + const u8 r[16] = { + 0xe0, 0x2e, 0x70, 0xc6, 0x4e, 0x27, 0x88, 0x63, + 0x0e, 0x6f, 0xad, 0x56, 0x21, 0xd5, 0x83, 0x57 }; + const u8 preq[7] = { 0x01, 0x01, 0x00, 0x00, 0x10, 0x07, 0x07 }; + const u8 pres[7] = { 0x02, 0x03, 0x00, 0x00, 0x08, 0x00, 0x05 }; + const u8 _iat = 0x01; + const u8 _rat = 0x00; + const bdaddr_t ra = { { 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1 } }; + const bdaddr_t ia = { { 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1 } }; + const u8 exp[16] = { + 0x86, 0x3b, 0xf1, 0xbe, 0xc5, 0x4d, 0xa7, 0xd2, + 0xea, 0x88, 0x89, 0x87, 0xef, 0x3f, 0x1e, 0x1e }; + u8 res[16]; + int err; + + err = smp_c1(tfm_aes, k, r, preq, pres, _iat, &ia, _rat, &ra, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_s1(struct crypto_blkcipher *tfm_aes) +{ + const u8 k[16] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + const u8 r1[16] = { + 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11 }; + const u8 r2[16] = { + 0x00, 0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99 }; + const u8 exp[16] = { + 0x62, 0xa0, 0x6d, 0x79, 0xae, 0x16, 0x42, 0x5b, + 0x9b, 0xf4, 0xb0, 0xe8, 0xf0, 0xe1, 0x1f, 0x9a }; + u8 res[16]; + int err; + + err = smp_s1(tfm_aes, k, r1, r2, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + static int __init run_selftests(struct crypto_blkcipher *tfm_aes, struct crypto_hash *tfm_cmac) { + int err; + + err = test_ah(tfm_aes); + if (err) { + BT_ERR("smp_ah test failed"); + return err; + } + + err = test_c1(tfm_aes); + if (err) { + BT_ERR("smp_c1 test failed"); + return err; + } + + err = test_s1(tfm_aes); + if (err) { + BT_ERR("smp_s1 test failed"); + return err; + } + BT_INFO("SMP test passed"); return 0; -- cgit v1.2.3 From fb2969a3a9a66a93f4c39e0c9f96e8bbb18e37a1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 30 Dec 2014 09:50:41 +0200 Subject: Bluetooth: Add LE Secure Connections tests for SMP This patch adds SMP self-tests for the Secure Connections crypto functions. The sample data has been taken from the core specification. 
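One detail worth calling out in the vectors that follow: g2() feeds the numeric-comparison step of LE Secure Connections, and the six-digit value shown to the user is simply the 32-bit output reduced modulo 10^6. The expected constant in test_g2() below is written as 0x2f9ed5ba % 1000000; a trivial stand-alone check of what that evaluates to:

#include <stdio.h>

int main(void)
{
        unsigned int val = 0x2f9ed5ba;  /* raw g2() output from the spec sample */

        /* six-digit value both devices would display */
        printf("%06u\n", val % 1000000);        /* prints 938554 */
        return 0;
}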
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/smp.c | 186 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index f76c6d02b5b8..3a4333b5801a 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3100,6 +3100,162 @@ static int __init test_s1(struct crypto_blkcipher *tfm_aes) return 0; } +static int __init test_f4(struct crypto_hash *tfm_cmac) +{ + const u8 u[32] = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20 }; + const u8 v[32] = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55 }; + const u8 x[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 z = 0x00; + const u8 exp[16] = { + 0x2d, 0x87, 0x74, 0xa9, 0xbe, 0xa1, 0xed, 0xf1, + 0x1c, 0xbd, 0xa9, 0x07, 0xf1, 0x16, 0xc9, 0xf2 }; + u8 res[16]; + int err; + + err = smp_f4(tfm_cmac, u, v, x, z, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f5(struct crypto_hash *tfm_cmac) +{ + const u8 w[32] = { + 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, + 0xf1, 0x66, 0xf8, 0xb4, 0x13, 0x6b, 0x79, 0x99, + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 n1[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 n2[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u8 a1[7] = { 0xce, 0xbf, 0x37, 0x37, 0x12, 0x56, 0x00 }; + const u8 a2[7] = { 0xc1, 0xcf, 0x2d, 0x70, 0x13, 0xa7, 0x00 }; + const u8 exp_ltk[16] = { + 0x38, 0x0a, 0x75, 0x94, 0xb5, 0x22, 0x05, 0x98, + 0x23, 0xcd, 0xd7, 0x69, 0x11, 0x79, 0x86, 0x69 }; + const u8 exp_mackey[16] = { + 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, + 0x02, 0x4a, 0x08, 0xa1, 0x76, 0xf1, 0x65, 0x29 }; + u8 mackey[16], ltk[16]; + int err; + + err = smp_f5(tfm_cmac, w, n1, n2, a1, a2, mackey, ltk); + if (err) + return err; + + if (memcmp(mackey, exp_mackey, 16)) + return -EINVAL; + + if (memcmp(ltk, exp_ltk, 16)) + return -EINVAL; + + return 0; +} + +static int __init test_f6(struct crypto_hash *tfm_cmac) +{ + const u8 w[16] = { + 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, + 0x02, 0x4a, 0x08, 0xa1, 0x76, 0xf1, 0x65, 0x29 }; + const u8 n1[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 n2[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u8 r[16] = { + 0xc8, 0x0f, 0x2d, 0x0c, 0xd2, 0x42, 0xda, 0x08, + 0x54, 0xbb, 0x53, 0xb4, 0x3b, 0x34, 0xa3, 0x12 }; + const u8 io_cap[3] = { 0x02, 0x01, 0x01 }; + const u8 a1[7] = { 0xce, 0xbf, 0x37, 0x37, 0x12, 0x56, 0x00 }; + const u8 a2[7] = { 0xc1, 0xcf, 0x2d, 0x70, 0x13, 0xa7, 0x00 }; + const u8 exp[16] = { + 0x61, 0x8f, 0x95, 0xda, 0x09, 0x0b, 0x6c, 0xd2, + 0xc5, 0xe8, 0xd0, 0x9c, 0x98, 0x73, 0xc4, 0xe3 }; + u8 res[16]; + int err; + + err = smp_f6(tfm_cmac, w, n1, n2, r, io_cap, a1, a2, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return 
-EINVAL; + + return 0; +} + +static int __init test_g2(struct crypto_hash *tfm_cmac) +{ + const u8 u[32] = { + 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, + 0xdb, 0xfd, 0xf4, 0xac, 0x11, 0x91, 0xf4, 0xef, + 0xb9, 0xa5, 0xf9, 0xe9, 0xa7, 0x83, 0x2c, 0x5e, + 0x2c, 0xbe, 0x97, 0xf2, 0xd2, 0x03, 0xb0, 0x20 }; + const u8 v[32] = { + 0xfd, 0xc5, 0x7f, 0xf4, 0x49, 0xdd, 0x4f, 0x6b, + 0xfb, 0x7c, 0x9d, 0xf1, 0xc2, 0x9a, 0xcb, 0x59, + 0x2a, 0xe7, 0xd4, 0xee, 0xfb, 0xfc, 0x0a, 0x90, + 0x9a, 0xbb, 0xf6, 0x32, 0x3d, 0x8b, 0x18, 0x55 }; + const u8 x[16] = { + 0xab, 0xae, 0x2b, 0x71, 0xec, 0xb2, 0xff, 0xff, + 0x3e, 0x73, 0x77, 0xd1, 0x54, 0x84, 0xcb, 0xd5 }; + const u8 y[16] = { + 0xcf, 0xc4, 0x3d, 0xff, 0xf7, 0x83, 0x65, 0x21, + 0x6e, 0x5f, 0xa7, 0x25, 0xcc, 0xe7, 0xe8, 0xa6 }; + const u32 exp_val = 0x2f9ed5ba % 1000000; + u32 val; + int err; + + err = smp_g2(tfm_cmac, u, v, x, y, &val); + if (err) + return err; + + if (val != exp_val) + return -EINVAL; + + return 0; +} + +static int __init test_h6(struct crypto_hash *tfm_cmac) +{ + const u8 w[16] = { + 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, + 0x05, 0xad, 0xc8, 0x57, 0xa3, 0x34, 0x02, 0xec }; + const u8 key_id[4] = { 0x72, 0x62, 0x65, 0x6c }; + const u8 exp[16] = { + 0x99, 0x63, 0xb1, 0x80, 0xe2, 0xa9, 0xd3, 0xe8, + 0x1c, 0xc9, 0x6d, 0xe7, 0x02, 0xe1, 0x9a, 0x2d }; + u8 res[16]; + int err; + + err = smp_h6(tfm_cmac, w, key_id, res); + if (err) + return err; + + if (memcmp(res, exp, 16)) + return -EINVAL; + + return 0; +} + static int __init run_selftests(struct crypto_blkcipher *tfm_aes, struct crypto_hash *tfm_cmac) { @@ -3123,6 +3279,36 @@ static int __init run_selftests(struct crypto_blkcipher *tfm_aes, return err; } + err = test_f4(tfm_cmac); + if (err) { + BT_ERR("smp_f4 test failed"); + return err; + } + + err = test_f5(tfm_cmac); + if (err) { + BT_ERR("smp_f5 test failed"); + return err; + } + + err = test_f6(tfm_cmac); + if (err) { + BT_ERR("smp_f6 test failed"); + return err; + } + + err = test_g2(tfm_cmac); + if (err) { + BT_ERR("smp_g2 test failed"); + return err; + } + + err = test_h6(tfm_cmac); + if (err) { + BT_ERR("smp_h6 test failed"); + return err; + } + BT_INFO("SMP test passed"); return 0; -- cgit v1.2.3 From 255047b0dca31e6b8ce254481a0b65d559d2ebb8 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Dec 2014 00:11:20 -0800 Subject: Bluetooth: Add timing information to SMP test case runs After successful completion of the SMP test cases, print the time it took to run them. 
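A small note on the conversion used in the patch below: the nanosecond delta is shifted right by 10 bits, i.e. divided by 1024 rather than 1000, presumably to avoid a 64-bit division, so the reported "usecs" come out roughly 2% low. A stand-alone comparison of the two conversions:

#include <stdio.h>

int main(void)
{
        unsigned long long ns = 1234567ULL;     /* example duration in ns */

        printf("ns >> 10 : %llu\n", ns >> 10);  /* 1205, the approximation */
        printf("ns / 1000: %llu\n", ns / 1000); /* 1234, exact usecs       */
        return 0;
}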
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/smp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3a4333b5801a..358264c0e785 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3259,8 +3259,12 @@ static int __init test_h6(struct crypto_hash *tfm_cmac) static int __init run_selftests(struct crypto_blkcipher *tfm_aes, struct crypto_hash *tfm_cmac) { + ktime_t calltime, delta, rettime; + unsigned long long duration; int err; + calltime = ktime_get(); + err = test_ah(tfm_aes); if (err) { BT_ERR("smp_ah test failed"); @@ -3309,7 +3313,11 @@ static int __init run_selftests(struct crypto_blkcipher *tfm_aes, return err; } - BT_INFO("SMP test passed"); + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + BT_INFO("SMP test passed in %lld usecs", duration); return 0; } -- cgit v1.2.3 From e64b4fb66cc428cef0a62ea899b671c1aad05f3a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Dec 2014 00:11:21 -0800 Subject: Bluetooth: Add timing information to ECDH test case runs After successful completion of the ECDH test cases, print the time it took to run them. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/selftest.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index a7602b3d0b0d..9c67315172cf 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -156,8 +156,12 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], static int __init test_ecdh(void) { + ktime_t calltime, delta, rettime; + unsigned long long duration; int err; + calltime = ktime_get(); + err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1); if (err) { BT_ERR("ECDH sample 1 failed"); @@ -176,7 +180,11 @@ static int __init test_ecdh(void) return err; } - BT_INFO("ECDH test passed"); + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + + BT_INFO("ECDH test passed in %lld usecs", duration); return 0; } -- cgit v1.2.3 From 886eaa1fe668e75615a48df2ff68e23b762e31e8 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 25 Dec 2014 12:05:50 +0100 Subject: tipc: replace 0 by NULL for pointers Fix sparse warning: net/tipc/link.c:1924:40: warning: Using plain integer as NULL pointer Signed-off-by: Fabian Frederick Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 23bcc1132365..082c3b5b32a1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1921,7 +1921,7 @@ static struct tipc_node *tipc_link_find_owner(const char *link_name, { struct tipc_link *l_ptr; struct tipc_node *n_ptr; - struct tipc_node *found_node = 0; + struct tipc_node *found_node = NULL; int i; *bearer_id = 0; -- cgit v1.2.3 From 33f72e6f0c67f673fd0c63a8182dbd9ffb8cf50b Mon Sep 17 00:00:00 2001 From: Bill Hong Date: Sat, 27 Dec 2014 10:12:39 -0800 Subject: l2tp : multicast notification to the registered listeners Previously l2tp module did not provide any means for the user space to get notified when tunnels/sessions are added/modified/deleted. This change contains the following - create a multicast group for the listeners to register. 
- notify the registered listeners when the tunnels/sessions are created/modified/deleted. Signed-off-by: Bill Hong Reviewed-by: Stephen Hemminger Reviewed-by: Sven-Thorsten Dietrich Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 1 + net/l2tp/l2tp_netlink.c | 101 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 92 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 21caa2631c20..347ef22a964e 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -178,5 +178,6 @@ enum l2tp_seqmode { */ #define L2TP_GENL_NAME "l2tp" #define L2TP_GENL_VERSION 0x1 +#define L2TP_GENL_MCGROUP "l2tp" #endif /* _UAPI_LINUX_L2TP_H_ */ diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0ac907adb2f4..6b16598f31d5 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -40,6 +40,18 @@ static struct genl_family l2tp_nl_family = { .netnsok = true, }; +static const struct genl_multicast_group l2tp_multicast_group[] = { + { + .name = L2TP_GENL_MCGROUP, + }, +}; + +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_tunnel *tunnel, u8 cmd); +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_session *session, + u8 cmd); + /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; @@ -97,6 +109,52 @@ out: return ret; } +static int l2tp_tunnel_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_tunnel *tunnel, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, tunnel, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + +static int l2tp_session_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_session *session, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, session, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info) { u32 tunnel_id; @@ -188,6 +246,9 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info break; } + if (ret >= 0) + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_CREATE); out: return ret; } @@ -211,6 +272,9 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info goto out; } + l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_DELETE); + (void) l2tp_tunnel_delete(tunnel); out: @@ -239,12 +303,15 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info if (info->attrs[L2TP_ATTR_DEBUG]) tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_MODIFY); + out: return ret; } static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_tunnel *tunnel) + struct l2tp_tunnel *tunnel, u8 cmd) { void *hdr; struct nlattr *nest; @@ -254,8 +321,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, 
u32 portid, u32 seq, int fla struct ipv6_pinfo *np = NULL; #endif - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, - L2TP_CMD_TUNNEL_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -359,7 +425,7 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, - NLM_F_ACK, tunnel); + NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET); if (ret < 0) goto err_out; @@ -385,7 +451,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - tunnel) <= 0) + tunnel, L2TP_CMD_TUNNEL_GET) <= 0) goto out; ti++; @@ -539,6 +605,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id, session_id, peer_session_id, &cfg); + if (ret >= 0) { + session = l2tp_session_find(net, tunnel, session_id); + if (session) + ret = l2tp_session_notify(&l2tp_nl_family, info, session, + L2TP_CMD_SESSION_CREATE); + } + out: return ret; } @@ -555,6 +628,9 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf goto out; } + l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_DELETE); + pw_type = session->pwtype; if (pw_type < __L2TP_PWTYPE_MAX) if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) @@ -601,12 +677,15 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_MRU]) session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); + ret = l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_MODIFY); + out: return ret; } static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_session *session) + struct l2tp_session *session, u8 cmd) { void *hdr; struct nlattr *nest; @@ -615,7 +694,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl sk = tunnel->sock; - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -699,7 +778,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, - 0, session); + 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) goto err_out; @@ -737,7 +816,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session) <= 0) + session, L2TP_CMD_SESSION_GET) <= 0) break; si++; @@ -896,7 +975,9 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); static int l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); - return genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops); + return genl_register_family_with_ops_groups(&l2tp_nl_family, + l2tp_nl_ops, + l2tp_multicast_group); } static void l2tp_nl_cleanup(void) -- cgit v1.2.3 From bec94d430f2c97159e21e38c0f1fa4da3710d5e1 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sat, 27 Dec 2014 10:01:42 -0800 Subject: gre: allow live address change The GRE tap device supports Ethernet over GRE, but doesn't care about the source address of the tunnel, therefore it can be changed without bring device down. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv4/ip_gre.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4f4bf5b99686..942576e27df1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -673,6 +673,7 @@ static bool ipgre_netlink_encap_parms(struct nlattr *data[], static int gre_tap_init(struct net_device *dev) { __gre_tunnel_init(dev); + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; return ip_tunnel_init(dev); } -- cgit v1.2.3 From 8274a97aa4c694ad0d7b31b283a89dcca140e62b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:55:29 -0800 Subject: fib_trie: Update usage stats to be percpu instead of global variables The trie usage stats were currently being shared by all threads that were calling fib_table_lookup. As a result when multiple threads were performing lookups simultaneously the trie would begin to cache bounce between those threads. In order to prevent this I have updated the usage stats to use a set of percpu variables. By doing this we should be able to avoid the cache bouncing and still make use of these stats. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_trie.c | 68 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 49 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 23104a3f2924..66890209208f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -67,7 +67,7 @@ static int __net_init fib4_rules_init(struct net *net) return 0; fail: - kfree(local_table); + fib_free_table(local_table); return -ENOMEM; } #else diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 18bcaf2ff2fd..d3dbb4821ae8 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -153,7 +153,7 @@ struct trie_stat { struct trie { struct rt_trie_node __rcu *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS - struct trie_use_stats stats; + struct trie_use_stats __percpu *stats; #endif }; @@ -631,7 +631,7 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) if (IS_ERR(tn)) { tn = old_tn; #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.resize_node_skipped++; + this_cpu_inc(t->stats->resize_node_skipped); #endif break; } @@ -658,7 +658,7 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) if (IS_ERR(tn)) { tn = old_tn; #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.resize_node_skipped++; + this_cpu_inc(t->stats->resize_node_skipped); #endif break; } @@ -1357,7 +1357,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, err = fib_props[fa->fa_type].error; if (err) { #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_passed++; + this_cpu_inc(t->stats->semantic_match_passed); #endif return err; } @@ -1372,7 +1372,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, continue; #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_passed++; + this_cpu_inc(t->stats->semantic_match_passed); #endif res->prefixlen = li->plen; res->nh_sel = nhsel; @@ -1388,7 +1388,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, } #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_miss++; + this_cpu_inc(t->stats->semantic_match_miss); #endif } @@ -1399,6 +1399,9 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { struct trie *t = (struct trie *) tb->tb_data; +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie_use_stats __percpu *stats = 
t->stats; +#endif int ret; struct rt_trie_node *n; struct tnode *pn; @@ -1417,7 +1420,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, goto failed; #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.gets++; + this_cpu_inc(stats->gets); #endif /* Just a leaf? */ @@ -1441,7 +1444,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, if (n == NULL) { #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.null_node_hit++; + this_cpu_inc(stats->null_node_hit); #endif goto backtrace; } @@ -1576,7 +1579,7 @@ backtrace: chopped_off = 0; #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.backtrack++; + this_cpu_inc(stats->backtrack); #endif goto backtrace; } @@ -1830,6 +1833,11 @@ int fib_table_flush(struct fib_table *tb) void fib_free_table(struct fib_table *tb) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + struct trie *t = (struct trie *)tb->tb_data; + + free_percpu(t->stats); +#endif /* CONFIG_IP_FIB_TRIE_STATS */ kfree(tb); } @@ -1973,7 +1981,14 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_num_default = 0; t = (struct trie *) tb->tb_data; - memset(t, 0, sizeof(*t)); + RCU_INIT_POINTER(t->trie, NULL); +#ifdef CONFIG_IP_FIB_TRIE_STATS + t->stats = alloc_percpu(struct trie_use_stats); + if (!t->stats) { + kfree(tb); + tb = NULL; + } +#endif return tb; } @@ -2139,18 +2154,31 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) #ifdef CONFIG_IP_FIB_TRIE_STATS static void trie_show_usage(struct seq_file *seq, - const struct trie_use_stats *stats) + const struct trie_use_stats __percpu *stats) { + struct trie_use_stats s = { 0 }; + int cpu; + + /* loop through all of the CPUs and gather up the stats */ + for_each_possible_cpu(cpu) { + const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); + + s.gets += pcpu->gets; + s.backtrack += pcpu->backtrack; + s.semantic_match_passed += pcpu->semantic_match_passed; + s.semantic_match_miss += pcpu->semantic_match_miss; + s.null_node_hit += pcpu->null_node_hit; + s.resize_node_skipped += pcpu->resize_node_skipped; + } + seq_printf(seq, "\nCounters:\n---------\n"); - seq_printf(seq, "gets = %u\n", stats->gets); - seq_printf(seq, "backtracks = %u\n", stats->backtrack); + seq_printf(seq, "gets = %u\n", s.gets); + seq_printf(seq, "backtracks = %u\n", s.backtrack); seq_printf(seq, "semantic match passed = %u\n", - stats->semantic_match_passed); - seq_printf(seq, "semantic match miss = %u\n", - stats->semantic_match_miss); - seq_printf(seq, "null node hit= %u\n", stats->null_node_hit); - seq_printf(seq, "skipped node resize = %u\n\n", - stats->resize_node_skipped); + s.semantic_match_passed); + seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); + seq_printf(seq, "null node hit= %u\n", s.null_node_hit); + seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped); } #endif /* CONFIG_IP_FIB_TRIE_STATS */ @@ -2191,7 +2219,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) trie_collect_stats(t, &stat); trie_show_stats(seq, &stat); #ifdef CONFIG_IP_FIB_TRIE_STATS - trie_show_usage(seq, &t->stats); + trie_show_usage(seq, t->stats); #endif } } -- cgit v1.2.3 From 64c9b6fb26ebcc82e36438d4084f2258f29dbadf Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:55:35 -0800 Subject: fib_trie: Make leaf and tnode more uniform This change makes some fundamental changes to the way leaves and tnodes are constructed. The big differences are: 1. Leaves now populate pos and bits indicating their full key size. 2. 
Trie nodes now mask out their lower bits to be consistent with the leaf 3. Both structures have been reordered so that rt_trie_node now consisists of a much larger region including the pos, bits, and rcu portions of the tnode structure. On 32b systems this will result in the leaf being 4B larger as the pos and bits values were added to a hole created by the key as it was only 4B in length. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 192 ++++++++++++++++++++++------------------------------ 1 file changed, 82 insertions(+), 110 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d3dbb4821ae8..2b7220728b24 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -87,24 +87,38 @@ typedef unsigned int t_key; -#define T_TNODE 0 -#define T_LEAF 1 -#define NODE_TYPE_MASK 0x1UL -#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) +#define IS_TNODE(n) ((n)->bits) +#define IS_LEAF(n) (!(n)->bits) -#define IS_TNODE(n) (!(n->parent & T_LEAF)) -#define IS_LEAF(n) (n->parent & T_LEAF) +struct tnode { + t_key key; + unsigned char bits; /* 2log(KEYLENGTH) bits needed */ + unsigned char pos; /* 2log(KEYLENGTH) bits needed */ + struct tnode __rcu *parent; + union { + struct rcu_head rcu; + struct tnode *tnode_free; + }; + unsigned int full_children; /* KEYLENGTH bits needed */ + unsigned int empty_children; /* KEYLENGTH bits needed */ + struct rt_trie_node __rcu *child[0]; +}; struct rt_trie_node { - unsigned long parent; t_key key; + unsigned char bits; + unsigned char pos; + struct tnode __rcu *parent; + struct rcu_head rcu; }; struct leaf { - unsigned long parent; t_key key; - struct hlist_head list; + unsigned char bits; + unsigned char pos; + struct tnode __rcu *parent; struct rcu_head rcu; + struct hlist_head list; }; struct leaf_info { @@ -115,20 +129,6 @@ struct leaf_info { struct rcu_head rcu; }; -struct tnode { - unsigned long parent; - t_key key; - unsigned char pos; /* 2log(KEYLENGTH) bits needed */ - unsigned char bits; /* 2log(KEYLENGTH) bits needed */ - unsigned int full_children; /* KEYLENGTH bits needed */ - unsigned int empty_children; /* KEYLENGTH bits needed */ - union { - struct rcu_head rcu; - struct tnode *tnode_free; - }; - struct rt_trie_node __rcu *child[0]; -}; - #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats { unsigned int gets; @@ -176,38 +176,27 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; -/* - * caller must hold RTNL - */ -static inline struct tnode *node_parent(const struct rt_trie_node *node) -{ - unsigned long parent; +/* caller must hold RTNL */ +#define node_parent(n) rtnl_dereference((n)->parent) - parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); +/* caller must hold RCU read lock or RTNL */ +#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent) - return (struct tnode *)(parent & ~NODE_TYPE_MASK); -} - -/* - * caller must hold RCU read lock or RTNL - */ -static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) +/* wrapper for rcu_assign_pointer */ +static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) { - unsigned long parent; - - parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || - lockdep_rtnl_is_held()); - - return (struct tnode *)(parent & ~NODE_TYPE_MASK); + if (node) + rcu_assign_pointer(node->parent, ptr); } -/* Same as rcu_assign_pointer - * but that 
macro() assumes that value is a pointer. +#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p) + +/* This provides us with the number of children in this node, in the case of a + * leaf this will return 0 meaning none of the children are accessible. */ -static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) +static inline int tnode_child_length(const struct tnode *tn) { - smp_wmb(); - node->parent = (unsigned long)ptr | NODE_TYPE(node); + return (1ul << tn->bits) & ~(1ul); } /* @@ -215,7 +204,7 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) */ static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) { - BUG_ON(i >= 1U << tn->bits); + BUG_ON(i >= tnode_child_length(tn)); return rtnl_dereference(tn->child[i]); } @@ -225,16 +214,11 @@ static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsig */ static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) { - BUG_ON(i >= 1U << tn->bits); + BUG_ON(i >= tnode_child_length(tn)); return rcu_dereference_rtnl(tn->child[i]); } -static inline int tnode_child_length(const struct tnode *tn) -{ - return 1 << tn->bits; -} - static inline t_key mask_pfx(t_key k, unsigned int l) { return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); @@ -336,11 +320,6 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) */ -static inline void check_tnode(const struct tnode *tn) -{ - WARN_ON(tn && tn->pos+tn->bits > 32); -} - static const int halve_threshold = 25; static const int inflate_threshold = 50; static const int halve_threshold_root = 15; @@ -426,11 +405,20 @@ static void tnode_free_flush(void) } } -static struct leaf *leaf_new(void) +static struct leaf *leaf_new(t_key key) { struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); if (l) { - l->parent = T_LEAF; + l->parent = NULL; + /* set key and pos to reflect full key value + * any trailing zeros in the key should be ignored + * as the nodes are searched + */ + l->key = key; + l->pos = KEYLENGTH; + /* set bits to 0 indicating we are not a tnode */ + l->bits = 0; + INIT_HLIST_HEAD(&l->list); } return l; @@ -451,12 +439,16 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) { size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); struct tnode *tn = tnode_alloc(sz); + unsigned int shift = pos + bits; + + /* verify bits and pos their msb bits clear and values are valid */ + BUG_ON(!bits || (shift > KEYLENGTH)); if (tn) { - tn->parent = T_TNODE; + tn->parent = NULL; tn->pos = pos; tn->bits = bits; - tn->key = key; + tn->key = mask_pfx(key, pos); tn->full_children = 0; tn->empty_children = 1<pos == tn->pos + tn->bits; + return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits)); } static inline void put_child(struct tnode *tn, int i, @@ -514,8 +503,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * else if (!wasfull && isfull) tn->full_children++; - if (n) - node_set_parent(n, tn); + node_set_parent(n, tn); rcu_assign_pointer(tn->child[i], n); } @@ -523,7 +511,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * #define MAX_WORK 10 static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) { - int i; + struct rt_trie_node *n = NULL; struct tnode *old_tn; int inflate_threshold_use; int halve_threshold_use; @@ -536,12 +524,11 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) tn, inflate_threshold, 
halve_threshold); /* No children */ - if (tn->empty_children == tnode_child_length(tn)) { - tnode_free_safe(tn); - return NULL; - } + if (tn->empty_children > (tnode_child_length(tn) - 1)) + goto no_children; + /* One child */ - if (tn->empty_children == tnode_child_length(tn) - 1) + if (tn->empty_children == (tnode_child_length(tn) - 1)) goto one_child; /* * Double as long as the resulting node has a number of @@ -607,11 +594,9 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) * */ - check_tnode(tn); - /* Keep root node larger */ - if (!node_parent((struct rt_trie_node *)tn)) { + if (!node_parent(tn)) { inflate_threshold_use = inflate_threshold_root; halve_threshold_use = halve_threshold_root; } else { @@ -637,8 +622,6 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) } } - check_tnode(tn); - /* Return if at least one inflate is run */ if (max_work != MAX_WORK) return (struct rt_trie_node *) tn; @@ -666,21 +649,16 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) /* Only one child remains */ - if (tn->empty_children == tnode_child_length(tn) - 1) { + if (tn->empty_children == (tnode_child_length(tn) - 1)) { + unsigned long i; one_child: - for (i = 0; i < tnode_child_length(tn); i++) { - struct rt_trie_node *n; - - n = rtnl_dereference(tn->child[i]); - if (!n) - continue; - - /* compress one level */ - - node_set_parent(n, NULL); - tnode_free_safe(tn); - return n; - } + for (i = tnode_child_length(tn); !n && i;) + n = tnode_get_child(tn, --i); +no_children: + /* compress one level */ + node_set_parent(n, NULL); + tnode_free_safe(tn); + return n; } return (struct rt_trie_node *) tn; } @@ -760,8 +738,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) /* A leaf or an internal node with skipped bits */ - if (IS_LEAF(node) || ((struct tnode *) node)->pos > - tn->pos + tn->bits - 1) { + if (IS_LEAF(node) || (node->pos > (tn->pos + tn->bits - 1))) { put_child(tn, tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), node); @@ -958,11 +935,9 @@ fib_find_node(struct trie *t, u32 key) pos = 0; n = rcu_dereference_rtnl(t->trie); - while (n != NULL && NODE_TYPE(n) == T_TNODE) { + while (n && IS_TNODE(n)) { tn = (struct tnode *) n; - check_tnode(tn); - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { pos = tn->pos + tn->bits; n = tnode_get_child_rcu(tn, @@ -988,7 +963,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) key = tn->key; - while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { + while (tn != NULL && (tp = node_parent(tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); tn = (struct tnode *)resize(t, tn); @@ -996,7 +971,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) tnode_put_child_reorg(tp, cindex, (struct rt_trie_node *)tn, wasfull); - tp = node_parent((struct rt_trie_node *) tn); + tp = node_parent(tn); if (!tp) rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); @@ -1048,11 +1023,9 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) * If it doesn't, we need to replace it with a T_TNODE. 
*/ - while (n != NULL && NODE_TYPE(n) == T_TNODE) { + while (n && IS_TNODE(n)) { tn = (struct tnode *) n; - check_tnode(tn); - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { tp = tn; pos = tn->pos + tn->bits; @@ -1087,12 +1060,11 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) insert_leaf_info(&l->list, li); goto done; } - l = leaf_new(); + l = leaf_new(key); if (!l) return NULL; - l->key = key; li = leaf_info_new(plen); if (!li) { @@ -1569,7 +1541,7 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); + struct tnode *parent = node_parent_rcu(pn); if (!parent) goto failed; @@ -1597,7 +1569,7 @@ EXPORT_SYMBOL_GPL(fib_table_lookup); */ static void trie_leaf_remove(struct trie *t, struct leaf *l) { - struct tnode *tp = node_parent((struct rt_trie_node *) l); + struct tnode *tp = node_parent(l); pr_debug("entering trie_leaf_remove(%p)\n", l); @@ -2374,7 +2346,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (IS_TNODE(n)) { struct tnode *tn = (struct tnode *) n; - __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); + __be32 prf = htonl(tn->key); seq_indent(seq, iter->depth-1); seq_printf(seq, " +-- %pI4/%d %d %d %d\n", -- cgit v1.2.3 From 37fd30f2da573c2625ed29561e5b8eb7b0258860 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:55:41 -0800 Subject: fib_trie: Merge tnode_free and leaf_free into node_free Both the leaf and the tnode had an rcu_head in them, but they had them in slightly different places. Since we now have them in the same spot and know that any node with bits == 0 is a leaf and the rest are either vmalloc or kmalloc tnodes depending on the value of bits it makes it easy to combine the functions and reduce overhead. In addition I have taken advantage of the rcu_head pointer to go ahead and put together a simple linked list instead of using the tnode pointer as this way we can merge either type of structure for freeing. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 90 ++++++++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2b7220728b24..d1a9907901cb 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -95,15 +95,17 @@ struct tnode { unsigned char bits; /* 2log(KEYLENGTH) bits needed */ unsigned char pos; /* 2log(KEYLENGTH) bits needed */ struct tnode __rcu *parent; - union { - struct rcu_head rcu; - struct tnode *tnode_free; - }; + struct rcu_head rcu; + /* everything above this comment must be the same as rt_trie_node */ unsigned int full_children; /* KEYLENGTH bits needed */ unsigned int empty_children; /* KEYLENGTH bits needed */ struct rt_trie_node __rcu *child[0]; }; +/* This struct represents the shared bits between tnode and leaf. If any + * ordering is changed here is must also be updated in tnode and leaf as + * well. 
+ */ struct rt_trie_node { t_key key; unsigned char bits; @@ -118,6 +120,7 @@ struct leaf { unsigned char pos; struct tnode __rcu *parent; struct rcu_head rcu; + /* everything above this comment must be the same as rt_trie_node */ struct hlist_head list; }; @@ -163,7 +166,7 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); /* tnodes to free after resize(); protected by RTNL */ -static struct tnode *tnode_free_head; +static struct callback_head *tnode_free_head; static size_t tnode_free_size; /* @@ -336,17 +339,23 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) call_rcu(&fa->rcu, __alias_free_mem); } -static void __leaf_free_rcu(struct rcu_head *head) -{ - struct leaf *l = container_of(head, struct leaf, rcu); - kmem_cache_free(trie_leaf_kmem, l); -} +#define TNODE_KMALLOC_MAX \ + ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct rt_trie_node *)) -static inline void free_leaf(struct leaf *l) +static void __node_free_rcu(struct rcu_head *head) { - call_rcu(&l->rcu, __leaf_free_rcu); + struct rt_trie_node *n = container_of(head, struct rt_trie_node, rcu); + + if (IS_LEAF(n)) + kmem_cache_free(trie_leaf_kmem, n); + else if (n->bits <= TNODE_KMALLOC_MAX) + kfree(n); + else + vfree(n); } +#define node_free(n) call_rcu(&n->rcu, __node_free_rcu) + static inline void free_leaf_info(struct leaf_info *leaf) { kfree_rcu(leaf, rcu); @@ -360,43 +369,24 @@ static struct tnode *tnode_alloc(size_t size) return vzalloc(size); } -static void __tnode_free_rcu(struct rcu_head *head) -{ - struct tnode *tn = container_of(head, struct tnode, rcu); - size_t size = sizeof(struct tnode) + - (sizeof(struct rt_trie_node *) << tn->bits); - - if (size <= PAGE_SIZE) - kfree(tn); - else - vfree(tn); -} - -static inline void tnode_free(struct tnode *tn) -{ - if (IS_LEAF(tn)) - free_leaf((struct leaf *) tn); - else - call_rcu(&tn->rcu, __tnode_free_rcu); -} - static void tnode_free_safe(struct tnode *tn) { BUG_ON(IS_LEAF(tn)); - tn->tnode_free = tnode_free_head; - tnode_free_head = tn; - tnode_free_size += sizeof(struct tnode) + - (sizeof(struct rt_trie_node *) << tn->bits); + tn->rcu.next = tnode_free_head; + tnode_free_head = &tn->rcu; } static void tnode_free_flush(void) { - struct tnode *tn; + struct callback_head *head; + + while ((head = tnode_free_head)) { + struct tnode *tn = container_of(head, struct tnode, rcu); + + tnode_free_head = head->next; + tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]); - while ((tn = tnode_free_head)) { - tnode_free_head = tn->tnode_free; - tn->tnode_free = NULL; - tnode_free(tn); + node_free(tn); } if (tnode_free_size >= PAGE_SIZE * sync_pages) { @@ -437,7 +427,7 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); + size_t sz = offsetof(struct tnode, child[1 << bits]); struct tnode *tn = tnode_alloc(sz); unsigned int shift = pos + bits; @@ -666,15 +656,15 @@ no_children: static void tnode_clean_free(struct tnode *tn) { + struct rt_trie_node *tofree; int i; - struct tnode *tofree; for (i = 0; i < tnode_child_length(tn); i++) { - tofree = (struct tnode *)rtnl_dereference(tn->child[i]); + tofree = rtnl_dereference(tn->child[i]); if (tofree) - tnode_free(tofree); + node_free(tofree); } - tnode_free(tn); + node_free(tn); } static struct tnode *inflate(struct trie *t, 
struct tnode *tn) @@ -717,7 +707,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) inode->bits - 1); if (!right) { - tnode_free(left); + node_free(left); goto nomem; } @@ -1068,7 +1058,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) li = leaf_info_new(plen); if (!li) { - free_leaf(l); + node_free(l); return NULL; } @@ -1100,7 +1090,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) if (!tn) { free_leaf_info(li); - free_leaf(l); + node_free(l); return NULL; } @@ -1580,7 +1570,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) } else RCU_INIT_POINTER(t->trie, NULL); - free_leaf(l); + node_free(l); } /* -- cgit v1.2.3 From adaf981685b7b17d773d17f4ee6a128a3a6b6dc8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:55:47 -0800 Subject: fib_trie: Merge leaf into tnode This change makes it so that leaf and tnode are the same struct. As a result there is no need for rt_trie_node anymore since everyting can be merged into tnode. On 32b systems this results in the leaf being 4 bytes larger, however I don't know if that is really an issue as this and an eariler patch that added bits & pos have increased the size from 20 to 28. If I am not mistaken slub/slab allocate on power of 2 sizes so 20 was likely being rounded up to 32 anyway. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 322 +++++++++++++++++++++++----------------------------- 1 file changed, 140 insertions(+), 182 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d1a9907901cb..94c929f6c752 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -96,32 +96,16 @@ struct tnode { unsigned char pos; /* 2log(KEYLENGTH) bits needed */ struct tnode __rcu *parent; struct rcu_head rcu; - /* everything above this comment must be the same as rt_trie_node */ - unsigned int full_children; /* KEYLENGTH bits needed */ - unsigned int empty_children; /* KEYLENGTH bits needed */ - struct rt_trie_node __rcu *child[0]; -}; - -/* This struct represents the shared bits between tnode and leaf. If any - * ordering is changed here is must also be updated in tnode and leaf as - * well. 
- */ -struct rt_trie_node { - t_key key; - unsigned char bits; - unsigned char pos; - struct tnode __rcu *parent; - struct rcu_head rcu; -}; - -struct leaf { - t_key key; - unsigned char bits; - unsigned char pos; - struct tnode __rcu *parent; - struct rcu_head rcu; - /* everything above this comment must be the same as rt_trie_node */ - struct hlist_head list; + union { + /* The fields in this struct are valid if bits > 0 (TNODE) */ + struct { + unsigned int full_children; /* KEYLENGTH bits needed */ + unsigned int empty_children; /* KEYLENGTH bits needed */ + struct tnode __rcu *child[0]; + }; + /* This list pointer if valid if bits == 0 (LEAF) */ + struct hlist_head list; + }; }; struct leaf_info { @@ -154,15 +138,15 @@ struct trie_stat { }; struct trie { - struct rt_trie_node __rcu *trie; + struct tnode __rcu *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats __percpu *stats; #endif }; -static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, +static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n, int wasfull); -static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); +static struct tnode *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); /* tnodes to free after resize(); protected by RTNL */ @@ -186,10 +170,10 @@ static struct kmem_cache *trie_leaf_kmem __read_mostly; #define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent) /* wrapper for rcu_assign_pointer */ -static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) +static inline void node_set_parent(struct tnode *n, struct tnode *tp) { - if (node) - rcu_assign_pointer(node->parent, ptr); + if (n) + rcu_assign_pointer(n->parent, tp); } #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p) @@ -205,7 +189,7 @@ static inline int tnode_child_length(const struct tnode *tn) /* * caller must hold RTNL */ -static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) +static inline struct tnode *tnode_get_child(const struct tnode *tn, unsigned int i) { BUG_ON(i >= tnode_child_length(tn)); @@ -215,7 +199,7 @@ static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsig /* * caller must hold RCU read lock or RTNL */ -static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) +static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) { BUG_ON(i >= tnode_child_length(tn)); @@ -340,11 +324,11 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) } #define TNODE_KMALLOC_MAX \ - ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct rt_trie_node *)) + ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *)) static void __node_free_rcu(struct rcu_head *head) { - struct rt_trie_node *n = container_of(head, struct rt_trie_node, rcu); + struct tnode *n = container_of(head, struct tnode, rcu); if (IS_LEAF(n)) kmem_cache_free(trie_leaf_kmem, n); @@ -395,9 +379,9 @@ static void tnode_free_flush(void) } } -static struct leaf *leaf_new(t_key key) +static struct tnode *leaf_new(t_key key) { - struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); + struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); if (l) { l->parent = NULL; /* set key and pos to reflect full key value @@ -444,7 +428,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) } pr_debug("AT %p s=%zu %zu\n", 
tn, sizeof(struct tnode), - sizeof(struct rt_trie_node *) << bits); + sizeof(struct tnode *) << bits); return tn; } @@ -453,13 +437,13 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) * and no bits are skipped. See discussion in dyntree paper p. 6 */ -static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) +static inline int tnode_full(const struct tnode *tn, const struct tnode *n) { return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits)); } static inline void put_child(struct tnode *tn, int i, - struct rt_trie_node *n) + struct tnode *n) { tnode_put_child_reorg(tn, i, n, -1); } @@ -469,10 +453,10 @@ static inline void put_child(struct tnode *tn, int i, * Update the value of full_children and empty_children. */ -static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, +static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n, int wasfull) { - struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); + struct tnode *chi = rtnl_dereference(tn->child[i]); int isfull; BUG_ON(i >= 1<bits); @@ -499,10 +483,9 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * } #define MAX_WORK 10 -static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) +static struct tnode *resize(struct trie *t, struct tnode *tn) { - struct rt_trie_node *n = NULL; - struct tnode *old_tn; + struct tnode *old_tn, *n = NULL; int inflate_threshold_use; int halve_threshold_use; int max_work; @@ -614,7 +597,7 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) /* Return if at least one inflate is run */ if (max_work != MAX_WORK) - return (struct rt_trie_node *) tn; + return tn; /* * Halve as long as the number of empty children in this @@ -650,13 +633,13 @@ no_children: tnode_free_safe(tn); return n; } - return (struct rt_trie_node *) tn; + return tn; } static void tnode_clean_free(struct tnode *tn) { - struct rt_trie_node *tofree; + struct tnode *tofree; int i; for (i = 0; i < tnode_child_length(tn); i++) { @@ -667,10 +650,10 @@ static void tnode_clean_free(struct tnode *tn) node_free(tn); } -static struct tnode *inflate(struct trie *t, struct tnode *tn) +static struct tnode *inflate(struct trie *t, struct tnode *oldtnode) { - struct tnode *oldtnode = tn; - int olen = tnode_child_length(tn); + int olen = tnode_child_length(oldtnode); + struct tnode *tn; int i; pr_debug("In inflate\n"); @@ -690,11 +673,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) for (i = 0; i < olen; i++) { struct tnode *inode; - inode = (struct tnode *) tnode_get_child(oldtnode, i); - if (inode && - IS_TNODE(inode) && - inode->pos == oldtnode->pos + oldtnode->bits && - inode->bits > 1) { + inode = tnode_get_child(oldtnode, i); + if (tnode_full(oldtnode, inode) && inode->bits > 1) { struct tnode *left, *right; t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; @@ -711,33 +691,29 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) goto nomem; } - put_child(tn, 2*i, (struct rt_trie_node *) left); - put_child(tn, 2*i+1, (struct rt_trie_node *) right); + put_child(tn, 2*i, left); + put_child(tn, 2*i+1, right); } } for (i = 0; i < olen; i++) { - struct tnode *inode; - struct rt_trie_node *node = tnode_get_child(oldtnode, i); + struct tnode *inode = tnode_get_child(oldtnode, i); struct tnode *left, *right; int size, j; /* An empty child */ - if (node == NULL) + if (inode == NULL) continue; /* A leaf or an internal node with skipped bits */ - - if (IS_LEAF(node) || (node->pos > 
(tn->pos + tn->bits - 1))) { + if (!tnode_full(oldtnode, inode)) { put_child(tn, - tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), - node); + tkey_extract_bits(inode->key, tn->pos, tn->bits), + inode); continue; } /* An internal node with two children */ - inode = (struct tnode *) node; - if (inode->bits == 1) { put_child(tn, 2*i, rtnl_dereference(inode->child[0])); put_child(tn, 2*i+1, rtnl_dereference(inode->child[1])); @@ -769,12 +745,12 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) * bit to zero. */ - left = (struct tnode *) tnode_get_child(tn, 2*i); + left = tnode_get_child(tn, 2*i); put_child(tn, 2*i, NULL); BUG_ON(!left); - right = (struct tnode *) tnode_get_child(tn, 2*i+1); + right = tnode_get_child(tn, 2*i+1); put_child(tn, 2*i+1, NULL); BUG_ON(!right); @@ -796,12 +772,11 @@ nomem: return ERR_PTR(-ENOMEM); } -static struct tnode *halve(struct trie *t, struct tnode *tn) +static struct tnode *halve(struct trie *t, struct tnode *oldtnode) { - struct tnode *oldtnode = tn; - struct rt_trie_node *left, *right; + int olen = tnode_child_length(oldtnode); + struct tnode *tn, *left, *right; int i; - int olen = tnode_child_length(tn); pr_debug("In halve\n"); @@ -830,7 +805,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) if (!newn) goto nomem; - put_child(tn, i/2, (struct rt_trie_node *)newn); + put_child(tn, i/2, newn); } } @@ -855,7 +830,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) } /* Two nonempty children */ - newBinNode = (struct tnode *) tnode_get_child(tn, i/2); + newBinNode = tnode_get_child(tn, i/2); put_child(tn, i/2, NULL); put_child(newBinNode, 0, left); put_child(newBinNode, 1, right); @@ -871,7 +846,7 @@ nomem: /* readside must use rcu_read_lock currently dump routines via get_fa_head and dump */ -static struct leaf_info *find_leaf_info(struct leaf *l, int plen) +static struct leaf_info *find_leaf_info(struct tnode *l, int plen) { struct hlist_head *head = &l->list; struct leaf_info *li; @@ -883,7 +858,7 @@ static struct leaf_info *find_leaf_info(struct leaf *l, int plen) return NULL; } -static inline struct list_head *get_fa_head(struct leaf *l, int plen) +static inline struct list_head *get_fa_head(struct tnode *l, int plen) { struct leaf_info *li = find_leaf_info(l, plen); @@ -915,32 +890,25 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) /* rcu_read_lock needs to be hold by caller from readside */ -static struct leaf * -fib_find_node(struct trie *t, u32 key) +static struct tnode *fib_find_node(struct trie *t, u32 key) { - int pos; - struct tnode *tn; - struct rt_trie_node *n; - - pos = 0; - n = rcu_dereference_rtnl(t->trie); + struct tnode *n = rcu_dereference_rtnl(t->trie); + int pos = 0; while (n && IS_TNODE(n)) { - tn = (struct tnode *) n; - - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { - pos = tn->pos + tn->bits; - n = tnode_get_child_rcu(tn, + if (tkey_sub_equals(n->key, pos, n->pos-pos, key)) { + pos = n->pos + n->bits; + n = tnode_get_child_rcu(n, tkey_extract_bits(key, - tn->pos, - tn->bits)); + n->pos, + n->bits)); } else break; } /* Case we have found a leaf. 
Compare prefixes */ if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) - return (struct leaf *)n; + return n; return NULL; } @@ -956,14 +924,13 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) while (tn != NULL && (tp = node_parent(tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); - tn = (struct tnode *)resize(t, tn); + tn = resize(t, tn); - tnode_put_child_reorg(tp, cindex, - (struct rt_trie_node *)tn, wasfull); + tnode_put_child_reorg(tp, cindex, tn, wasfull); tp = node_parent(tn); if (!tp) - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); + rcu_assign_pointer(t->trie, tn); tnode_free_flush(); if (!tp) @@ -973,9 +940,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) /* Handle last (top) tnode */ if (IS_TNODE(tn)) - tn = (struct tnode *)resize(t, tn); + tn = resize(t, tn); - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); + rcu_assign_pointer(t->trie, tn); tnode_free_flush(); } @@ -985,8 +952,8 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) { int pos, newpos; struct tnode *tp = NULL, *tn = NULL; - struct rt_trie_node *n; - struct leaf *l; + struct tnode *n; + struct tnode *l; int missbit; struct list_head *fa_head = NULL; struct leaf_info *li; @@ -1014,17 +981,15 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) */ while (n && IS_TNODE(n)) { - tn = (struct tnode *) n; - - if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { - tp = tn; - pos = tn->pos + tn->bits; - n = tnode_get_child(tn, + if (tkey_sub_equals(n->key, pos, n->pos-pos, key)) { + tp = n; + pos = n->pos + n->bits; + n = tnode_get_child(n, tkey_extract_bits(key, - tn->pos, - tn->bits)); + n->pos, + n->bits)); - BUG_ON(n && node_parent(n) != tn); + BUG_ON(n && node_parent(n) != tp); } else break; } @@ -1040,14 +1005,13 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) /* Case 1: n is a leaf. Compare prefixes */ if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { - l = (struct leaf *) n; li = leaf_info_new(plen); if (!li) return NULL; fa_head = &li->falh; - insert_leaf_info(&l->list, li); + insert_leaf_info(&n->list, li); goto done; } l = leaf_new(key); @@ -1068,10 +1032,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) if (t->trie && n == NULL) { /* Case 2: n is NULL, and will just insert a new leaf */ - node_set_parent((struct rt_trie_node *)l, tp); + node_set_parent(l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, (struct rt_trie_node *)l); + put_child(tp, cindex, l); } else { /* Case 3: n is a LEAF or a TNODE and the key doesn't match. 
*/ /* @@ -1094,17 +1058,17 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) return NULL; } - node_set_parent((struct rt_trie_node *)tn, tp); + node_set_parent(tn, tp); missbit = tkey_extract_bits(key, newpos, 1); - put_child(tn, missbit, (struct rt_trie_node *)l); + put_child(tn, missbit, l); put_child(tn, 1-missbit, n); if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, (struct rt_trie_node *)tn); + put_child(tp, cindex, tn); } else { - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); + rcu_assign_pointer(t->trie, tn); } tp = tn; @@ -1134,7 +1098,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) u8 tos = cfg->fc_tos; u32 key, mask; int err; - struct leaf *l; + struct tnode *l; if (plen > 32) return -EINVAL; @@ -1292,7 +1256,7 @@ err: } /* should be called with rcu_read_lock */ -static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, +static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l, t_key key, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { @@ -1365,7 +1329,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct trie_use_stats __percpu *stats = t->stats; #endif int ret; - struct rt_trie_node *n; + struct tnode *n; struct tnode *pn; unsigned int pos, bits; t_key key = ntohl(flp->daddr); @@ -1387,11 +1351,11 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); + ret = check_leaf(tb, t, n, key, flp, res, fib_flags); goto found; } - pn = (struct tnode *) n; + pn = n; chopped_off = 0; while (pn) { @@ -1412,13 +1376,13 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, } if (IS_LEAF(n)) { - ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); + ret = check_leaf(tb, t, n, key, flp, res, fib_flags); if (ret > 0) goto backtrace; goto found; } - cn = (struct tnode *)n; + cn = n; /* * It's a tnode, and we can do some extra checks here if we @@ -1506,7 +1470,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, current_prefix_length = mp; } - pn = (struct tnode *)n; /* Descend */ + pn = n; /* Descend */ chopped_off = 0; continue; @@ -1557,7 +1521,7 @@ EXPORT_SYMBOL_GPL(fib_table_lookup); /* * Remove the leaf and return parent. */ -static void trie_leaf_remove(struct trie *t, struct leaf *l) +static void trie_leaf_remove(struct trie *t, struct tnode *l) { struct tnode *tp = node_parent(l); @@ -1584,7 +1548,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; - struct leaf *l; + struct tnode *l; struct leaf_info *li; if (plen > 32) @@ -1682,7 +1646,7 @@ static int trie_flush_list(struct list_head *head) return found; } -static int trie_flush_leaf(struct leaf *l) +static int trie_flush_leaf(struct tnode *l) { int found = 0; struct hlist_head *lih = &l->list; @@ -1704,7 +1668,7 @@ static int trie_flush_leaf(struct leaf *l) * Scan for the next right leaf starting at node p->child[idx] * Since we have back pointer, no recursion necessary. 
*/ -static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) +static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) { do { t_key idx; @@ -1720,47 +1684,46 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) continue; if (IS_LEAF(c)) - return (struct leaf *) c; + return c; /* Rescan start scanning in new node */ - p = (struct tnode *) c; + p = c; idx = 0; } /* Node empty, walk back up to parent */ - c = (struct rt_trie_node *) p; + c = p; } while ((p = node_parent_rcu(c)) != NULL); return NULL; /* Root of trie */ } -static struct leaf *trie_firstleaf(struct trie *t) +static struct tnode *trie_firstleaf(struct trie *t) { - struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie); + struct tnode *n = rcu_dereference_rtnl(t->trie); if (!n) return NULL; if (IS_LEAF(n)) /* trie is just a leaf */ - return (struct leaf *) n; + return n; return leaf_walk_rcu(n, NULL); } -static struct leaf *trie_nextleaf(struct leaf *l) +static struct tnode *trie_nextleaf(struct tnode *l) { - struct rt_trie_node *c = (struct rt_trie_node *) l; - struct tnode *p = node_parent_rcu(c); + struct tnode *p = node_parent_rcu(l); if (!p) return NULL; /* trie with just one leaf */ - return leaf_walk_rcu(p, c); + return leaf_walk_rcu(p, l); } -static struct leaf *trie_leafindex(struct trie *t, int index) +static struct tnode *trie_leafindex(struct trie *t, int index) { - struct leaf *l = trie_firstleaf(t); + struct tnode *l = trie_firstleaf(t); while (l && index-- > 0) l = trie_nextleaf(l); @@ -1775,7 +1738,7 @@ static struct leaf *trie_leafindex(struct trie *t, int index) int fib_table_flush(struct fib_table *tb) { struct trie *t = (struct trie *) tb->tb_data; - struct leaf *l, *ll = NULL; + struct tnode *l, *ll = NULL; int found = 0; for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { @@ -1840,7 +1803,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, return skb->len; } -static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, +static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { struct leaf_info *li; @@ -1876,7 +1839,7 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { - struct leaf *l; + struct tnode *l; struct trie *t = (struct trie *) tb->tb_data; t_key key = cb->args[2]; int count = cb->args[3]; @@ -1922,7 +1885,7 @@ void __init fib_trie_init(void) 0, SLAB_PANIC, NULL); trie_leaf_kmem = kmem_cache_create("ip_fib_trie", - max(sizeof(struct leaf), + max(sizeof(struct tnode), sizeof(struct leaf_info)), 0, SLAB_PANIC, NULL); } @@ -1965,7 +1928,7 @@ struct fib_trie_iter { unsigned int depth; }; -static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) +static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter) { struct tnode *tn = iter->tnode; unsigned int cindex = iter->index; @@ -1979,7 +1942,7 @@ static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) iter->tnode, iter->index, iter->depth); rescan: while (cindex < (1<bits)) { - struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); + struct tnode *n = tnode_get_child_rcu(tn, cindex); if (n) { if (IS_LEAF(n)) { @@ -1987,7 +1950,7 @@ rescan: iter->index = cindex + 1; } else { /* push down one level */ - iter->tnode = (struct tnode *) n; + iter->tnode = n; iter->index = 0; ++iter->depth; } @@ -1998,7 +1961,7 @@ rescan: } /* Current node exhausted, 
pop back up */ - p = node_parent_rcu((struct rt_trie_node *)tn); + p = node_parent_rcu(tn); if (p) { cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; tn = p; @@ -2010,10 +1973,10 @@ rescan: return NULL; } -static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, +static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t) { - struct rt_trie_node *n; + struct tnode *n; if (!t) return NULL; @@ -2023,7 +1986,7 @@ static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, return NULL; if (IS_TNODE(n)) { - iter->tnode = (struct tnode *) n; + iter->tnode = n; iter->index = 0; iter->depth = 1; } else { @@ -2037,7 +2000,7 @@ static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - struct rt_trie_node *n; + struct tnode *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); @@ -2045,7 +2008,6 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) rcu_read_lock(); for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { if (IS_LEAF(n)) { - struct leaf *l = (struct leaf *)n; struct leaf_info *li; s->leaves++; @@ -2053,18 +2015,17 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, &l->list, hlist) + hlist_for_each_entry_rcu(li, &n->list, hlist) ++s->prefixes; } else { - const struct tnode *tn = (const struct tnode *) n; int i; s->tnodes++; - if (tn->bits < MAX_STAT_DEPTH) - s->nodesizes[tn->bits]++; + if (n->bits < MAX_STAT_DEPTH) + s->nodesizes[n->bits]++; - for (i = 0; i < (1<bits); i++) - if (!tn->child[i]) + for (i = 0; i < tnode_child_length(n); i++) + if (!rcu_access_pointer(n->child[i])) s->nullpointers++; } } @@ -2088,7 +2049,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); seq_printf(seq, "\tLeaves: %u\n", stat->leaves); - bytes = sizeof(struct leaf) * stat->leaves; + bytes = sizeof(struct tnode) * stat->leaves; seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); bytes += sizeof(struct leaf_info) * stat->prefixes; @@ -2109,7 +2070,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); - bytes += sizeof(struct rt_trie_node *) * pointers; + bytes += sizeof(struct tnode *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } @@ -2163,7 +2124,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "Basic info: size of leaf:" " %Zd bytes, size of tnode: %Zd bytes.\n", - sizeof(struct leaf), sizeof(struct tnode)); + sizeof(struct tnode), sizeof(struct tnode)); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; @@ -2202,7 +2163,7 @@ static const struct file_operations fib_triestat_fops = { .release = single_release_net, }; -static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) +static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2214,7 +2175,7 @@ static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - struct rt_trie_node *n; + struct tnode *n; for (n = 
fib_trie_get_first(iter, (struct trie *) tb->tb_data); @@ -2243,7 +2204,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; - struct rt_trie_node *n; + struct tnode *n; ++*pos; /* next node in same table */ @@ -2329,29 +2290,26 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; - struct rt_trie_node *n = v; + struct tnode *n = v; if (!node_parent_rcu(n)) fib_table_print(seq, iter->tb); if (IS_TNODE(n)) { - struct tnode *tn = (struct tnode *) n; - __be32 prf = htonl(tn->key); + __be32 prf = htonl(n->key); - seq_indent(seq, iter->depth-1); + seq_indent(seq, iter->depth - 1); seq_printf(seq, " +-- %pI4/%d %d %d %d\n", - &prf, tn->pos, tn->bits, tn->full_children, - tn->empty_children); - + &prf, n->pos, n->bits, n->full_children, + n->empty_children); } else { - struct leaf *l = (struct leaf *) n; struct leaf_info *li; - __be32 val = htonl(l->key); + __be32 val = htonl(n->key); seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &n->list, hlist) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2401,9 +2359,9 @@ struct fib_route_iter { t_key key; }; -static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) +static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) { - struct leaf *l = NULL; + struct tnode *l = NULL; struct trie *t = iter->main_trie; /* use cache location of last found key */ @@ -2448,7 +2406,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; - struct leaf *l = v; + struct tnode *l = v; ++*pos; if (v == SEQ_START_TOKEN) { @@ -2494,7 +2452,7 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info */ static int fib_route_seq_show(struct seq_file *seq, void *v) { - struct leaf *l = v; + struct tnode *l = v; struct leaf_info *li; if (v == SEQ_START_TOKEN) { -- cgit v1.2.3 From 9f9e636d4f89f788c5cf9c6a5357501c0d405fcb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:55:54 -0800 Subject: fib_trie: Optimize fib_table_lookup to avoid wasting time on loops/variables This patch is meant to reduce the complexity of fib_table_lookup by reducing the number of variables to the bare minimum while still keeping the same if not improved functionality versus the original. Most of this change was started off by the desire to rid the function of chopped_off and current_prefix_length as they actually added very little to the function since they only applied when computing the cindex. I was able to replace them mostly with just a check for the prefix match. As long as the prefix between the key and the node being tested was the same we know we can search the tnode fully versus just testing cindex 0. The second portion of the change ended up being a massive reordering. Originally the calls to check_leaf were up near the start of the loop, and the backtracing and descending into lower levels of tnodes was later. This didn't make much sense as the structure of the tree means the leaves are always the last thing to be tested. 
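The prefix check referred to above ("just a check for the prefix match") comes down to the get_index()/get_shift() macros added in the diff below: index = (key ^ n->key) >> (KEYLENGTH - pos - bits), and any bits set above the low 'bits' mean the skipped prefix bits differ. A small stand-alone illustration of that arithmetic (user-space sketch with made-up node values, not kernel code):

#include <stdio.h>

#define KEYLENGTH 32	/* IPv4 key width, as in fib_trie */

/* Index of lookup key 'key' within a node: n_key is the node's key with its
 * low bits already zeroed (mask_pfx(key, pos) in the code above), 'pos' bits
 * of prefix are skipped, and 'bits' select the child slot.
 */
static unsigned long node_index(unsigned int key, unsigned int n_key,
				unsigned int pos, unsigned int bits)
{
	return (unsigned long)(key ^ n_key) >> (KEYLENGTH - pos - bits);
}

static void lookup_step(unsigned int key, unsigned int n_key,
			unsigned int pos, unsigned int bits)
{
	unsigned long idx = node_index(key, n_key, pos, bits);

	if (idx >> bits)	/* non-zero above 'bits': skipped bits differ */
		printf("0x%08x: prefix mismatch, backtrack\n", key);
	else			/* low 'bits' are simply the child index */
		printf("0x%08x: descend to child %lu\n", key, idx);
}

int main(void)
{
	/* hypothetical node covering 192.168/16 with pos=16, bits=2 */
	unsigned int n_key = 0xc0a80000;

	lookup_step(0xc0a80101, n_key, 16, 2);	/* 192.168.1.1 -> child 0 */
	lookup_step(0xc0a90101, n_key, 16, 2);	/* 192.169.1.1 -> mismatch */
	return 0;
}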
As such I reordered things so that we instead have a loop that will delve into the tree and only exit when we have either found a leaf or we have exhausted the tree. The advantage of rearranging things like this is that we can fully inline check_leaf since there is now only one reference to it in the function. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 250 +++++++++++++++++++--------------------------------- 1 file changed, 93 insertions(+), 157 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 94c929f6c752..3fe4dd917ce1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -90,6 +90,9 @@ typedef unsigned int t_key; #define IS_TNODE(n) ((n)->bits) #define IS_LEAF(n) (!(n)->bits) +#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits) +#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv)) + struct tnode { t_key key; unsigned char bits; /* 2log(KEYLENGTH) bits needed */ @@ -1281,7 +1284,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l, continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; - if (err) { + if (unlikely(err < 0)) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(t->stats->semantic_match_passed); #endif @@ -1303,7 +1306,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l, res->prefixlen = li->plen; res->nh_sel = nhsel; res->type = fa->fa_type; - res->scope = fa->fa_info->fib_scope; + res->scope = fi->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &li->falh; @@ -1321,23 +1324,24 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l, return 1; } +static inline t_key prefix_mismatch(t_key key, struct tnode *n) +{ + t_key prefix = n->key; + + return (key ^ prefix) & (prefix | -prefix); +} + int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { - struct trie *t = (struct trie *) tb->tb_data; + struct trie *t = (struct trie *)tb->tb_data; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats __percpu *stats = t->stats; #endif - int ret; - struct tnode *n; - struct tnode *pn; - unsigned int pos, bits; - t_key key = ntohl(flp->daddr); - unsigned int chopped_off; - t_key cindex = 0; - unsigned int current_prefix_length = KEYLENGTH; - struct tnode *cn; - t_key pref_mismatch; + const t_key key = ntohl(flp->daddr); + struct tnode *n, *pn; + t_key cindex; + int ret = 1; rcu_read_lock(); @@ -1349,170 +1353,102 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, this_cpu_inc(stats->gets); #endif - /* Just a leaf? */ - if (IS_LEAF(n)) { - ret = check_leaf(tb, t, n, key, flp, res, fib_flags); - goto found; - } - pn = n; - chopped_off = 0; - - while (pn) { - pos = pn->pos; - bits = pn->bits; - - if (!chopped_off) - cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), - pos, bits); - - n = tnode_get_child_rcu(pn, cindex); - - if (n == NULL) { -#ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(stats->null_node_hit); -#endif - goto backtrace; - } + cindex = 0; + + /* Step 1: Travel to the longest prefix match in the trie */ + for (;;) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the "bits" in the prefix. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. 
+ * if !(index >> bits) + * we know the value is child index + * else + * we have a mismatch in skip bits and failed + */ + if (index >> n->bits) + break; - if (IS_LEAF(n)) { - ret = check_leaf(tb, t, n, key, flp, res, fib_flags); - if (ret > 0) - goto backtrace; + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) goto found; - } - cn = n; - - /* - * It's a tnode, and we can do some extra checks here if we - * like, to avoid descending into a dead-end branch. - * This tnode is in the parent's child array at index - * key[p_pos..p_pos+p_bits] but potentially with some bits - * chopped off, so in reality the index may be just a - * subprefix, padded with zero at the end. - * We can also take a look at any skipped bits in this - * tnode - everything up to p_pos is supposed to be ok, - * and the non-chopped bits of the index (se previous - * paragraph) are also guaranteed ok, but the rest is - * considered unknown. - * - * The skipped bits are key[pos+bits..cn->pos]. - */ - - /* If current_prefix_length < pos+bits, we are already doing - * actual prefix matching, which means everything from - * pos+(bits-chopped_off) onward must be zero along some - * branch of this subtree - otherwise there is *no* valid - * prefix present. Here we can only check the skipped - * bits. Remember, since we have already indexed into the - * parent's child array, we know that the bits we chopped of - * *are* zero. + /* only record pn and cindex if we are going to be chopping + * bits later. Otherwise we are just wasting cycles. */ - - /* NOTA BENE: Checking only skipped bits - for the new node here */ - - if (current_prefix_length < pos+bits) { - if (tkey_extract_bits(cn->key, current_prefix_length, - cn->pos - current_prefix_length) - || !(cn->child[0])) - goto backtrace; + if (index) { + pn = n; + cindex = index; } - /* - * If chopped_off=0, the index is fully validated and we - * only need to look at the skipped bits for this, the new, - * tnode. What we actually want to do is to find out if - * these skipped bits match our key perfectly, or if we will - * have to count on finding a matching prefix further down, - * because if we do, we would like to have some way of - * verifying the existence of such a prefix at this point. - */ + n = rcu_dereference(n->child[index]); + if (unlikely(!n)) + goto backtrace; + } - /* The only thing we can do at this point is to verify that - * any such matching prefix can indeed be a prefix to our - * key, and if the bits in the node we are inspecting that - * do not match our key are not ZERO, this cannot be true. - * Thus, find out where there is a mismatch (before cn->pos) - * and verify that all the mismatching bits are zero in the - * new tnode's key. - */ + /* Step 2: Sort out leaves and begin backtracing for longest prefix */ + for (;;) { + /* record the pointer where our next node pointer is stored */ + struct tnode __rcu **cptr = n->child; - /* - * Note: We aren't very concerned about the piece of - * the key that precede pn->pos+pn->bits, since these - * have already been checked. The bits after cn->pos - * aren't checked since these are by definition - * "unknown" at this point. Thus, what we want to see - * is if we are about to enter the "prefix matching" - * state, and in that case verify that the skipped - * bits that will prevail throughout this subtree are - * zero, as they have to be if we are to find a - * matching prefix. 
+ /* This test verifies that none of the bits that differ + * between the key and the prefix exist in the region of + * the lsb and higher in the prefix. */ + if (unlikely(prefix_mismatch(key, n))) + goto backtrace; - pref_mismatch = mask_pfx(cn->key ^ key, cn->pos); + /* exit out and process leaf */ + if (unlikely(IS_LEAF(n))) + break; - /* - * In short: If skipped bits in this node do not match - * the search key, enter the "prefix matching" - * state.directly. + /* Don't bother recording parent info. Since we are in + * prefix match mode we will have to come back to wherever + * we started this traversal anyway */ - if (pref_mismatch) { - /* fls(x) = __fls(x) + 1 */ - int mp = KEYLENGTH - __fls(pref_mismatch) - 1; - - if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) - goto backtrace; - - if (current_prefix_length >= cn->pos) - current_prefix_length = mp; - } - - pn = n; /* Descend */ - chopped_off = 0; - continue; + while ((n = rcu_dereference(*cptr)) == NULL) { backtrace: - chopped_off++; - - /* As zero don't change the child key (cindex) */ - while ((chopped_off <= pn->bits) - && !(cindex & (1<<(chopped_off-1)))) - chopped_off++; - - /* Decrease current_... with bits chopped off */ - if (current_prefix_length > pn->pos + pn->bits - chopped_off) - current_prefix_length = pn->pos + pn->bits - - chopped_off; - - /* - * Either we do the actual chop off according or if we have - * chopped off all bits in this tnode walk up to our parent. - */ - - if (chopped_off <= pn->bits) { - cindex &= ~(1 << (chopped_off-1)); - } else { - struct tnode *parent = node_parent_rcu(pn); - if (!parent) - goto failed; - - /* Get Child's index */ - cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); - pn = parent; - chopped_off = 0; - #ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(stats->backtrack); + if (!n) + this_cpu_inc(stats->null_node_hit); #endif - goto backtrace; + /* If we are at cindex 0 there are no more bits for + * us to strip at this level so we must ascend back + * up one level to see if there are any more bits to + * be stripped there. + */ + while (!cindex) { + t_key pkey = pn->key; + + pn = node_parent_rcu(pn); + if (unlikely(!pn)) + goto failed; +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->backtrack); +#endif + /* Get Child's index */ + cindex = get_index(pkey, pn); + } + + /* strip the least significant bit from the cindex */ + cindex &= cindex - 1; + + /* grab pointer for next child node */ + cptr = &pn->child[cindex]; } } -failed: - ret = 1; + found: + /* Step 3: Process the leaf, if that fails fall back to backtracing */ + ret = check_leaf(tb, t, n, key, flp, res, fib_flags); + if (unlikely(ret > 0)) + goto backtrace; +failed: rcu_read_unlock(); return ret; } -- cgit v1.2.3 From 939afb0657dd8c8f9486d172d6bb62fc902e2f23 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:00 -0800 Subject: fib_trie: Optimize fib_find_node This patch makes use of the same features I made use of for fib_table_lookup to streamline fib_find_node. The resultant code should be smaller and run faster than the original. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3fe4dd917ce1..ac04f31a632e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -892,28 +892,34 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) } /* rcu_read_lock needs to be hold by caller from readside */ - static struct tnode *fib_find_node(struct trie *t, u32 key) { struct tnode *n = rcu_dereference_rtnl(t->trie); - int pos = 0; - while (n && IS_TNODE(n)) { - if (tkey_sub_equals(n->key, pos, n->pos-pos, key)) { - pos = n->pos + n->bits; - n = tnode_get_child_rcu(n, - tkey_extract_bits(key, - n->pos, - n->bits)); - } else + while (n) { + unsigned long index = get_index(key, n); + + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the bits in the cindex. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. + * if !(index >> bits) + * we know the value is cindex + * else + * we have a mismatch in skip bits and failed + */ + if (index >> n->bits) + return NULL; + + /* we have found a leaf. Prefixes have already been compared */ + if (IS_LEAF(n)) break; - } - /* Case we have found a leaf. Compare prefixes */ - if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) - return n; + n = rcu_dereference_rtnl(n->child[index]); + } - return NULL; + return n; } static void trie_rebalance(struct trie *t, struct tnode *tn) -- cgit v1.2.3 From 836a0123c98c91df71678ce63c0d42770996582a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:06 -0800 Subject: fib_trie: Optimize fib_table_insert This patch updates the fib_table_insert function to take advantage of the changes made to improve the performance of fib_table_lookup. As a result the code should be smaller and run faster then the original. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 196 +++++++++++++++++++--------------------------------- 1 file changed, 71 insertions(+), 125 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ac04f31a632e..8a147c3baaba 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -222,31 +222,6 @@ static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int return 0; } -static inline int tkey_equals(t_key a, t_key b) -{ - return a == b; -} - -static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) -{ - if (bits == 0 || offset >= KEYLENGTH) - return 1; - bits = bits > KEYLENGTH ? KEYLENGTH : bits; - return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; -} - -static inline int tkey_mismatch(t_key a, int offset, t_key b) -{ - t_key diff = a ^ b; - int i = offset; - - if (!diff) - return 0; - while ((diff << i) >> (KEYLENGTH-1) == 0) - i++; - return i; -} - /* To understand this stuff, an understanding of keys and all their bits is necessary. 
Every node in the trie has a key associated with it, but not @@ -485,6 +460,15 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n, rcu_assign_pointer(tn->child[i], n); } +static void put_child_root(struct tnode *tp, struct trie *t, + t_key key, struct tnode *n) +{ + if (tp) + put_child(tp, get_index(key, tp), n); + else + rcu_assign_pointer(t->trie, n); +} + #define MAX_WORK 10 static struct tnode *resize(struct trie *t, struct tnode *tn) { @@ -959,138 +943,100 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) { - int pos, newpos; - struct tnode *tp = NULL, *tn = NULL; - struct tnode *n; - struct tnode *l; - int missbit; struct list_head *fa_head = NULL; + struct tnode *l, *n, *tp = NULL; struct leaf_info *li; - t_key cindex; - pos = 0; + li = leaf_info_new(plen); + if (!li) + return NULL; + fa_head = &li->falh; + n = rtnl_dereference(t->trie); /* If we point to NULL, stop. Either the tree is empty and we should * just put a new leaf in if, or we have reached an empty child slot, * and we should just put our new leaf in that. - * If we point to a T_TNODE, check if it matches our key. Note that - * a T_TNODE might be skipping any number of bits - its 'pos' need - * not be the parent's 'pos'+'bits'! - * - * If it does match the current key, get pos/bits from it, extract - * the index from our key, push the T_TNODE and walk the tree. - * - * If it doesn't, we have to replace it with a new T_TNODE. * - * If we point to a T_LEAF, it might or might not have the same key - * as we do. If it does, just change the value, update the T_LEAF's - * value, and return it. - * If it doesn't, we need to replace it with a T_TNODE. + * If we hit a node with a key that does't match then we should stop + * and create a new tnode to replace that node and insert ourselves + * and the other node into the new tnode. */ + while (n) { + unsigned long index = get_index(key, n); - while (n && IS_TNODE(n)) { - if (tkey_sub_equals(n->key, pos, n->pos-pos, key)) { - tp = n; - pos = n->pos + n->bits; - n = tnode_get_child(n, - tkey_extract_bits(key, - n->pos, - n->bits)); - - BUG_ON(n && node_parent(n) != tp); - } else + /* This bit of code is a bit tricky but it combines multiple + * checks into a single check. The prefix consists of the + * prefix plus zeros for the "bits" in the prefix. The index + * is the difference between the key and this value. From + * this we can actually derive several pieces of data. + * if !(index >> bits) + * we know the value is child index + * else + * we have a mismatch in skip bits and failed + */ + if (index >> n->bits) break; - } - - /* - * n ----> NULL, LEAF or TNODE - * - * tp is n's (parent) ----> NULL or TNODE - */ - - BUG_ON(tp && IS_LEAF(tp)); - - /* Case 1: n is a leaf. Compare prefixes */ - - if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { - li = leaf_info_new(plen); - if (!li) - return NULL; + /* we have found a leaf. 
Prefixes have already been compared */ + if (IS_LEAF(n)) { + /* Case 1: n is a leaf, and prefixes match*/ + insert_leaf_info(&n->list, li); + return fa_head; + } - fa_head = &li->falh; - insert_leaf_info(&n->list, li); - goto done; + tp = n; + n = rcu_dereference_rtnl(n->child[index]); } - l = leaf_new(key); - - if (!l) - return NULL; - - li = leaf_info_new(plen); - if (!li) { - node_free(l); + l = leaf_new(key); + if (!l) { + free_leaf_info(li); return NULL; } - fa_head = &li->falh; insert_leaf_info(&l->list, li); - if (t->trie && n == NULL) { - /* Case 2: n is NULL, and will just insert a new leaf */ - - node_set_parent(l, tp); - - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, l); - } else { - /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ - /* - * Add a new tnode here - * first tnode need some special handling - */ + /* Case 2: n is a LEAF or a TNODE and the key doesn't match. + * + * Add a new tnode here + * first tnode need some special handling + * leaves us in position for handling as case 3 + */ + if (n) { + struct tnode *tn; + int newpos; - if (n) { - pos = tp ? tp->pos+tp->bits : 0; - newpos = tkey_mismatch(key, pos, n->key); - tn = tnode_new(n->key, newpos, 1); - } else { - newpos = 0; - tn = tnode_new(key, newpos, 1); /* First tnode */ - } + newpos = KEYLENGTH - __fls(n->key ^ key) - 1; + tn = tnode_new(key, newpos, 1); if (!tn) { free_leaf_info(li); node_free(l); return NULL; } - node_set_parent(tn, tp); - - missbit = tkey_extract_bits(key, newpos, 1); - put_child(tn, missbit, l); - put_child(tn, 1-missbit, n); + /* initialize routes out of node */ + NODE_INIT_PARENT(tn, tp); + put_child(tn, get_index(key, tn) ^ 1, n); - if (tp) { - cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(tp, cindex, tn); - } else { - rcu_assign_pointer(t->trie, tn); - } + /* start adding routes into the node */ + put_child_root(tp, t, key, tn); + node_set_parent(n, tn); + /* parent now has a NULL spot where the leaf can go */ tp = tn; } - if (tp && tp->pos + tp->bits > 32) - pr_warn("fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", - tp, tp->pos, tp->bits, key, plen); - - /* Rebalance the trie */ + /* Case 3: n is NULL, and will just insert a new leaf */ + if (tp) { + NODE_INIT_PARENT(l, tp); + put_child(tp, get_index(key, tp), l); + trie_rebalance(t, tp); + } else { + rcu_assign_pointer(t->trie, l); + } - trie_rebalance(t, tp); -done: return fa_head; } @@ -1470,11 +1416,11 @@ static void trie_leaf_remove(struct trie *t, struct tnode *l) pr_debug("entering trie_leaf_remove(%p)\n", l); if (tp) { - t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); - put_child(tp, cindex, NULL); + put_child(tp, get_index(l->key, tp), NULL); trie_rebalance(t, tp); - } else + } else { RCU_INIT_POINTER(t->trie, NULL); + } node_free(l); } -- cgit v1.2.3 From e9b44019d41a5cf41de43e895a5ed38b19a59b5b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:12 -0800 Subject: fib_trie: Update meaning of pos to represent unchecked bits This change moves the pos value to the other side of the "bits" field. By doing this it actually simplifies a significant amount of code in the trie. For example when halving a tree we know that the bit lost exists at oldnode->pos, and if we inflate the tree the new bit being add is at tn->pos. Previously to find those bits you would have to subtract pos and bits from the keylength or start with a value of (1 << 31) and then shift that. 
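To make the new meaning of pos concrete, the sketch below computes the same child index under both conventions, using the tp values from the example comment updated below (bits = 3, pos 7 under the old scheme, 22 under the new one); the parent key and lookup key are invented. Under the old scheme the shift has to be derived as KEYLENGTH - pos - bits, while under the new one pos itself is the shift.

#include <assert.h>
#include <stdio.h>

typedef unsigned int t_key;
#define KEYLENGTH (8 * sizeof(t_key))

/* old convention: pos counts checked bits from the MSB side */
static unsigned long index_old(t_key key, t_key nkey,
                               unsigned int pos, unsigned int bits)
{
        return (key ^ nkey) >> (KEYLENGTH - pos - bits);
}

/* new convention: pos is simply the number of unchecked low bits */
static unsigned long index_new(t_key key, t_key nkey, unsigned int pos)
{
        return (key ^ nkey) >> pos;
}

int main(void)
{
        unsigned int old_pos = 7, bits = 3;
        unsigned int new_pos = KEYLENGTH - old_pos - bits;      /* 22 */
        t_key nkey = 0x82000000;        /* invented parent key, low bits masked */
        t_key key  = 0x83412345;        /* invented lookup key */
        unsigned long old_idx = index_old(key, nkey, old_pos, bits);
        unsigned long new_idx = index_new(key, nkey, new_pos);

        assert(old_idx == new_idx);
        printf("child index %lu either way\n", old_idx);
        return 0;
}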
There are a number of spots throughout the code that benefit from this. In the case of the hot-path searches the main advantage is that we can drop 2 or more operations from the search path as we no longer need to compute the value for the index to be shifted by and can instead just use the raw pos value. In addition the tkey_extract_bits is now defunct and can be replaced by get_index since the two operations were doing the same thing, but now get_index does it much more quickly as it is only an xor and shift versus a pair of shifts and a subtraction. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 194 ++++++++++++++++++++++------------------------------ 1 file changed, 81 insertions(+), 113 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 8a147c3baaba..9d675486b38c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -90,8 +90,7 @@ typedef unsigned int t_key; #define IS_TNODE(n) ((n)->bits) #define IS_LEAF(n) (!(n)->bits) -#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits) -#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv)) +#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos) struct tnode { t_key key; @@ -209,81 +208,64 @@ static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, unsigned return rcu_dereference_rtnl(tn->child[i]); } -static inline t_key mask_pfx(t_key k, unsigned int l) -{ - return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); -} - -static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) -{ - if (offset < KEYLENGTH) - return ((t_key)(a << offset)) >> (KEYLENGTH - bits); - else - return 0; -} - -/* - To understand this stuff, an understanding of keys and all their bits is - necessary. Every node in the trie has a key associated with it, but not - all of the bits in that key are significant. - - Consider a node 'n' and its parent 'tp'. - - If n is a leaf, every bit in its key is significant. Its presence is - necessitated by path compression, since during a tree traversal (when - searching for a leaf - unless we are doing an insertion) we will completely - ignore all skipped bits we encounter. Thus we need to verify, at the end of - a potentially successful search, that we have indeed been walking the - correct key path. - - Note that we can never "miss" the correct key in the tree if present by - following the wrong path. Path compression ensures that segments of the key - that are the same for all keys with a given prefix are skipped, but the - skipped part *is* identical for each node in the subtrie below the skipped - bit! trie_insert() in this implementation takes care of that - note the - call to tkey_sub_equals() in trie_insert(). - - if n is an internal node - a 'tnode' here, the various parts of its key - have many different meanings. 
- - Example: - _________________________________________________________________ - | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | - ----------------------------------------------------------------- - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - - _________________________________________________________________ - | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | - ----------------------------------------------------------------- - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - - tp->pos = 7 - tp->bits = 3 - n->pos = 15 - n->bits = 4 - - First, let's just ignore the bits that come before the parent tp, that is - the bits from 0 to (tp->pos-1). They are *known* but at this point we do - not use them for anything. - - The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the - index into the parent's child array. That is, they will be used to find - 'n' among tp's children. - - The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits - for the node n. - - All the bits we have seen so far are significant to the node n. The rest - of the bits are really not needed or indeed known in n->key. - - The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into - n's child array, and will of course be different for each child. - - - The rest of the bits, from (n->pos + n->bits) onward, are completely unknown - at this point. - -*/ +/* To understand this stuff, an understanding of keys and all their bits is + * necessary. Every node in the trie has a key associated with it, but not + * all of the bits in that key are significant. + * + * Consider a node 'n' and its parent 'tp'. + * + * If n is a leaf, every bit in its key is significant. Its presence is + * necessitated by path compression, since during a tree traversal (when + * searching for a leaf - unless we are doing an insertion) we will completely + * ignore all skipped bits we encounter. Thus we need to verify, at the end of + * a potentially successful search, that we have indeed been walking the + * correct key path. + * + * Note that we can never "miss" the correct key in the tree if present by + * following the wrong path. Path compression ensures that segments of the key + * that are the same for all keys with a given prefix are skipped, but the + * skipped part *is* identical for each node in the subtrie below the skipped + * bit! trie_insert() in this implementation takes care of that. + * + * if n is an internal node - a 'tnode' here, the various parts of its key + * have many different meanings. + * + * Example: + * _________________________________________________________________ + * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | + * ----------------------------------------------------------------- + * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 + * + * _________________________________________________________________ + * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | + * ----------------------------------------------------------------- + * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 + * + * tp->pos = 22 + * tp->bits = 3 + * n->pos = 13 + * n->bits = 4 + * + * First, let's just ignore the bits that come before the parent tp, that is + * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this + * point we do not use them for anything. + * + * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the + * index into the parent's child array. 
That is, they will be used to find + * 'n' among tp's children. + * + * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits + * for the node n. + * + * All the bits we have seen so far are significant to the node n. The rest + * of the bits are really not needed or indeed known in n->key. + * + * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into + * n's child array, and will of course be different for each child. + * + * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown + * at this point. + */ static const int halve_threshold = 25; static const int inflate_threshold = 50; @@ -367,7 +349,7 @@ static struct tnode *leaf_new(t_key key) * as the nodes are searched */ l->key = key; - l->pos = KEYLENGTH; + l->pos = 0; /* set bits to 0 indicating we are not a tnode */ l->bits = 0; @@ -400,7 +382,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) tn->parent = NULL; tn->pos = pos; tn->bits = bits; - tn->key = mask_pfx(key, pos); + tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; tn->full_children = 0; tn->empty_children = 1<pos == (tn->pos + tn->bits)); + return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); } static inline void put_child(struct tnode *tn, int i, @@ -641,11 +621,12 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode) { int olen = tnode_child_length(oldtnode); struct tnode *tn; + t_key m; int i; pr_debug("In inflate\n"); - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); + tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); if (!tn) return ERR_PTR(-ENOMEM); @@ -656,21 +637,18 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode) * fails. In case of failure we return the oldnode and inflate * of tnode is ignored. 
*/ + for (i = 0, m = 1u << tn->pos; i < olen; i++) { + struct tnode *inode = tnode_get_child(oldtnode, i); - for (i = 0; i < olen; i++) { - struct tnode *inode; - - inode = tnode_get_child(oldtnode, i); - if (tnode_full(oldtnode, inode) && inode->bits > 1) { + if (tnode_full(oldtnode, inode) && (inode->bits > 1)) { struct tnode *left, *right; - t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; - left = tnode_new(inode->key&(~m), inode->pos + 1, + left = tnode_new(inode->key & ~m, inode->pos, inode->bits - 1); if (!left) goto nomem; - right = tnode_new(inode->key|m, inode->pos + 1, + right = tnode_new(inode->key | m, inode->pos, inode->bits - 1); if (!right) { @@ -694,9 +672,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode) /* A leaf or an internal node with skipped bits */ if (!tnode_full(oldtnode, inode)) { - put_child(tn, - tkey_extract_bits(inode->key, tn->pos, tn->bits), - inode); + put_child(tn, get_index(inode->key, tn), inode); continue; } @@ -767,7 +743,7 @@ static struct tnode *halve(struct trie *t, struct tnode *oldtnode) pr_debug("In halve\n"); - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); + tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); if (!tn) return ERR_PTR(-ENOMEM); @@ -787,7 +763,7 @@ static struct tnode *halve(struct trie *t, struct tnode *oldtnode) if (left && right) { struct tnode *newn; - newn = tnode_new(left->key, tn->pos + tn->bits, 1); + newn = tnode_new(left->key, oldtnode->pos, 1); if (!newn) goto nomem; @@ -915,7 +891,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) key = tn->key; while (tn != NULL && (tp = node_parent(tn)) != NULL) { - cindex = tkey_extract_bits(key, tp->pos, tp->bits); + cindex = get_index(key, tp); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); tn = resize(t, tn); @@ -1005,11 +981,8 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) */ if (n) { struct tnode *tn; - int newpos; - - newpos = KEYLENGTH - __fls(n->key ^ key) - 1; - tn = tnode_new(key, newpos, 1); + tn = tnode_new(key, __fls(key ^ n->key), 1); if (!tn) { free_leaf_info(li); node_free(l); @@ -1559,12 +1532,7 @@ static int trie_flush_leaf(struct tnode *l) static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) { do { - t_key idx; - - if (c) - idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1; - else - idx = 0; + t_key idx = c ? 
idx = get_index(c->key, p) + 1 : 0; while (idx < 1u << p->bits) { c = tnode_get_child_rcu(p, idx++); @@ -1851,7 +1819,7 @@ rescan: /* Current node exhausted, pop back up */ p = node_parent_rcu(tn); if (p) { - cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; + cindex = get_index(tn->key, p) + 1; tn = p; --iter->depth; goto rescan; @@ -2186,10 +2154,10 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (IS_TNODE(n)) { __be32 prf = htonl(n->key); - seq_indent(seq, iter->depth - 1); - seq_printf(seq, " +-- %pI4/%d %d %d %d\n", - &prf, n->pos, n->bits, n->full_children, - n->empty_children); + seq_indent(seq, iter->depth-1); + seq_printf(seq, " +-- %pI4/%zu %u %u %u\n", + &prf, KEYLENGTH - n->pos - n->bits, n->bits, + n->full_children, n->empty_children); } else { struct leaf_info *li; __be32 val = htonl(n->key); -- cgit v1.2.3 From 98293e8d2f51976d5f790400859a517c0f7cb598 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:18 -0800 Subject: fib_trie: Use unsigned long for anything dealing with a shift by bits This change makes it so that anything that can be shifted by, or compared to a value shifted by bits is updated to be an unsigned long. This is mostly a precaution against an insanely huge address space that somehow starts coming close to the 2^32 root node size which would require something like 1.5 billion addresses. I chose unsigned long instead of unsigned long long since I do not believe it is possible to allocate a 32 bit tnode on a 32 bit system as the memory consumed would be 16GB + 28B which exceeds the addressible space for any one process. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 53 ++++++++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9d675486b38c..28a3065470bc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -146,8 +146,8 @@ struct trie { #endif }; -static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n, - int wasfull); +static void tnode_put_child_reorg(struct tnode *tn, unsigned long i, + struct tnode *n, int wasfull); static struct tnode *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); @@ -183,25 +183,23 @@ static inline void node_set_parent(struct tnode *n, struct tnode *tp) /* This provides us with the number of children in this node, in the case of a * leaf this will return 0 meaning none of the children are accessible. 
*/ -static inline int tnode_child_length(const struct tnode *tn) +static inline unsigned long tnode_child_length(const struct tnode *tn) { return (1ul << tn->bits) & ~(1ul); } -/* - * caller must hold RTNL - */ -static inline struct tnode *tnode_get_child(const struct tnode *tn, unsigned int i) +/* caller must hold RTNL */ +static inline struct tnode *tnode_get_child(const struct tnode *tn, + unsigned long i) { BUG_ON(i >= tnode_child_length(tn)); return rtnl_dereference(tn->child[i]); } -/* - * caller must hold RCU read lock or RTNL - */ -static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) +/* caller must hold RCU read lock or RTNL */ +static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, + unsigned long i) { BUG_ON(i >= tnode_child_length(tn)); @@ -400,7 +398,7 @@ static inline int tnode_full(const struct tnode *tn, const struct tnode *n) return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); } -static inline void put_child(struct tnode *tn, int i, +static inline void put_child(struct tnode *tn, unsigned long i, struct tnode *n) { tnode_put_child_reorg(tn, i, n, -1); @@ -411,13 +409,13 @@ static inline void put_child(struct tnode *tn, int i, * Update the value of full_children and empty_children. */ -static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n, - int wasfull) +static void tnode_put_child_reorg(struct tnode *tn, unsigned long i, + struct tnode *n, int wasfull) { struct tnode *chi = rtnl_dereference(tn->child[i]); int isfull; - BUG_ON(i >= 1<bits); + BUG_ON(i >= tnode_child_length(tn)); /* update emptyChildren */ if (n == NULL && chi != NULL) @@ -607,10 +605,10 @@ no_children: static void tnode_clean_free(struct tnode *tn) { struct tnode *tofree; - int i; + unsigned long i; for (i = 0; i < tnode_child_length(tn); i++) { - tofree = rtnl_dereference(tn->child[i]); + tofree = tnode_get_child(tn, i); if (tofree) node_free(tofree); } @@ -619,10 +617,10 @@ static void tnode_clean_free(struct tnode *tn) static struct tnode *inflate(struct trie *t, struct tnode *oldtnode) { - int olen = tnode_child_length(oldtnode); + unsigned long olen = tnode_child_length(oldtnode); struct tnode *tn; + unsigned long i; t_key m; - int i; pr_debug("In inflate\n"); @@ -664,7 +662,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode) for (i = 0; i < olen; i++) { struct tnode *inode = tnode_get_child(oldtnode, i); struct tnode *left, *right; - int size, j; + unsigned long size, j; /* An empty child */ if (inode == NULL) @@ -737,7 +735,7 @@ nomem: static struct tnode *halve(struct trie *t, struct tnode *oldtnode) { - int olen = tnode_child_length(oldtnode); + unsigned long olen = tnode_child_length(oldtnode); struct tnode *tn, *left, *right; int i; @@ -1532,9 +1530,9 @@ static int trie_flush_leaf(struct tnode *l) static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) { do { - t_key idx = c ? idx = get_index(c->key, p) + 1 : 0; + unsigned long idx = c ? 
idx = get_index(c->key, p) + 1 : 0; - while (idx < 1u << p->bits) { + while (idx < tnode_child_length(p)) { c = tnode_get_child_rcu(p, idx++); if (!c) continue; @@ -1786,8 +1784,8 @@ struct fib_trie_iter { static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter) { + unsigned long cindex = iter->index; struct tnode *tn = iter->tnode; - unsigned int cindex = iter->index; struct tnode *p; /* A single entry routing table */ @@ -1797,7 +1795,7 @@ static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter) pr_debug("get_next iter={node=%p index=%d depth=%d}\n", iter->tnode, iter->index, iter->depth); rescan: - while (cindex < (1<bits)) { + while (cindex < tnode_child_length(tn)) { struct tnode *n = tnode_get_child_rcu(tn, cindex); if (n) { @@ -1874,15 +1872,16 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) hlist_for_each_entry_rcu(li, &n->list, hlist) ++s->prefixes; } else { - int i; + unsigned long i; s->tnodes++; if (n->bits < MAX_STAT_DEPTH) s->nodesizes[n->bits]++; - for (i = 0; i < tnode_child_length(n); i++) + for (i = 0; i < tnode_child_length(n); i++) { if (!rcu_access_pointer(n->child[i])) s->nullpointers++; + } } } rcu_read_unlock(); -- cgit v1.2.3 From 345e9b54268ae065520a7252c182d22ef4591718 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:24 -0800 Subject: fib_trie: Push rcu_read_lock/unlock to callers This change is to start cleaning up some of the rcu_read_lock/unlock handling. I realized while reviewing the code there are several spots that I don't believe are being handled correctly or are masking warnings by locally calling rcu_read_lock/unlock instead of calling them at the correct level. A common example is a call to fib_get_table followed by fib_table_lookup. The rcu_read_lock/unlock ought to wrap both but there are several spots where they were not wrapped. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 50 ++++++++++-------- net/ipv4/fib_frontend.c | 27 +++++----- net/ipv4/fib_rules.c | 22 ++++---- net/ipv4/fib_trie.c | 137 +++++++++++++++++++++--------------------------- 4 files changed, 114 insertions(+), 122 deletions(-) (limited to 'net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 09a819ee2151..5bd120e4bc0a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -222,16 +222,19 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id) static inline int fib_lookup(struct net *net, const struct flowi4 *flp, struct fib_result *res) { - struct fib_table *table; + int err = -ENETUNREACH; + + rcu_read_lock(); + + if (!fib_table_lookup(fib_get_table(net, RT_TABLE_LOCAL), flp, res, + FIB_LOOKUP_NOREF) || + !fib_table_lookup(fib_get_table(net, RT_TABLE_MAIN), flp, res, + FIB_LOOKUP_NOREF)) + err = 0; - table = fib_get_table(net, RT_TABLE_LOCAL); - if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) - return 0; + rcu_read_unlock(); - table = fib_get_table(net, RT_TABLE_MAIN); - if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) - return 0; - return -ENETUNREACH; + return err; } #else /* CONFIG_IP_MULTIPLE_TABLES */ @@ -247,20 +250,25 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { if (!net->ipv4.fib_has_custom_rules) { + int err = -ENETUNREACH; + + rcu_read_lock(); + res->tclassid = 0; - if (net->ipv4.fib_local && - !fib_table_lookup(net->ipv4.fib_local, flp, res, - FIB_LOOKUP_NOREF)) - return 0; - if (net->ipv4.fib_main && - !fib_table_lookup(net->ipv4.fib_main, flp, res, - FIB_LOOKUP_NOREF)) - return 0; - if (net->ipv4.fib_default && - !fib_table_lookup(net->ipv4.fib_default, flp, res, - FIB_LOOKUP_NOREF)) - return 0; - return -ENETUNREACH; + if ((net->ipv4.fib_local && + !fib_table_lookup(net->ipv4.fib_local, flp, res, + FIB_LOOKUP_NOREF)) || + (net->ipv4.fib_main && + !fib_table_lookup(net->ipv4.fib_main, flp, res, + FIB_LOOKUP_NOREF)) || + (net->ipv4.fib_default && + !fib_table_lookup(net->ipv4.fib_default, flp, res, + FIB_LOOKUP_NOREF))) + err = 0; + + rcu_read_unlock(); + + return err; } return __fib_lookup(net, flp, res); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 66890209208f..57be71dd6a9e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -109,6 +109,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return tb; } +/* caller must hold either rtnl or rcu read lock */ struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; @@ -119,15 +120,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id) id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); - rcu_read_lock(); head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - if (tb->tb_id == id) { - rcu_read_unlock(); + if (tb->tb_id == id) return tb; - } } - rcu_read_unlock(); return NULL; } #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -167,16 +164,18 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (ipv4_is_multicast(addr)) return RTN_MULTICAST; + rcu_read_lock(); + local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; - rcu_read_lock(); if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } - rcu_read_unlock(); } + + rcu_read_unlock(); return ret; } @@ -919,7 +918,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) #undef BRD1_OK } -static void 
nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) +static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) { struct fib_result res; @@ -929,6 +928,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) .flowi4_tos = frn->fl_tos, .flowi4_scope = frn->fl_scope, }; + struct fib_table *tb; + + rcu_read_lock(); + + tb = fib_get_table(net, frn->tb_id_in); frn->err = -ENOENT; if (tb) { @@ -945,6 +949,8 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) } local_bh_enable(); } + + rcu_read_unlock(); } static void nl_fib_input(struct sk_buff *skb) @@ -952,7 +958,6 @@ static void nl_fib_input(struct sk_buff *skb) struct net *net; struct fib_result_nl *frn; struct nlmsghdr *nlh; - struct fib_table *tb; u32 portid; net = sock_net(skb->sk); @@ -967,9 +972,7 @@ static void nl_fib_input(struct sk_buff *skb) nlh = nlmsg_hdr(skb); frn = (struct fib_result_nl *) nlmsg_data(nlh); - tb = fib_get_table(net, frn->tb_id_in); - - nl_fib_lookup(frn, tb); + nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 8f7bd56955b0..d3db718be51d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -81,27 +81,25 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, break; case FR_ACT_UNREACHABLE: - err = -ENETUNREACH; - goto errout; + return -ENETUNREACH; case FR_ACT_PROHIBIT: - err = -EACCES; - goto errout; + return -EACCES; case FR_ACT_BLACKHOLE: default: - err = -EINVAL; - goto errout; + return -EINVAL; } + rcu_read_lock(); + tbl = fib_get_table(rule->fr_net, rule->table); - if (!tbl) - goto errout; + if (tbl) + err = fib_table_lookup(tbl, &flp->u.ip4, + (struct fib_result *)arg->result, + arg->flags); - err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); - if (err > 0) - err = -EAGAIN; -errout: + rcu_read_unlock(); return err; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 28a3065470bc..987b06d1effe 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1181,72 +1181,6 @@ err: return err; } -/* should be called with rcu_read_lock */ -static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l, - t_key key, const struct flowi4 *flp, - struct fib_result *res, int fib_flags) -{ - struct leaf_info *li; - struct hlist_head *hhead = &l->list; - - hlist_for_each_entry_rcu(li, hhead, hlist) { - struct fib_alias *fa; - - if (l->key != (key & li->mask_plen)) - continue; - - list_for_each_entry_rcu(fa, &li->falh, fa_list) { - struct fib_info *fi = fa->fa_info; - int nhsel, err; - - if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) - continue; - if (fi->fib_dead) - continue; - if (fa->fa_info->fib_scope < flp->flowi4_scope) - continue; - fib_alias_accessed(fa); - err = fib_props[fa->fa_type].error; - if (unlikely(err < 0)) { -#ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->semantic_match_passed); -#endif - return err; - } - if (fi->fib_flags & RTNH_F_DEAD) - continue; - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) - continue; - -#ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->semantic_match_passed); -#endif - res->prefixlen = li->plen; - res->nh_sel = nhsel; - res->type = fa->fa_type; - res->scope = fi->fib_scope; - res->fi = fi; - res->table = tb; - 
res->fa_head = &li->falh; - if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&fi->fib_clntref); - return 0; - } - } - -#ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->semantic_match_miss); -#endif - } - - return 1; -} - static inline t_key prefix_mismatch(t_key key, struct tnode *n) { t_key prefix = n->key; @@ -1254,6 +1188,7 @@ static inline t_key prefix_mismatch(t_key key, struct tnode *n) return (key ^ prefix) & (prefix | -prefix); } +/* should be called with rcu_read_lock */ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { @@ -1263,14 +1198,12 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, #endif const t_key key = ntohl(flp->daddr); struct tnode *n, *pn; + struct leaf_info *li; t_key cindex; - int ret = 1; - - rcu_read_lock(); n = rcu_dereference(t->trie); if (!n) - goto failed; + return -EAGAIN; #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->gets); @@ -1350,7 +1283,7 @@ backtrace: pn = node_parent_rcu(pn); if (unlikely(!pn)) - goto failed; + return -EAGAIN; #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->backtrack); #endif @@ -1368,12 +1301,62 @@ backtrace: found: /* Step 3: Process the leaf, if that fails fall back to backtracing */ - ret = check_leaf(tb, t, n, key, flp, res, fib_flags); - if (unlikely(ret > 0)) - goto backtrace; -failed: - rcu_read_unlock(); - return ret; + hlist_for_each_entry_rcu(li, &n->list, hlist) { + struct fib_alias *fa; + + if ((key ^ n->key) & li->mask_plen) + continue; + + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + struct fib_info *fi = fa->fa_info; + int nhsel, err; + + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fi->fib_dead) + continue; + if (fa->fa_info->fib_scope < flp->flowi4_scope) + continue; + fib_alias_accessed(fa); + err = fib_props[fa->fa_type].error; + if (unlikely(err < 0)) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->semantic_match_passed); +#endif + return err; + } + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (nh->nh_flags & RTNH_F_DEAD) + continue; + if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) + continue; + + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&fi->fib_clntref); + + res->prefixlen = li->plen; + res->nh_sel = nhsel; + res->type = fa->fa_type; + res->scope = fi->fib_scope; + res->fi = fi; + res->table = tb; + res->fa_head = &li->falh; +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->semantic_match_passed); +#endif + return err; + } + } + +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->semantic_match_miss); +#endif + } + goto backtrace; } EXPORT_SYMBOL_GPL(fib_table_lookup); -- cgit v1.2.3 From cf3637bb8f07fb3b6791708f1d5118afb5446061 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:31 -0800 Subject: fib_trie: Move resize to after inflate/halve This change consists of a cut/paste of resize to behind inflate and halve so that I could remove the two function prototypes. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 311 ++++++++++++++++++++++++++-------------------------- 1 file changed, 154 insertions(+), 157 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 987b06d1effe..d044fa355f69 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -149,8 +149,6 @@ struct trie { static void tnode_put_child_reorg(struct tnode *tn, unsigned long i, struct tnode *n, int wasfull); static struct tnode *resize(struct trie *t, struct tnode *tn); -static struct tnode *inflate(struct trie *t, struct tnode *tn); -static struct tnode *halve(struct trie *t, struct tnode *tn); /* tnodes to free after resize(); protected by RTNL */ static struct callback_head *tnode_free_head; static size_t tnode_free_size; @@ -447,161 +445,6 @@ static void put_child_root(struct tnode *tp, struct trie *t, rcu_assign_pointer(t->trie, n); } -#define MAX_WORK 10 -static struct tnode *resize(struct trie *t, struct tnode *tn) -{ - struct tnode *old_tn, *n = NULL; - int inflate_threshold_use; - int halve_threshold_use; - int max_work; - - if (!tn) - return NULL; - - pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", - tn, inflate_threshold, halve_threshold); - - /* No children */ - if (tn->empty_children > (tnode_child_length(tn) - 1)) - goto no_children; - - /* One child */ - if (tn->empty_children == (tnode_child_length(tn) - 1)) - goto one_child; - /* - * Double as long as the resulting node has a number of - * nonempty nodes that are above the threshold. - */ - - /* - * From "Implementing a dynamic compressed trie" by Stefan Nilsson of - * the Helsinki University of Technology and Matti Tikkanen of Nokia - * Telecommunications, page 6: - * "A node is doubled if the ratio of non-empty children to all - * children in the *doubled* node is at least 'high'." - * - * 'high' in this instance is the variable 'inflate_threshold'. It - * is expressed as a percentage, so we multiply it with - * tnode_child_length() and instead of multiplying by 2 (since the - * child array will be doubled by inflate()) and multiplying - * the left-hand side by 100 (to handle the percentage thing) we - * multiply the left-hand side by 50. - * - * The left-hand side may look a bit weird: tnode_child_length(tn) - * - tn->empty_children is of course the number of non-null children - * in the current node. tn->full_children is the number of "full" - * children, that is non-null tnodes with a skip value of 0. - * All of those will be doubled in the resulting inflated tnode, so - * we just count them one extra time here. - * - * A clearer way to write this would be: - * - * to_be_doubled = tn->full_children; - * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - - * tn->full_children; - * - * new_child_length = tnode_child_length(tn) * 2; - * - * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / - * new_child_length; - * if (new_fill_factor >= inflate_threshold) - * - * ...and so on, tho it would mess up the while () loop. 
- * - * anyway, - * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= - * inflate_threshold - * - * avoid a division: - * 100 * (not_to_be_doubled + 2*to_be_doubled) >= - * inflate_threshold * new_child_length - * - * expand not_to_be_doubled and to_be_doubled, and shorten: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= inflate_threshold * new_child_length - * - * expand new_child_length: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= - * inflate_threshold * tnode_child_length(tn) * 2 - * - * shorten again: - * 50 * (tn->full_children + tnode_child_length(tn) - - * tn->empty_children) >= inflate_threshold * - * tnode_child_length(tn) - * - */ - - /* Keep root node larger */ - - if (!node_parent(tn)) { - inflate_threshold_use = inflate_threshold_root; - halve_threshold_use = halve_threshold_root; - } else { - inflate_threshold_use = inflate_threshold; - halve_threshold_use = halve_threshold; - } - - max_work = MAX_WORK; - while ((tn->full_children > 0 && max_work-- && - 50 * (tn->full_children + tnode_child_length(tn) - - tn->empty_children) - >= inflate_threshold_use * tnode_child_length(tn))) { - - old_tn = tn; - tn = inflate(t, tn); - - if (IS_ERR(tn)) { - tn = old_tn; -#ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->resize_node_skipped); -#endif - break; - } - } - - /* Return if at least one inflate is run */ - if (max_work != MAX_WORK) - return tn; - - /* - * Halve as long as the number of empty children in this - * node is above threshold. - */ - - max_work = MAX_WORK; - while (tn->bits > 1 && max_work-- && - 100 * (tnode_child_length(tn) - tn->empty_children) < - halve_threshold_use * tnode_child_length(tn)) { - - old_tn = tn; - tn = halve(t, tn); - if (IS_ERR(tn)) { - tn = old_tn; -#ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->resize_node_skipped); -#endif - break; - } - } - - - /* Only one child remains */ - if (tn->empty_children == (tnode_child_length(tn) - 1)) { - unsigned long i; -one_child: - for (i = tnode_child_length(tn); !n && i;) - n = tnode_get_child(tn, --i); -no_children: - /* compress one level */ - node_set_parent(n, NULL); - tnode_free_safe(tn); - return n; - } - return tn; -} - - static void tnode_clean_free(struct tnode *tn) { struct tnode *tofree; @@ -804,6 +647,160 @@ nomem: return ERR_PTR(-ENOMEM); } +#define MAX_WORK 10 +static struct tnode *resize(struct trie *t, struct tnode *tn) +{ + struct tnode *old_tn, *n = NULL; + int inflate_threshold_use; + int halve_threshold_use; + int max_work; + + if (!tn) + return NULL; + + pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", + tn, inflate_threshold, halve_threshold); + + /* No children */ + if (tn->empty_children > (tnode_child_length(tn) - 1)) + goto no_children; + + /* One child */ + if (tn->empty_children == (tnode_child_length(tn) - 1)) + goto one_child; + /* + * Double as long as the resulting node has a number of + * nonempty nodes that are above the threshold. + */ + + /* + * From "Implementing a dynamic compressed trie" by Stefan Nilsson of + * the Helsinki University of Technology and Matti Tikkanen of Nokia + * Telecommunications, page 6: + * "A node is doubled if the ratio of non-empty children to all + * children in the *doubled* node is at least 'high'." + * + * 'high' in this instance is the variable 'inflate_threshold'. 
It + * is expressed as a percentage, so we multiply it with + * tnode_child_length() and instead of multiplying by 2 (since the + * child array will be doubled by inflate()) and multiplying + * the left-hand side by 100 (to handle the percentage thing) we + * multiply the left-hand side by 50. + * + * The left-hand side may look a bit weird: tnode_child_length(tn) + * - tn->empty_children is of course the number of non-null children + * in the current node. tn->full_children is the number of "full" + * children, that is non-null tnodes with a skip value of 0. + * All of those will be doubled in the resulting inflated tnode, so + * we just count them one extra time here. + * + * A clearer way to write this would be: + * + * to_be_doubled = tn->full_children; + * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - + * tn->full_children; + * + * new_child_length = tnode_child_length(tn) * 2; + * + * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / + * new_child_length; + * if (new_fill_factor >= inflate_threshold) + * + * ...and so on, tho it would mess up the while () loop. + * + * anyway, + * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= + * inflate_threshold + * + * avoid a division: + * 100 * (not_to_be_doubled + 2*to_be_doubled) >= + * inflate_threshold * new_child_length + * + * expand not_to_be_doubled and to_be_doubled, and shorten: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= inflate_threshold * new_child_length + * + * expand new_child_length: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= + * inflate_threshold * tnode_child_length(tn) * 2 + * + * shorten again: + * 50 * (tn->full_children + tnode_child_length(tn) - + * tn->empty_children) >= inflate_threshold * + * tnode_child_length(tn) + * + */ + + /* Keep root node larger */ + + if (!node_parent(tn)) { + inflate_threshold_use = inflate_threshold_root; + halve_threshold_use = halve_threshold_root; + } else { + inflate_threshold_use = inflate_threshold; + halve_threshold_use = halve_threshold; + } + + max_work = MAX_WORK; + while ((tn->full_children > 0 && max_work-- && + 50 * (tn->full_children + tnode_child_length(tn) + - tn->empty_children) + >= inflate_threshold_use * tnode_child_length(tn))) { + + old_tn = tn; + tn = inflate(t, tn); + + if (IS_ERR(tn)) { + tn = old_tn; +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(t->stats->resize_node_skipped); +#endif + break; + } + } + + /* Return if at least one inflate is run */ + if (max_work != MAX_WORK) + return tn; + + /* + * Halve as long as the number of empty children in this + * node is above threshold. 
+ */ + + max_work = MAX_WORK; + while (tn->bits > 1 && max_work-- && + 100 * (tnode_child_length(tn) - tn->empty_children) < + halve_threshold_use * tnode_child_length(tn)) { + + old_tn = tn; + tn = halve(t, tn); + if (IS_ERR(tn)) { + tn = old_tn; +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(t->stats->resize_node_skipped); +#endif + break; + } + } + + + /* Only one child remains */ + if (tn->empty_children == (tnode_child_length(tn) - 1)) { + unsigned long i; +one_child: + for (i = tnode_child_length(tn); !n && i;) + n = tnode_get_child(tn, --i); +no_children: + /* compress one level */ + node_set_parent(n, NULL); + tnode_free_safe(tn); + return n; + } + return tn; +} + /* readside must use rcu_read_lock currently dump routines via get_fa_head and dump */ -- cgit v1.2.3 From f05a48198bf742efd9d36553c51e98a648dbe807 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:37 -0800 Subject: fib_trie: Add functions should_inflate and should_halve This change pulls the logic for if we should inflate/halve the nodes out into separate functions. It also addresses what I believe is a bug where 1 full node is all that is needed to keep a node from ever being halved. Simple script to reproduce the issue: modprobe dummy; ifconfig dummy0 up for i in `seq 0 255`; do ifconfig dummy0:$i 10.0.${i}.1/24 up; done ifconfig dummy0:256 10.0.255.33/16 up for i in `seq 0 254`; do ifconfig dummy0:$i down; done Results from /proc/net/fib_triestat Before: Local: Aver depth: 3.00 Max depth: 4 Leaves: 17 Prefixes: 18 Internal nodes: 11 1: 8 2: 2 10: 1 Pointers: 1048 Null ptrs: 1021 Total size: 11 kB After: Local: Aver depth: 3.41 Max depth: 5 Leaves: 17 Prefixes: 18 Internal nodes: 12 1: 8 2: 3 3: 1 Pointers: 36 Null ptrs: 8 Total size: 3 kB Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 175 ++++++++++++++++++++++++++-------------------------- 1 file changed, 89 insertions(+), 86 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d044fa355f69..58e1224786fa 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -647,12 +647,94 @@ nomem: return ERR_PTR(-ENOMEM); } +/* From "Implementing a dynamic compressed trie" by Stefan Nilsson of + * the Helsinki University of Technology and Matti Tikkanen of Nokia + * Telecommunications, page 6: + * "A node is doubled if the ratio of non-empty children to all + * children in the *doubled* node is at least 'high'." + * + * 'high' in this instance is the variable 'inflate_threshold'. It + * is expressed as a percentage, so we multiply it with + * tnode_child_length() and instead of multiplying by 2 (since the + * child array will be doubled by inflate()) and multiplying + * the left-hand side by 100 (to handle the percentage thing) we + * multiply the left-hand side by 50. + * + * The left-hand side may look a bit weird: tnode_child_length(tn) + * - tn->empty_children is of course the number of non-null children + * in the current node. tn->full_children is the number of "full" + * children, that is non-null tnodes with a skip value of 0. + * All of those will be doubled in the resulting inflated tnode, so + * we just count them one extra time here. 
+ * + * A clearer way to write this would be: + * + * to_be_doubled = tn->full_children; + * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - + * tn->full_children; + * + * new_child_length = tnode_child_length(tn) * 2; + * + * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / + * new_child_length; + * if (new_fill_factor >= inflate_threshold) + * + * ...and so on, tho it would mess up the while () loop. + * + * anyway, + * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= + * inflate_threshold + * + * avoid a division: + * 100 * (not_to_be_doubled + 2*to_be_doubled) >= + * inflate_threshold * new_child_length + * + * expand not_to_be_doubled and to_be_doubled, and shorten: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= inflate_threshold * new_child_length + * + * expand new_child_length: + * 100 * (tnode_child_length(tn) - tn->empty_children + + * tn->full_children) >= + * inflate_threshold * tnode_child_length(tn) * 2 + * + * shorten again: + * 50 * (tn->full_children + tnode_child_length(tn) - + * tn->empty_children) >= inflate_threshold * + * tnode_child_length(tn) + * + */ +static bool should_inflate(const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + unsigned long threshold = used; + + /* Keep root node larger */ + threshold *= node_parent(tn) ? inflate_threshold : + inflate_threshold_root; + used += tn->full_children; + used -= tn->empty_children; + + return tn->pos && ((50 * used) >= threshold); +} + +static bool should_halve(const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + unsigned long threshold = used; + + /* Keep root node larger */ + threshold *= node_parent(tn) ? halve_threshold : + halve_threshold_root; + used -= tn->empty_children; + + return (tn->bits > 1) && ((100 * used) < threshold); +} + #define MAX_WORK 10 static struct tnode *resize(struct trie *t, struct tnode *tn) { struct tnode *old_tn, *n = NULL; - int inflate_threshold_use; - int halve_threshold_use; int max_work; if (!tn) @@ -668,86 +750,12 @@ static struct tnode *resize(struct trie *t, struct tnode *tn) /* One child */ if (tn->empty_children == (tnode_child_length(tn) - 1)) goto one_child; - /* - * Double as long as the resulting node has a number of - * nonempty nodes that are above the threshold. - */ - /* - * From "Implementing a dynamic compressed trie" by Stefan Nilsson of - * the Helsinki University of Technology and Matti Tikkanen of Nokia - * Telecommunications, page 6: - * "A node is doubled if the ratio of non-empty children to all - * children in the *doubled* node is at least 'high'." - * - * 'high' in this instance is the variable 'inflate_threshold'. It - * is expressed as a percentage, so we multiply it with - * tnode_child_length() and instead of multiplying by 2 (since the - * child array will be doubled by inflate()) and multiplying - * the left-hand side by 100 (to handle the percentage thing) we - * multiply the left-hand side by 50. - * - * The left-hand side may look a bit weird: tnode_child_length(tn) - * - tn->empty_children is of course the number of non-null children - * in the current node. tn->full_children is the number of "full" - * children, that is non-null tnodes with a skip value of 0. - * All of those will be doubled in the resulting inflated tnode, so - * we just count them one extra time here. 
- * - * A clearer way to write this would be: - * - * to_be_doubled = tn->full_children; - * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - - * tn->full_children; - * - * new_child_length = tnode_child_length(tn) * 2; - * - * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / - * new_child_length; - * if (new_fill_factor >= inflate_threshold) - * - * ...and so on, tho it would mess up the while () loop. - * - * anyway, - * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= - * inflate_threshold - * - * avoid a division: - * 100 * (not_to_be_doubled + 2*to_be_doubled) >= - * inflate_threshold * new_child_length - * - * expand not_to_be_doubled and to_be_doubled, and shorten: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= inflate_threshold * new_child_length - * - * expand new_child_length: - * 100 * (tnode_child_length(tn) - tn->empty_children + - * tn->full_children) >= - * inflate_threshold * tnode_child_length(tn) * 2 - * - * shorten again: - * 50 * (tn->full_children + tnode_child_length(tn) - - * tn->empty_children) >= inflate_threshold * - * tnode_child_length(tn) - * + /* Double as long as the resulting node has a number of + * nonempty nodes that are above the threshold. */ - - /* Keep root node larger */ - - if (!node_parent(tn)) { - inflate_threshold_use = inflate_threshold_root; - halve_threshold_use = halve_threshold_root; - } else { - inflate_threshold_use = inflate_threshold; - halve_threshold_use = halve_threshold; - } - max_work = MAX_WORK; - while ((tn->full_children > 0 && max_work-- && - 50 * (tn->full_children + tnode_child_length(tn) - - tn->empty_children) - >= inflate_threshold_use * tnode_child_length(tn))) { - + while (should_inflate(tn) && max_work--) { old_tn = tn; tn = inflate(t, tn); @@ -764,16 +772,11 @@ static struct tnode *resize(struct trie *t, struct tnode *tn) if (max_work != MAX_WORK) return tn; - /* - * Halve as long as the number of empty children in this + /* Halve as long as the number of empty children in this * node is above threshold. */ - max_work = MAX_WORK; - while (tn->bits > 1 && max_work-- && - 100 * (tnode_child_length(tn) - tn->empty_children) < - halve_threshold_use * tnode_child_length(tn)) { - + while (should_halve(tn) && max_work--) { old_tn = tn; tn = halve(t, tn); if (IS_ERR(tn)) { -- cgit v1.2.3 From ff181ed8768fa57fbda40fa97914f27fe275129b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:43 -0800 Subject: fib_trie: Push assignment of child to parent down into inflate/halve This change makes it so that the assignment of the tnode to the parent is handled directly within whatever function is currently handling the node be it inflate, halve, or resize. By doing this we can avoid some of the need to set NULL pointers in the tree while we are resizing the subnodes. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 149 +++++++++++++++++++++++----------------------------- 1 file changed, 66 insertions(+), 83 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 58e1224786fa..7fbd2c5a76b1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -146,9 +146,7 @@ struct trie { #endif }; -static void tnode_put_child_reorg(struct tnode *tn, unsigned long i, - struct tnode *n, int wasfull); -static struct tnode *resize(struct trie *t, struct tnode *tn); +static void resize(struct trie *t, struct tnode *tn); /* tnodes to free after resize(); protected by RTNL */ static struct callback_head *tnode_free_head; static size_t tnode_free_size; @@ -396,22 +394,13 @@ static inline int tnode_full(const struct tnode *tn, const struct tnode *n) return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n); } -static inline void put_child(struct tnode *tn, unsigned long i, - struct tnode *n) -{ - tnode_put_child_reorg(tn, i, n, -1); -} - - /* - * Add a child at position i overwriting the old value. - * Update the value of full_children and empty_children. - */ - -static void tnode_put_child_reorg(struct tnode *tn, unsigned long i, - struct tnode *n, int wasfull) +/* Add a child at position i overwriting the old value. + * Update the value of full_children and empty_children. + */ +static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) { struct tnode *chi = rtnl_dereference(tn->child[i]); - int isfull; + int isfull, wasfull; BUG_ON(i >= tnode_child_length(tn)); @@ -422,10 +411,9 @@ static void tnode_put_child_reorg(struct tnode *tn, unsigned long i, tn->empty_children--; /* update fullChildren */ - if (wasfull == -1) - wasfull = tnode_full(tn, chi); - + wasfull = tnode_full(tn, chi); isfull = tnode_full(tn, n); + if (wasfull && !isfull) tn->full_children--; else if (!wasfull && isfull) @@ -458,9 +446,10 @@ static void tnode_clean_free(struct tnode *tn) node_free(tn); } -static struct tnode *inflate(struct trie *t, struct tnode *oldtnode) +static int inflate(struct trie *t, struct tnode *oldtnode) { unsigned long olen = tnode_child_length(oldtnode); + struct tnode *tp = node_parent(oldtnode); struct tnode *tn; unsigned long i; t_key m; @@ -468,9 +457,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode) pr_debug("In inflate\n"); tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1); - if (!tn) - return ERR_PTR(-ENOMEM); + return -ENOMEM; /* * Preallocate and store tnodes before the actual work so we @@ -564,30 +552,36 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode) put_child(left, j, rtnl_dereference(inode->child[j])); put_child(right, j, rtnl_dereference(inode->child[j + size])); } - put_child(tn, 2*i, resize(t, left)); - put_child(tn, 2*i+1, resize(t, right)); + + put_child(tn, 2 * i, left); + put_child(tn, 2 * i + 1, right); tnode_free_safe(inode); + + resize(t, left); + resize(t, right); } + + put_child_root(tp, t, tn->key, tn); tnode_free_safe(oldtnode); - return tn; + return 0; nomem: tnode_clean_free(tn); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } -static struct tnode *halve(struct trie *t, struct tnode *oldtnode) +static int halve(struct trie *t, struct tnode *oldtnode) { unsigned long olen = tnode_child_length(oldtnode); + struct tnode *tp = node_parent(oldtnode); struct tnode *tn, *left, *right; int i; pr_debug("In halve\n"); tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1); - if (!tn) - return ERR_PTR(-ENOMEM); + return -ENOMEM; /* * 
Preallocate and store tnodes before the actual work so we @@ -606,8 +600,10 @@ static struct tnode *halve(struct trie *t, struct tnode *oldtnode) newn = tnode_new(left->key, oldtnode->pos, 1); - if (!newn) - goto nomem; + if (!newn) { + tnode_clean_free(tn); + return -ENOMEM; + } put_child(tn, i/2, newn); } @@ -635,16 +631,18 @@ static struct tnode *halve(struct trie *t, struct tnode *oldtnode) /* Two nonempty children */ newBinNode = tnode_get_child(tn, i/2); - put_child(tn, i/2, NULL); put_child(newBinNode, 0, left); put_child(newBinNode, 1, right); - put_child(tn, i/2, resize(t, newBinNode)); + + put_child(tn, i / 2, newBinNode); + + resize(t, newBinNode); } + + put_child_root(tp, t, tn->key, tn); tnode_free_safe(oldtnode); - return tn; -nomem: - tnode_clean_free(tn); - return ERR_PTR(-ENOMEM); + + return 0; } /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of @@ -704,45 +702,48 @@ nomem: * tnode_child_length(tn) * */ -static bool should_inflate(const struct tnode *tn) +static bool should_inflate(const struct tnode *tp, const struct tnode *tn) { unsigned long used = tnode_child_length(tn); unsigned long threshold = used; /* Keep root node larger */ - threshold *= node_parent(tn) ? inflate_threshold : - inflate_threshold_root; + threshold *= tp ? inflate_threshold : inflate_threshold_root; used += tn->full_children; used -= tn->empty_children; return tn->pos && ((50 * used) >= threshold); } -static bool should_halve(const struct tnode *tn) +static bool should_halve(const struct tnode *tp, const struct tnode *tn) { unsigned long used = tnode_child_length(tn); unsigned long threshold = used; /* Keep root node larger */ - threshold *= node_parent(tn) ? halve_threshold : - halve_threshold_root; + threshold *= tp ? halve_threshold : halve_threshold_root; used -= tn->empty_children; return (tn->bits > 1) && ((100 * used) < threshold); } #define MAX_WORK 10 -static struct tnode *resize(struct trie *t, struct tnode *tn) +static void resize(struct trie *t, struct tnode *tn) { - struct tnode *old_tn, *n = NULL; + struct tnode *tp = node_parent(tn), *n = NULL; + struct tnode __rcu **cptr; int max_work; - if (!tn) - return NULL; - pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", tn, inflate_threshold, halve_threshold); + /* track the tnode via the pointer from the parent instead of + * doing it ourselves. This way we can let RCU fully do its + * thing without us interfering + */ + cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; + BUG_ON(tn != rtnl_dereference(*cptr)); + /* No children */ if (tn->empty_children > (tnode_child_length(tn) - 1)) goto no_children; @@ -755,39 +756,35 @@ static struct tnode *resize(struct trie *t, struct tnode *tn) * nonempty nodes that are above the threshold. */ max_work = MAX_WORK; - while (should_inflate(tn) && max_work--) { - old_tn = tn; - tn = inflate(t, tn); - - if (IS_ERR(tn)) { - tn = old_tn; + while (should_inflate(tp, tn) && max_work--) { + if (inflate(t, tn)) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(t->stats->resize_node_skipped); #endif break; } + + tn = rtnl_dereference(*cptr); } /* Return if at least one inflate is run */ if (max_work != MAX_WORK) - return tn; + return; /* Halve as long as the number of empty children in this * node is above threshold. 
*/ max_work = MAX_WORK; - while (should_halve(tn) && max_work--) { - old_tn = tn; - tn = halve(t, tn); - if (IS_ERR(tn)) { - tn = old_tn; + while (should_halve(tp, tn) && max_work--) { + if (halve(t, tn)) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(t->stats->resize_node_skipped); #endif break; } - } + tn = rtnl_dereference(*cptr); + } /* Only one child remains */ if (tn->empty_children == (tnode_child_length(tn) - 1)) { @@ -797,11 +794,12 @@ one_child: n = tnode_get_child(tn, --i); no_children: /* compress one level */ - node_set_parent(n, NULL); + put_child_root(tp, t, tn->key, n); + node_set_parent(n, tp); + + /* drop dead node */ tnode_free_safe(tn); - return n; } - return tn; } /* readside must use rcu_read_lock currently dump routines @@ -882,34 +880,19 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) static void trie_rebalance(struct trie *t, struct tnode *tn) { - int wasfull; - t_key cindex, key; struct tnode *tp; - key = tn->key; - - while (tn != NULL && (tp = node_parent(tn)) != NULL) { - cindex = get_index(key, tp); - wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); - tn = resize(t, tn); - - tnode_put_child_reorg(tp, cindex, tn, wasfull); - - tp = node_parent(tn); - if (!tp) - rcu_assign_pointer(t->trie, tn); + while ((tp = node_parent(tn)) != NULL) { + resize(t, tn); tnode_free_flush(); - if (!tp) - break; tn = tp; } /* Handle last (top) tnode */ if (IS_TNODE(tn)) - tn = resize(t, tn); + resize(t, tn); - rcu_assign_pointer(t->trie, tn); tnode_free_flush(); } -- cgit v1.2.3 From fc86a93b46d707c6e245b15127409340eaf6ae47 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:49 -0800 Subject: fib_trie: Push tnode flushing down to inflate/halve This change pushes the tnode freeing down into the inflate and halve functions. It makes more sense here as we have a better grasp of what is going on and when a given cluster of nodes is ready to be freed. I believe this may address a bug in the freeing logic as well. For some reason if the freelist got to a certain size we would call synchronize_rcu(). I'm assuming that what they meant to do is call synchronize_rcu() after they had handed off that much memory via call_rcu(). As such that is what I have updated the behavior to be. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 103 +++++++++++++++++++++++++--------------------------- 1 file changed, 50 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 7fbd2c5a76b1..485983ef0444 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -147,8 +147,6 @@ struct trie { }; static void resize(struct trie *t, struct tnode *tn); -/* tnodes to free after resize(); protected by RTNL */ -static struct callback_head *tnode_free_head; static size_t tnode_free_size; /* @@ -307,32 +305,6 @@ static struct tnode *tnode_alloc(size_t size) return vzalloc(size); } -static void tnode_free_safe(struct tnode *tn) -{ - BUG_ON(IS_LEAF(tn)); - tn->rcu.next = tnode_free_head; - tnode_free_head = &tn->rcu; -} - -static void tnode_free_flush(void) -{ - struct callback_head *head; - - while ((head = tnode_free_head)) { - struct tnode *tn = container_of(head, struct tnode, rcu); - - tnode_free_head = head->next; - tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]); - - node_free(tn); - } - - if (tnode_free_size >= PAGE_SIZE * sync_pages) { - tnode_free_size = 0; - synchronize_rcu(); - } -} - static struct tnode *leaf_new(t_key key) { struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); @@ -433,17 +405,33 @@ static void put_child_root(struct tnode *tp, struct trie *t, rcu_assign_pointer(t->trie, n); } -static void tnode_clean_free(struct tnode *tn) +static inline void tnode_free_init(struct tnode *tn) { - struct tnode *tofree; - unsigned long i; + tn->rcu.next = NULL; +} + +static inline void tnode_free_append(struct tnode *tn, struct tnode *n) +{ + n->rcu.next = tn->rcu.next; + tn->rcu.next = &n->rcu; +} - for (i = 0; i < tnode_child_length(tn); i++) { - tofree = tnode_get_child(tn, i); - if (tofree) - node_free(tofree); +static void tnode_free(struct tnode *tn) +{ + struct callback_head *head = &tn->rcu; + + while (head) { + head = head->next; + tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]); + node_free(tn); + + tn = container_of(head, struct tnode, rcu); + } + + if (tnode_free_size >= PAGE_SIZE * sync_pages) { + tnode_free_size = 0; + synchronize_rcu(); } - node_free(tn); } static int inflate(struct trie *t, struct tnode *oldtnode) @@ -476,20 +464,23 @@ static int inflate(struct trie *t, struct tnode *oldtnode) inode->bits - 1); if (!left) goto nomem; + tnode_free_append(tn, left); right = tnode_new(inode->key | m, inode->pos, inode->bits - 1); - if (!right) { - node_free(left); + if (!right) goto nomem; - } + tnode_free_append(tn, right); put_child(tn, 2*i, left); put_child(tn, 2*i+1, right); } } + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); + for (i = 0; i < olen; i++) { struct tnode *inode = tnode_get_child(oldtnode, i); struct tnode *left, *right; @@ -505,12 +496,13 @@ static int inflate(struct trie *t, struct tnode *oldtnode) continue; } + /* drop the node in the old tnode free list */ + tnode_free_append(oldtnode, inode); + /* An internal node with two children */ if (inode->bits == 1) { put_child(tn, 2*i, rtnl_dereference(inode->child[0])); put_child(tn, 2*i+1, rtnl_dereference(inode->child[1])); - - tnode_free_safe(inode); continue; } @@ -556,17 +548,19 @@ static int inflate(struct trie *t, struct tnode *oldtnode) put_child(tn, 2 * i, left); put_child(tn, 2 * i + 1, right); - tnode_free_safe(inode); - + /* resize child nodes */ resize(t, left); resize(t, right); } put_child_root(tp, t, tn->key, tn); - tnode_free_safe(oldtnode); + + /* we completed without error, prepare to 
free old node */ + tnode_free(oldtnode); return 0; nomem: - tnode_clean_free(tn); + /* all pointers should be clean so we are done */ + tnode_free(tn); return -ENOMEM; } @@ -599,17 +593,20 @@ static int halve(struct trie *t, struct tnode *oldtnode) struct tnode *newn; newn = tnode_new(left->key, oldtnode->pos, 1); - if (!newn) { - tnode_clean_free(tn); + tnode_free(tn); return -ENOMEM; } + tnode_free_append(tn, newn); put_child(tn, i/2, newn); } } + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); + for (i = 0; i < olen; i += 2) { struct tnode *newBinNode; @@ -636,11 +633,14 @@ static int halve(struct trie *t, struct tnode *oldtnode) put_child(tn, i / 2, newBinNode); + /* resize child node */ resize(t, newBinNode); } put_child_root(tp, t, tn->key, tn); - tnode_free_safe(oldtnode); + + /* all pointers should be clean so we are done */ + tnode_free(oldtnode); return 0; } @@ -798,7 +798,8 @@ no_children: node_set_parent(n, tp); /* drop dead node */ - tnode_free_safe(tn); + tnode_free_init(tn); + tnode_free(tn); } } @@ -884,16 +885,12 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) while ((tp = node_parent(tn)) != NULL) { resize(t, tn); - - tnode_free_flush(); tn = tp; } /* Handle last (top) tnode */ if (IS_TNODE(tn)) resize(t, tn); - - tnode_free_flush(); } /* only used from updater-side */ -- cgit v1.2.3 From 12c081a5c82ef64a90906247cecfa5422962e387 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:55 -0800 Subject: fib_trie: inflate/halve nodes in a more RCU friendly way This change pulls the node_set_parent functionality out of put_child_reorg and instead leaves that to the function to take care of as well. By doing this we can fully construct the new cluster of tnodes and all of the pointers out of it before we start routing pointers into it. I am suspecting this will likely fix some concurency issues though I don't have a good test to show as such. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 236 +++++++++++++++++++++++++--------------------------- 1 file changed, 115 insertions(+), 121 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 485983ef0444..0c88df09d25d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -391,8 +391,6 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) else if (!wasfull && isfull) tn->full_children++; - node_set_parent(n, tn); - rcu_assign_pointer(tn->child[i], n); } @@ -436,10 +434,8 @@ static void tnode_free(struct tnode *tn) static int inflate(struct trie *t, struct tnode *oldtnode) { - unsigned long olen = tnode_child_length(oldtnode); - struct tnode *tp = node_parent(oldtnode); - struct tnode *tn; - unsigned long i; + struct tnode *inode, *node0, *node1, *tn, *tp; + unsigned long i, j, k; t_key m; pr_debug("In inflate\n"); @@ -448,43 +444,13 @@ static int inflate(struct trie *t, struct tnode *oldtnode) if (!tn) return -ENOMEM; - /* - * Preallocate and store tnodes before the actual work so we - * don't get into an inconsistent state if memory allocation - * fails. In case of failure we return the oldnode and inflate - * of tnode is ignored. + /* Assemble all of the pointers in our cluster, in this case that + * represents all of the pointers out of our allocated nodes that + * point to existing tnodes and the links between our allocated + * nodes. 
*/ - for (i = 0, m = 1u << tn->pos; i < olen; i++) { - struct tnode *inode = tnode_get_child(oldtnode, i); - - if (tnode_full(oldtnode, inode) && (inode->bits > 1)) { - struct tnode *left, *right; - - left = tnode_new(inode->key & ~m, inode->pos, - inode->bits - 1); - if (!left) - goto nomem; - tnode_free_append(tn, left); - - right = tnode_new(inode->key | m, inode->pos, - inode->bits - 1); - - if (!right) - goto nomem; - tnode_free_append(tn, right); - - put_child(tn, 2*i, left); - put_child(tn, 2*i+1, right); - } - } - - /* prepare oldtnode to be freed */ - tnode_free_init(oldtnode); - - for (i = 0; i < olen; i++) { - struct tnode *inode = tnode_get_child(oldtnode, i); - struct tnode *left, *right; - unsigned long size, j; + for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { + inode = tnode_get_child(oldtnode, --i); /* An empty child */ if (inode == NULL) @@ -496,65 +462,99 @@ static int inflate(struct trie *t, struct tnode *oldtnode) continue; } - /* drop the node in the old tnode free list */ - tnode_free_append(oldtnode, inode); - /* An internal node with two children */ if (inode->bits == 1) { - put_child(tn, 2*i, rtnl_dereference(inode->child[0])); - put_child(tn, 2*i+1, rtnl_dereference(inode->child[1])); + put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); + put_child(tn, 2 * i, tnode_get_child(inode, 0)); continue; } - /* An internal node with more than two children */ - /* We will replace this node 'inode' with two new - * ones, 'left' and 'right', each with half of the + * ones, 'node0' and 'node1', each with half of the * original children. The two new nodes will have * a position one bit further down the key and this * means that the "significant" part of their keys * (see the discussion near the top of this file) * will differ by one bit, which will be "0" in - * left's key and "1" in right's key. Since we are + * node0's key and "1" in node1's key. Since we are * moving the key position by one step, the bit that * we are moving away from - the bit at position - * (inode->pos) - is the one that will differ between - * left and right. So... we synthesize that bit in the - * two new keys. - * The mask 'm' below will be a single "one" bit at - * the position (inode->pos) + * (tn->pos) - is the one that will differ between + * node0 and node1. So... we synthesize that bit in the + * two new keys. */ + node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); + if (!node1) + goto nomem; + tnode_free_append(tn, node1); + + node0 = tnode_new(inode->key & ~m, inode->pos, inode->bits - 1); + if (!node0) + goto nomem; + tnode_free_append(tn, node0); + + /* populate child pointers in new nodes */ + for (k = tnode_child_length(inode), j = k / 2; j;) { + put_child(node1, --j, tnode_get_child(inode, --k)); + put_child(node0, j, tnode_get_child(inode, j)); + put_child(node1, --j, tnode_get_child(inode, --k)); + put_child(node0, j, tnode_get_child(inode, j)); + } - /* Use the old key, but set the new significant - * bit to zero. 
- */ + /* link new nodes to parent */ + NODE_INIT_PARENT(node1, tn); + NODE_INIT_PARENT(node0, tn); + + /* link parent to nodes */ + put_child(tn, 2 * i + 1, node1); + put_child(tn, 2 * i, node0); + } - left = tnode_get_child(tn, 2*i); - put_child(tn, 2*i, NULL); + /* setup the parent pointer into and out of this node */ + tp = node_parent(oldtnode); + NODE_INIT_PARENT(tn, tp); + put_child_root(tp, t, tn->key, tn); + + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); - BUG_ON(!left); + /* update all child nodes parent pointers to route to us */ + for (i = tnode_child_length(oldtnode); i;) { + inode = tnode_get_child(oldtnode, --i); + + /* A leaf or an internal node with skipped bits */ + if (!tnode_full(oldtnode, inode)) { + node_set_parent(inode, tn); + continue; + } - right = tnode_get_child(tn, 2*i+1); - put_child(tn, 2*i+1, NULL); + /* drop the node in the old tnode free list */ + tnode_free_append(oldtnode, inode); - BUG_ON(!right); + /* fetch new nodes */ + node1 = tnode_get_child(tn, 2 * i + 1); + node0 = tnode_get_child(tn, 2 * i); - size = tnode_child_length(left); - for (j = 0; j < size; j++) { - put_child(left, j, rtnl_dereference(inode->child[j])); - put_child(right, j, rtnl_dereference(inode->child[j + size])); + /* bits == 1 then node0 and node1 represent inode's children */ + if (inode->bits == 1) { + node_set_parent(node1, tn); + node_set_parent(node0, tn); + continue; } - put_child(tn, 2 * i, left); - put_child(tn, 2 * i + 1, right); + /* update parent pointers in child node's children */ + for (k = tnode_child_length(inode), j = k / 2; j;) { + node_set_parent(tnode_get_child(inode, --k), node1); + node_set_parent(tnode_get_child(inode, --j), node0); + node_set_parent(tnode_get_child(inode, --k), node1); + node_set_parent(tnode_get_child(inode, --j), node0); + } /* resize child nodes */ - resize(t, left); - resize(t, right); + resize(t, node1); + resize(t, node0); } - put_child_root(tp, t, tn->key, tn); - /* we completed without error, prepare to free old node */ tnode_free(oldtnode); return 0; @@ -566,10 +566,8 @@ nomem: static int halve(struct trie *t, struct tnode *oldtnode) { - unsigned long olen = tnode_child_length(oldtnode); - struct tnode *tp = node_parent(oldtnode); - struct tnode *tn, *left, *right; - int i; + struct tnode *tn, *tp, *inode, *node0, *node1; + unsigned long i; pr_debug("In halve\n"); @@ -577,68 +575,64 @@ static int halve(struct trie *t, struct tnode *oldtnode) if (!tn) return -ENOMEM; - /* - * Preallocate and store tnodes before the actual work so we - * don't get into an inconsistent state if memory allocation - * fails. In case of failure we return the oldnode and halve - * of tnode is ignored. + /* Assemble all of the pointers in our cluster, in this case that + * represents all of the pointers out of our allocated nodes that + * point to existing tnodes and the links between our allocated + * nodes. */ + for (i = tnode_child_length(oldtnode); i;) { + node1 = tnode_get_child(oldtnode, --i); + node0 = tnode_get_child(oldtnode, --i); - for (i = 0; i < olen; i += 2) { - left = tnode_get_child(oldtnode, i); - right = tnode_get_child(oldtnode, i+1); + /* At least one of the children is empty */ + if (!node1 || !node0) { + put_child(tn, i / 2, node1 ? 
: node0); + continue; + } /* Two nonempty children */ - if (left && right) { - struct tnode *newn; - - newn = tnode_new(left->key, oldtnode->pos, 1); - if (!newn) { - tnode_free(tn); - return -ENOMEM; - } - tnode_free_append(tn, newn); - - put_child(tn, i/2, newn); + inode = tnode_new(node0->key, oldtnode->pos, 1); + if (!inode) { + tnode_free(tn); + return -ENOMEM; } + tnode_free_append(tn, inode); + /* initialize pointers out of node */ + put_child(inode, 1, node1); + put_child(inode, 0, node0); + NODE_INIT_PARENT(inode, tn); + + /* link parent to node */ + put_child(tn, i / 2, inode); } + /* setup the parent pointer out of and back into this node */ + tp = node_parent(oldtnode); + NODE_INIT_PARENT(tn, tp); + put_child_root(tp, t, tn->key, tn); + /* prepare oldtnode to be freed */ tnode_free_init(oldtnode); - for (i = 0; i < olen; i += 2) { - struct tnode *newBinNode; - - left = tnode_get_child(oldtnode, i); - right = tnode_get_child(oldtnode, i+1); - - /* At least one of the children is empty */ - if (left == NULL) { - if (right == NULL) /* Both are empty */ - continue; - put_child(tn, i/2, right); - continue; - } + /* update all of the child parent pointers */ + for (i = tnode_child_length(tn); i;) { + inode = tnode_get_child(tn, --i); - if (right == NULL) { - put_child(tn, i/2, left); + /* only new tnodes will be considered "full" nodes */ + if (!tnode_full(tn, inode)) { + node_set_parent(inode, tn); continue; } /* Two nonempty children */ - newBinNode = tnode_get_child(tn, i/2); - put_child(newBinNode, 0, left); - put_child(newBinNode, 1, right); - - put_child(tn, i / 2, newBinNode); + node_set_parent(tnode_get_child(inode, 1), inode); + node_set_parent(tnode_get_child(inode, 0), inode); /* resize child node */ - resize(t, newBinNode); + resize(t, inode); } - put_child_root(tp, t, tn->key, tn); - /* all pointers should be clean so we are done */ tnode_free(oldtnode); -- cgit v1.2.3 From 21d1f11db0e2f20a549ad8322879507850544670 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:57:02 -0800 Subject: fib_trie: Remove checks for index >= tnode_child_length from tnode_get_child For some reason the compiler doesn't seem to understand that when we are in a loop that runs from tnode_child_length - 1 to 0 we don't expect the value of tn->bits to change. As such every call to tnode_get_child was rerunning tnode_child_length which ended up consuming quite a bit of space in the resultant assembly code. I have gone through and verified that in all cases where tnode_get_child is used we are either winding through a fixed loop from tnode_child_length - 1 to 0, or are in a fastpath case where we are verifying the value by either checking for any remaining bits after shifting index by bits and testing for leaf, or by using tnode_child_length. size net/ipv4/fib_trie.o Before: text data bss dec hex filename 15506 376 8 15890 3e12 net/ipv4/fib_trie.o After: text data bss dec hex filename 14827 376 8 15211 3b6b net/ipv4/fib_trie.o Signed-off-by: Alexander Duyck Signed-off-by: David S.
Miller --- net/ipv4/fib_trie.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0c88df09d25d..87fc9a466fa2 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -186,8 +186,6 @@ static inline unsigned long tnode_child_length(const struct tnode *tn) static inline struct tnode *tnode_get_child(const struct tnode *tn, unsigned long i) { - BUG_ON(i >= tnode_child_length(tn)); - return rtnl_dereference(tn->child[i]); } @@ -195,8 +193,6 @@ static inline struct tnode *tnode_get_child(const struct tnode *tn, static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, unsigned long i) { - BUG_ON(i >= tnode_child_length(tn)); - return rcu_dereference_rtnl(tn->child[i]); } @@ -371,7 +367,7 @@ static inline int tnode_full(const struct tnode *tn, const struct tnode *n) */ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) { - struct tnode *chi = rtnl_dereference(tn->child[i]); + struct tnode *chi = tnode_get_child(tn, i); int isfull, wasfull; BUG_ON(i >= tnode_child_length(tn)); @@ -867,7 +863,7 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) if (IS_LEAF(n)) break; - n = rcu_dereference_rtnl(n->child[index]); + n = tnode_get_child_rcu(n, index); } return n; @@ -934,7 +930,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) } tp = n; - n = rcu_dereference_rtnl(n->child[index]); + n = tnode_get_child_rcu(n, index); } l = leaf_new(key); @@ -1215,7 +1211,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, cindex = index; } - n = rcu_dereference(n->child[index]); + n = tnode_get_child_rcu(n, index); if (unlikely(!n)) goto backtrace; } @@ -1835,7 +1831,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) if (n->bits < MAX_STAT_DEPTH) s->nodesizes[n->bits]++; - for (i = 0; i < tnode_child_length(n); i++) { + for (i = tnode_child_length(n); i--;) { if (!rcu_access_pointer(n->child[i])) s->nullpointers++; } -- cgit v1.2.3 From 5405afd1a30620de8601436bae739c67c0bc7324 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:57:08 -0800 Subject: fib_trie: Add tracking value for suffix length This change adds a tracking value for the maximum suffix length of all prefixes stored in any given tnode. With this value we can determine if we need to backtrace or not based on if the suffix is greater than the pos value. By doing this we can reduce the CPU overhead for lookups in the local table as many of the prefixes there are 32b long and have a suffix length of 0 meaning we can immediately backtrace to the root node without needing to test any of the nodes between it and where we ended up. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 87fc9a466fa2..281e5e00025f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -96,6 +96,7 @@ struct tnode { t_key key; unsigned char bits; /* 2log(KEYLENGTH) bits needed */ unsigned char pos; /* 2log(KEYLENGTH) bits needed */ + unsigned char slen; struct tnode __rcu *parent; struct rcu_head rcu; union { @@ -311,6 +312,7 @@ static struct tnode *leaf_new(t_key key) * as the nodes are searched */ l->key = key; + l->slen = 0; l->pos = 0; /* set bits to 0 indicating we are not a tnode */ l->bits = 0; @@ -342,6 +344,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) if (tn) { tn->parent = NULL; + tn->slen = pos; tn->pos = pos; tn->bits = bits; tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; @@ -387,6 +390,9 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) else if (!wasfull && isfull) tn->full_children++; + if (n && (tn->slen < n->slen)) + tn->slen = n->slen; + rcu_assign_pointer(tn->child[i], n); } @@ -635,6 +641,41 @@ static int halve(struct trie *t, struct tnode *oldtnode) return 0; } +static unsigned char update_suffix(struct tnode *tn) +{ + unsigned char slen = tn->pos; + unsigned long stride, i; + + /* search though the list of children looking for nodes that might + * have a suffix greater than the one we currently have. This is + * why we start with a stride of 2 since a stride of 1 would + * represent the nodes with suffix length equal to tn->pos + */ + for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) { + struct tnode *n = tnode_get_child(tn, i); + + if (!n || (n->slen <= slen)) + continue; + + /* update stride and slen based on new value */ + stride <<= (n->slen - slen); + slen = n->slen; + i &= ~(stride - 1); + + /* if slen covers all but the last bit we can stop here + * there will be nothing longer than that since only node + * 0 and 1 << (bits - 1) could have that as their suffix + * length. 
+ */ + if ((slen + 1) >= (tn->pos + tn->bits)) + break; + } + + tn->slen = slen; + + return slen; +} + /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of * the Helsinki University of Technology and Matti Tikkanen of Nokia * Telecommunications, page 6: @@ -790,6 +831,19 @@ no_children: /* drop dead node */ tnode_free_init(tn); tnode_free(tn); + return; + } + + /* Return if at least one deflate was run */ + if (max_work != MAX_WORK) + return; + + /* push the suffix length to the parent node */ + if (tn->slen > tn->pos) { + unsigned char slen = update_suffix(tn); + + if (tp && (slen > tp->slen)) + tp->slen = slen; } } @@ -818,8 +872,58 @@ static inline struct list_head *get_fa_head(struct tnode *l, int plen) return &li->falh; } -static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) +static void leaf_pull_suffix(struct tnode *l) +{ + struct tnode *tp = node_parent(l); + + while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) { + if (update_suffix(tp) > l->slen) + break; + tp = node_parent(tp); + } +} + +static void leaf_push_suffix(struct tnode *l) { + struct tnode *tn = node_parent(l); + + /* if this is a new leaf then tn will be NULL and we can sort + * out parent suffix lengths as a part of trie_rebalance + */ + while (tn && (tn->slen < l->slen)) { + tn->slen = l->slen; + tn = node_parent(tn); + } +} + +static void remove_leaf_info(struct tnode *l, struct leaf_info *old) +{ + struct hlist_node *prev; + + /* record the location of the pointer to this object */ + prev = rtnl_dereference(hlist_pprev_rcu(&old->hlist)); + + /* remove the leaf info from the list */ + hlist_del_rcu(&old->hlist); + + /* if we emptied the list this leaf will be freed and we can sort + * out parent suffix lengths as a part of trie_rebalance + */ + if (hlist_empty(&l->list)) + return; + + /* if we removed the tail then we need to update slen */ + if (!rcu_access_pointer(hlist_next_rcu(prev))) { + struct leaf_info *li = hlist_entry(prev, typeof(*li), hlist); + + l->slen = KEYLENGTH - li->plen; + leaf_pull_suffix(l); + } +} + +static void insert_leaf_info(struct tnode *l, struct leaf_info *new) +{ + struct hlist_head *head = &l->list; struct leaf_info *li = NULL, *last = NULL; if (hlist_empty(head)) { @@ -836,6 +940,12 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) else hlist_add_before_rcu(&new->hlist, &li->hlist); } + + /* if we added to the tail node then we need to update slen */ + if (!rcu_access_pointer(hlist_next_rcu(&new->hlist))) { + l->slen = KEYLENGTH - new->plen; + leaf_push_suffix(l); + } } /* rcu_read_lock needs to be hold by caller from readside */ @@ -925,7 +1035,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) /* we have found a leaf. Prefixes have already been compared */ if (IS_LEAF(n)) { /* Case 1: n is a leaf, and prefixes match*/ - insert_leaf_info(&n->list, li); + insert_leaf_info(n, li); return fa_head; } @@ -939,7 +1049,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) return NULL; } - insert_leaf_info(&l->list, li); + insert_leaf_info(l, li); /* Case 2: n is a LEAF or a TNODE and the key doesn't match. * @@ -1206,7 +1316,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, /* only record pn and cindex if we are going to be chopping * bits later. Otherwise we are just wasting cycles. 
*/ - if (index) { + if (n->slen > n->pos) { pn = n; cindex = index; } @@ -1225,7 +1335,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, * between the key and the prefix exist in the region of * the lsb and higher in the prefix. */ - if (unlikely(prefix_mismatch(key, n))) + if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos)) goto backtrace; /* exit out and process leaf */ @@ -1425,7 +1535,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) tb->tb_num_default--; if (list_empty(fa_head)) { - hlist_del_rcu(&li->hlist); + remove_leaf_info(l, li); free_leaf_info(li); } -- cgit v1.2.3 From 9b174d88c257150562b0101fcc6cb6c3cb74275c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 30 Dec 2014 19:10:15 -0800 Subject: net: Add Transparent Ethernet Bridging GRO support. Currently the only tunnel protocol that supports GRO with encapsulated Ethernet is VXLAN. This pulls out the Ethernet code into a proper layer so that it can be used by other tunnel protocols such as GRE and Geneve. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 53 +++----------------------- include/linux/etherdevice.h | 4 ++ net/ethernet/eth.c | 92 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7fbd89fbe107..2ab0922af0b4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -549,10 +549,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff { struct sk_buff *p, **pp = NULL; struct vxlanhdr *vh, *vh2; - struct ethhdr *eh, *eh2; - unsigned int hlen, off_vx, off_eth; - const struct packet_offload *ptype; - __be16 type; + unsigned int hlen, off_vx; int flush = 1; off_vx = skb_gro_offset(skb); @@ -563,17 +560,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff if (unlikely(!vh)) goto out; } - skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */ - skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); - - off_eth = skb_gro_offset(skb); - hlen = off_eth + sizeof(*eh); - eh = skb_gro_header_fast(skb, off_eth); - if (skb_gro_header_hard(skb, hlen)) { - eh = skb_gro_header_slow(skb, hlen, off_eth); - if (unlikely(!eh)) - goto out; - } flush = 0; @@ -582,28 +568,16 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff continue; vh2 = (struct vxlanhdr *)(p->data + off_vx); - eh2 = (struct ethhdr *)(p->data + off_eth); - if (vh->vx_vni != vh2->vx_vni || compare_ether_header(eh, eh2)) { + if (vh->vx_vni != vh2->vx_vni) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } - type = eh->h_proto; - - rcu_read_lock(); - ptype = gro_find_receive_by_type(type); - if (ptype == NULL) { - flush = 1; - goto out_unlock; - } - - skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */ - skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); - pp = ptype->callbacks.gro_receive(head, skb); + skb_gro_pull(skb, sizeof(struct vxlanhdr)); + skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); + pp = eth_gro_receive(head, skb); -out_unlock: - rcu_read_unlock(); out: NAPI_GRO_CB(skb)->flush |= flush; @@ -612,24 +586,9 @@ out: static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) { - struct ethhdr *eh; - struct packet_offload *ptype; - __be16 type; - int vxlan_len = sizeof(struct vxlanhdr) + sizeof(struct ethhdr); - int err = -ENOSYS; - udp_tunnel_gro_complete(skb, nhoff); - eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct 
vxlanhdr)); - type = eh->h_proto; - - rcu_read_lock(); - ptype = gro_find_complete_by_type(type); - if (ptype != NULL) - err = ptype->callbacks.gro_complete(skb, nhoff + vxlan_len); - - rcu_read_unlock(); - return err; + return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); } /* Notify netdevs that UDP port started listening */ diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 41c891d05f04..1d869d185a0d 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -52,6 +52,10 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count) +struct sk_buff **eth_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +int eth_gro_complete(struct sk_buff *skb, int nhoff); + /* Reserved Ethernet Addresses per IEEE 802.1Q */ static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 33a140e15834..238f38d21641 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -424,3 +424,95 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); + +struct sk_buff **eth_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff *p, **pp = NULL; + struct ethhdr *eh, *eh2; + unsigned int hlen, off_eth; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_eth = skb_gro_offset(skb); + hlen = off_eth + sizeof(*eh); + eh = skb_gro_header_fast(skb, off_eth); + if (skb_gro_header_hard(skb, hlen)) { + eh = skb_gro_header_slow(skb, hlen, off_eth); + if (unlikely(!eh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + eh2 = (struct ethhdr *)(p->data + off_eth); + if (compare_ether_header(eh, eh2)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = eh->h_proto; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, sizeof(*eh)); + skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(eth_gro_receive); + +int eth_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff); + __be16 type = eh->h_proto; + struct packet_offload *ptype; + int err = -ENOSYS; + + if (skb->encapsulation) + skb_set_inner_mac_header(skb, nhoff); + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + + sizeof(struct ethhdr)); + + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(eth_gro_complete); + +static struct packet_offload eth_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_TEB), + .callbacks = { + .gro_receive = eth_gro_receive, + .gro_complete = eth_gro_complete, + }, +}; + +static int __init eth_offload_init(void) +{ + dev_add_offload(&eth_packet_offload); + + return 0; +} + +fs_initcall(eth_offload_init); -- cgit v1.2.3 From a4c9ea5e8fec680134d22aa99b54d1cd8c226ebd Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 30 Dec 2014 19:10:16 -0800 Subject: geneve: Add Geneve GRO support This results
in an approximately 30% increase in throughput when handling encapsulated bulk traffic. Signed-off-by: Joe Stringer Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 394a200f93c1..19e256e1dd92 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -149,6 +149,99 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, } EXPORT_SYMBOL_GPL(geneve_xmit_skb); +static int geneve_hlen(struct genevehdr *gh) +{ + return sizeof(*gh) + gh->opt_len * 4; +} + +static struct sk_buff **geneve_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff *p, **pp = NULL; + struct genevehdr *gh, *gh2; + unsigned int hlen, gh_len, off_gnv; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_gnv = skb_gro_offset(skb); + hlen = off_gnv + sizeof(*gh); + gh = skb_gro_header_fast(skb, off_gnv); + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + if (gh->ver != GENEVE_VER || gh->oam) + goto out; + gh_len = geneve_hlen(gh); + + hlen = off_gnv + gh_len; + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + gh2 = (struct genevehdr *)(p->data + off_gnv); + if (gh->opt_len != gh2->opt_len || + memcmp(gh, gh2, gh_len)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, gh_len); + skb_gro_postpull_rcsum(skb, gh, gh_len); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int geneve_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct genevehdr *gh; + struct packet_offload *ptype; + __be16 type; + int gh_len; + int err = -ENOSYS; + + udp_tunnel_gro_complete(skb, nhoff); + + gh = (struct genevehdr *)(skb->data + nhoff); + gh_len = geneve_hlen(gh); + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); + + rcu_read_unlock(); + return err; +} + static void geneve_notify_add_rx_port(struct geneve_sock *gs) { struct sock *sk = gs->sock->sk; @@ -278,8 +371,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, /* Initialize the geneve udp offloads structure */ gs->udp_offloads.port = port; - gs->udp_offloads.callbacks.gro_receive = NULL; - gs->udp_offloads.callbacks.gro_complete = NULL; + gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; + gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; spin_lock(&gn->sock_lock); hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); -- cgit v1.2.3 From e8768f971558019ed83eee8210375cd2143deef2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 31 Dec 2014 13:33:41 +0100 Subject: net: skbuff: don't zero tc members when freeing skb Not needed, only four cases: - kfree_skb (or one of its aliases). Don't need to zero, memory will be freed. - kfree_skb_partial and head was stolen: memory will be freed. 
- skb_morph: The skb header fields (including tc ones) will be copied over from the 'to-be-morphed' skb right after skb_release_head_state returns. - skb_segment: Same as before, all the skb header fields are copied over from the original skb right away. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/skbuff.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 395c15b82087..5a2a2e887a12 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -677,13 +677,6 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); #endif -/* XXX: IS this still necessary? - JHS */ -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; -#endif -#endif } /* Free everything but the sk_buff shell. */ -- cgit v1.2.3 From 300acfdec916be696373864226267b11302d3b84 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 31 Dec 2014 14:43:16 -0800 Subject: Bluetooth: Introduce force_bredr_smp debugfs option for testing Testing cross-transport pairing that starts on BR/EDR is only valid when using a controller with BR/EDR Secure Connections. Devices will indicate this by providing BR/EDR SMP fixed channel over L2CAP. To allow testing of this feature on Bluetooth 4.0 controller or controllers without the BR/EDR Secure Connections features, introduce a force_bredr_smp debugfs option that allows faking the required AES connection. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 1 + net/bluetooth/l2cap_core.c | 2 +- net/bluetooth/smp.c | 78 ++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 76 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1849a437f6e1..fddb93f168b8 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -174,6 +174,7 @@ enum { HCI_DUT_MODE, HCI_FORCE_SC, HCI_FORCE_LESC, + HCI_FORCE_BREDR_SMP, HCI_FORCE_STATIC_ADDR, }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d04dc0095736..11029b212874 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6968,7 +6968,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) && (bredr_sc_enabled(hcon->hdev) || - test_bit(HCI_FORCE_LESC, &hcon->hdev->dbg_flags))) + test_bit(HCI_FORCE_BREDR_SMP, &hcon->hdev->dbg_flags))) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 358264c0e785..73643a6ca3cf 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -20,6 +20,7 @@ SOFTWARE IS DISCLAIMED. 
*/ +#include #include #include #include @@ -1675,7 +1676,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (conn->hcon->type == ACL_LINK) { /* We must have a BR/EDR SC link */ if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return SMP_CROSS_TRANSP_NOT_ALLOWED; set_bit(SMP_FLAG_SC, &smp->flags); @@ -2738,7 +2739,7 @@ static void bredr_pairing(struct l2cap_chan *chan) /* BR/EDR must use Secure Connections for SMP */ if (!test_bit(HCI_CONN_AES_CCM, &hcon->flags) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return; /* If our LE support is not enabled don't do anything */ @@ -2976,6 +2977,66 @@ static void smp_del_chan(struct l2cap_chan *chan) l2cap_chan_put(chan); } +static ssize_t force_bredr_smp_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_bredr_smp_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + return -EALREADY; + + if (enable) { + struct l2cap_chan *chan; + + chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); + if (IS_ERR(chan)) + return PTR_ERR(chan); + + hdev->smp_bredr_data = chan; + } else { + struct l2cap_chan *chan; + + chan = hdev->smp_bredr_data; + hdev->smp_bredr_data = NULL; + smp_del_chan(chan); + } + + change_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_bredr_smp_fops = { + .open = simple_open, + .read = force_bredr_smp_read, + .write = force_bredr_smp_write, + .llseek = default_llseek, +}; + int smp_register(struct hci_dev *hdev) { struct l2cap_chan *chan; @@ -2988,9 +3049,18 @@ int smp_register(struct hci_dev *hdev) hdev->smp_data = chan; - if (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + /* If the controller does not support BR/EDR Secure Connections + * feature, then the BR/EDR SMP channel shall not be present. + * + * To test this with Bluetooth 4.0 controllers, create a debugfs + * switch that allows forcing BR/EDR SMP support and accepting + * cross-transport pairing on non-AES encrypted connections. + */ + if (!lmp_sc_capable(hdev)) { + debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, + hdev, &force_bredr_smp_fops); return 0; + } chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); if (IS_ERR(chan)) { -- cgit v1.2.3 From 91389af67c3a8d8f9eca5f51deda58fae4d9326e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 31 Dec 2014 14:43:17 -0800 Subject: Bluetooth: Remove broken force_lesc_support debugfs option The force_lesc_support debugfs option never really worked. It has a race condition between creating the debugfs entry and registering the L2CAP fixed channel for BR/EDR SMP support. Also this has been replaced with a working force_bredr_smp debugfs switch that developers can use now. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 1 - net/bluetooth/hci_debugfs.c | 49 --------------------------------------------- 2 files changed, 50 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fddb93f168b8..884ba004237e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -173,7 +173,6 @@ enum { enum { HCI_DUT_MODE, HCI_FORCE_SC, - HCI_FORCE_LESC, HCI_FORCE_BREDR_SMP, HCI_FORCE_STATIC_ADDR, }; diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index ee33ce88d3d8..dc8f994a957b 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -421,50 +421,6 @@ static const struct file_operations force_sc_support_fops = { .llseek = default_llseek, }; -static ssize_t force_lesc_support_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_LESC, &hdev->dbg_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_lesc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_LESC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_lesc_support_fops = { - .open = simple_open, - .read = force_lesc_support_read, - .write = force_lesc_support_write, - .llseek = default_llseek, -}; - static int idle_timeout_set(void *data, u64 val) { struct hci_dev *hdev = data; @@ -568,11 +524,6 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) debugfs_create_file("force_sc_support", 0644, hdev->debugfs, hdev, &force_sc_support_fops); - - if (lmp_le_capable(hdev)) - debugfs_create_file("force_lesc_support", 0644, - hdev->debugfs, hdev, - &force_lesc_support_fops); } if (lmp_sniff_capable(hdev)) { -- cgit v1.2.3 From 05b3c3e7905d00a1fe2e9184fdd9b5eac427c736 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 31 Dec 2014 14:43:18 -0800 Subject: Bluetooth: Remove no longer needed force_sc_support debugfs option The force_sc_support debugfs option was introduced to easily work with pre-production Bluetooth 4.1 silicon. This option is no longer needed since controllers supporting BR/EDR Secure Connections feature are now available. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 1 - include/net/bluetooth/hci_core.h | 3 +-- net/bluetooth/hci_debugfs.c | 49 ---------------------------------------- net/bluetooth/mgmt.c | 11 ++++----- 4 files changed, 5 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 884ba004237e..aee16bf5d34f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -172,7 +172,6 @@ enum { */ enum { HCI_DUT_MODE, - HCI_FORCE_SC, HCI_FORCE_BREDR_SMP, HCI_FORCE_STATIC_ADDR, }; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3e7e5110f298..89f4e3c8a097 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1017,8 +1017,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define hdev_is_powered(hdev) (test_bit(HCI_UP, &hdev->flags) && \ !test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) -#define bredr_sc_enabled(dev) ((lmp_sc_capable(dev) || \ - test_bit(HCI_FORCE_SC, &(dev)->dbg_flags)) && \ +#define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \ test_bit(HCI_SC_ENABLED, &(dev)->dev_flags)) /* ----- HCI protocols ----- */ diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index dc8f994a957b..d72ebc2b11fa 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -375,52 +375,6 @@ static const struct file_operations sc_only_mode_fops = { .llseek = default_llseek, }; -static ssize_t force_sc_support_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_SC, &hdev->dbg_flags) ? 'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_sc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_SC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_sc_support_fops = { - .open = simple_open, - .read = force_sc_support_read, - .write = force_sc_support_write, - .llseek = default_llseek, -}; - static int idle_timeout_set(void *data, u64 val) { struct hci_dev *hdev = data; @@ -521,9 +475,6 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) hdev, &auto_accept_delay_fops); debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, hdev, &sc_only_mode_fops); - - debugfs_create_file("force_sc_support", 0644, hdev->debugfs, - hdev, &force_sc_support_fops); } if (lmp_sniff_capable(hdev)) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3d2f7ad1e655..6b3f5537e441 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -570,8 +570,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_HS; } - if (lmp_sc_capable(hdev) || - test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; } @@ -4727,8 +4726,8 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, BT_DBG("request for 
%s", hdev->name); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && - !lmp_sc_capable(hdev) && !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (!lmp_sc_capable(hdev) && + !test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_NOT_SUPPORTED); @@ -4738,9 +4737,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev) || - (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) || + if (!hdev_is_powered(hdev) || !lmp_sc_capable(hdev) || !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { bool changed; -- cgit v1.2.3 From cb0d2faeb1356f7d453bab3f1028f169d8ffe99c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 31 Dec 2014 14:43:19 -0800 Subject: Bluetooth: Fix scope of sc_only_mode debugfs entry The sc_only_mode debugfs entry is used to read the current state of the Secure Connections Only mode. Before Bluetooth 4.2 this mode was only for BR/EDR controllers and with that tight to the support Secure Simple Pairing. Since Secure Connections is now available for BR/EDR and LE this debugfs entry is no longer correctly place. Move it to the common section and enable it when either BR/EDR Secure Connections feature is supported or when the controller has LE support. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_debugfs.c | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index d72ebc2b11fa..ead89a5ad9ce 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -212,6 +212,24 @@ static int conn_info_max_age_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, conn_info_max_age_set, "%llu\n"); +static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations sc_only_mode_fops = { + .open = simple_open, + .read = sc_only_mode_read, + .llseek = default_llseek, +}; + void hci_debugfs_create_common(struct hci_dev *hdev) { debugfs_create_file("features", 0444, hdev->debugfs, hdev, @@ -230,6 +248,10 @@ void hci_debugfs_create_common(struct hci_dev *hdev) &conn_info_min_age_fops); debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, &conn_info_max_age_fops); + + if (lmp_sc_capable(hdev) || lmp_le_capable(hdev)) + debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, + hdev, &sc_only_mode_fops); } static int inquiry_cache_show(struct seq_file *f, void *p) @@ -357,24 +379,6 @@ static int auto_accept_delay_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, auto_accept_delay_set, "%llu\n"); -static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static const struct file_operations sc_only_mode_fops = { - .open = simple_open, - .read = sc_only_mode_read, - .llseek = default_llseek, -}; - static int idle_timeout_set(void *data, u64 val) { struct hci_dev *hdev = data; @@ -470,12 +474,9 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) debugfs_create_file("voice_setting", 0444, hdev->debugfs, hdev, &voice_setting_fops); - if (lmp_ssp_capable(hdev)) { + if (lmp_ssp_capable(hdev)) debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, hdev, &auto_accept_delay_fops); - debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, - hdev, &sc_only_mode_fops); - } if (lmp_sniff_capable(hdev)) { debugfs_create_file("idle_timeout", 0644, hdev->debugfs, -- cgit v1.2.3 From 203de21bf6b19f81267c373ca5323e6386fe5809 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 31 Dec 2014 20:01:22 -0800 Subject: Bluetooth: Fix for a leftover debug of pairing credentials One of the LE Secure Connections security credentials was still using BT_DBG instead of SMP_DBG. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 73643a6ca3cf..96c7d51a2753 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -300,7 +300,7 @@ static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], if (err) return err; - BT_DBG("res %16phN", res); + SMP_DBG("res %16phN", res); return err; } -- cgit v1.2.3 From ec6cef9cd98d9d2dac8ec8f1d11b8a3a9524e390 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 1 Jan 2015 02:05:16 -0800 Subject: Bluetooth: Fix SMP channel registration for unconfigured controllers When the Bluetooth controller requires an unconfigured state (for example when the BD_ADDR is missing), then it is important to try to register the SMP channels when the controller transitions to the configured state. This also fixes an issue with the debugfs entries that are not present for controllers that start out as unconfigured. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5ef5221c1813..6c12110b75a7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -931,10 +931,20 @@ static int __hci_init(struct hci_dev *hdev) if (err < 0) return err; - /* Only create debugfs entries during the initial setup - * phase and not every time the controller gets powered on. + /* This function is only called when the controller is actually in + * configured state. When the controller is marked as unconfigured, + * this initialization procedure is not run. + * + * It means that it is possible that a controller runs through its + * setup phase and then discovers missing settings. If that is the + * case, then this function will not be called. It then will only + * be called during the config phase. + * + * So only when in setup phase or config phase, create the debugfs + * entries and register the SMP channels.
*/ - if (!test_bit(HCI_SETUP, &hdev->dev_flags)) + if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + !test_bit(HCI_CONFIG, &hdev->dev_flags)) return 0; hci_debugfs_create_common(hdev); -- cgit v1.2.3 From 955d7fc93ed095c74fdc439c2893a87ca1652148 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 31 Dec 2014 19:39:08 +0100 Subject: ieee802154: handle socket functionality as module This patch makes the ieee802154 socket handling a separate module. Currently this is part of the ieee802154 module. It turned out that the ieee802154 module also has two module_init/module_exit functions, one inside core.c and the other in af_ieee802154.c. This patch also solves this issue by handling af_802154 as a separate module. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/Kconfig | 9 +++++++++ net/ieee802154/Makefile | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index c0d4154d144f..98a190dea97d 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -10,6 +10,15 @@ config IEEE802154 Say Y here to compile LR-WPAN support into the kernel or say M to compile it as modules. +config IEEE802154_SOCKET + tristate "IEEE 802.15.4 socket interface" + depends on IEEE802154 + default y + ---help--- + Socket interface for IEEE 802.15.4. Contains DGRAM sockets interface + for 802.15.4 dataframes. Also RAW socket interface to build MAC + header from userspace. + config IEEE802154_6LOWPAN tristate "6lowpan support over IEEE 802.15.4" depends on IEEE802154 && 6LOWPAN diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 9f6970f2a28b..53612d1debe1 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,4 +1,5 @@ -obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o +obj-$(CONFIG_IEEE802154) += ieee802154.o +obj-$(CONFIG_IEEE802154_SOCKET) += af_802154.o obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o -- cgit v1.2.3 From 79300e3a438f93c7431b4bf40fa18580ccb34be1 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 31 Dec 2014 19:39:09 +0100 Subject: ieee802154: socket: change module name This patch changes the module name of af_802154 to ieee802154_socket. This keeps the naming convention consistent with the 6LoWPAN module ieee802154_6lowpan. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 53612d1debe1..19b2bec1fd09 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,10 +1,10 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o -obj-$(CONFIG_IEEE802154_SOCKET) += af_802154.o +obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ header_ops.o sysfs.o nl802154.o -af_802154-y := af_ieee802154.o raw.o dgram.o +ieee802154_socket-y := af_ieee802154.o raw.o dgram.o ccflags-y += -D__CHECK_ENDIAN__ -- cgit v1.2.3 From 78f821b648267d3ee1a63093cd0c41cc99b97727 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 31 Dec 2014 19:39:10 +0100 Subject: ieee802154: socket: put handling into one file This patch puts all related socket handling into one file.
This is just a cleanup to do all socket handling stuff inside of one implementation file. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/Makefile | 2 +- net/ieee802154/af802154.h | 33 -- net/ieee802154/af_ieee802154.c | 762 ++++++++++++++++++++++++++++++++++++++++- net/ieee802154/dgram.c | 549 ----------------------------- net/ieee802154/raw.c | 270 --------------- 5 files changed, 760 insertions(+), 856 deletions(-) delete mode 100644 net/ieee802154/af802154.h delete mode 100644 net/ieee802154/dgram.c delete mode 100644 net/ieee802154/raw.c (limited to 'net') diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 19b2bec1fd09..e6ddfcd73510 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ header_ops.o sysfs.o nl802154.o -ieee802154_socket-y := af_ieee802154.o raw.o dgram.o +ieee802154_socket-y := af_ieee802154.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/ieee802154/af802154.h b/net/ieee802154/af802154.h deleted file mode 100644 index 343b63e6f953..000000000000 --- a/net/ieee802154/af802154.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Internal interfaces for ieee 802.15.4 address family. - * - * Copyright 2007, 2008, 2009 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Written by: - * Sergey Lapin - * Dmitry Eremin-Solenikov - */ - -#ifndef AF802154_H -#define AF802154_H - -struct sk_buff; -struct net_device; -struct ieee802154_addr; -extern struct proto ieee802154_raw_prot; -extern struct proto ieee802154_dgram_prot; -void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb); -int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb); -struct net_device *ieee802154_get_dev(struct net *net, - const struct ieee802154_addr *addr); - -#endif diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index d0a1282cdf43..64beb0b3c560 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -34,10 +34,8 @@ #include #include -#include "af802154.h" - /* Utility function for families */ -struct net_device* +static struct net_device* ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) { struct net_device *dev = NULL; @@ -182,6 +180,248 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, } } +/* RAW Sockets (802.15.4 created in userspace) */ +static HLIST_HEAD(raw_head); +static DEFINE_RWLOCK(raw_lock); + +static void raw_hash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + sk_add_node(sk, &raw_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&raw_lock); +} + +static void raw_unhash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&raw_lock); +} + +static void raw_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) +{ + struct ieee802154_addr addr; + struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; + int err = 0; + struct net_device *dev = NULL; + + if (len < sizeof(*uaddr)) + return -EINVAL; + + uaddr = (struct sockaddr_ieee802154 *)_uaddr; + if (uaddr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + ieee802154_addr_from_sa(&addr, &uaddr->addr); + dev = ieee802154_get_dev(sock_net(sk), &addr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + sk->sk_bound_dev_if = dev->ifindex; + sk_dst_reset(sk); + +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int raw_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + return -ENOTSUPP; +} + +static int raw_disconnect(struct sock *sk, int flags) +{ + return 0; +} + +static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + lock_sock(sk); + if (!sk->sk_bound_dev_if) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + release_sock(sk); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EINVAL; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + goto out_dev; + + 
skb_reserve(skb, hlen); + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk; + + read_lock(&raw_lock); + sk_for_each(sk, &raw_head) { + bh_lock_sock(sk); + if (!sk->sk_bound_dev_if || + sk->sk_bound_dev_if == dev->ifindex) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + raw_rcv_skb(sk, clone); + } + bh_unlock_sock(sk); + } + read_unlock(&raw_lock); +} + +static int raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +static int raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + return -EOPNOTSUPP; +} + +static struct proto ieee802154_raw_prot = { + .name = "IEEE-802.15.4-RAW", + .owner = THIS_MODULE, + .obj_size = sizeof(struct sock), + .close = raw_close, + .bind = raw_bind, + .sendmsg = raw_sendmsg, + .recvmsg = raw_recvmsg, + .hash = raw_hash, + .unhash = raw_unhash, + .connect = raw_connect, + .disconnect = raw_disconnect, + .getsockopt = raw_getsockopt, + .setsockopt = raw_setsockopt, +}; + static const struct proto_ops ieee802154_raw_ops = { .family = PF_IEEE802154, .owner = THIS_MODULE, @@ -207,6 +447,522 @@ static const struct proto_ops ieee802154_raw_ops = { #endif }; +/* DGRAM Sockets (802.15.4 dataframes) */ +static HLIST_HEAD(dgram_head); +static DEFINE_RWLOCK(dgram_lock); + +struct dgram_sock { + struct sock sk; + + struct ieee802154_addr src_addr; + struct ieee802154_addr dst_addr; + + unsigned int bound:1; + unsigned int connected:1; + unsigned int want_ack:1; + unsigned int secen:1; + unsigned int secen_override:1; + unsigned int seclevel:3; + unsigned int seclevel_override:1; +}; + +static inline struct dgram_sock *dgram_sk(const struct sock *sk) +{ + return container_of(sk, struct dgram_sock, sk); +} + +static void dgram_hash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + sk_add_node(sk, &dgram_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&dgram_lock); +} + +static void dgram_unhash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + 
write_unlock_bh(&dgram_lock); +} + +static int dgram_init(struct sock *sk) +{ + struct dgram_sock *ro = dgram_sk(sk); + + ro->want_ack = 1; + return 0; +} + +static void dgram_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct ieee802154_addr haddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = -EINVAL; + struct net_device *dev; + + lock_sock(sk); + + ro->bound = 0; + + if (len < sizeof(*addr)) + goto out; + + if (addr->family != AF_IEEE802154) + goto out; + + ieee802154_addr_from_sa(&haddr, &addr->addr); + dev = ieee802154_get_dev(sock_net(sk), &haddr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + ro->src_addr = haddr; + + ro->bound = 1; + err = 0; +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + { + int amount = sk_wmem_alloc_get(sk); + + return put_user(amount, (int __user *)arg); + } + + case SIOCINQ: + { + struct sk_buff *skb; + unsigned long amount; + + amount = 0; + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb != NULL) { + /* We will only return the amount + * of this packet since that is all + * that will be read. + */ + amount = skb->len - ieee802154_hdr_length(skb); + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } + } + + return -ENOIOCTLCMD; +} + +/* FIXME: autobind */ +static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, + int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = 0; + + if (len < sizeof(*addr)) + return -EINVAL; + + if (addr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + if (!ro->bound) { + err = -ENETUNREACH; + goto out; + } + + ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); + ro->connected = 1; + +out: + release_sock(sk); + return err; +} + +static int dgram_disconnect(struct sock *sk, int flags) +{ + struct dgram_sock *ro = dgram_sk(sk); + + lock_sock(sk); + ro->connected = 0; + release_sock(sk); + + return 0; +} + +static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + struct ieee802154_mac_cb *cb; + struct dgram_sock *ro = dgram_sk(sk); + struct ieee802154_addr dst_addr; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + if (!ro->connected && !msg->msg_name) + return -EDESTADDRREQ; + else if (ro->connected && msg->msg_name) + return -EISCONN; + + if (!ro->bound) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EMSGSIZE; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, + &err); + if (!skb) + goto out_dev; + + 
skb_reserve(skb, hlen); + + skb_reset_network_header(skb); + + cb = mac_cb_init(skb); + cb->type = IEEE802154_FC_TYPE_DATA; + cb->ackreq = ro->want_ack; + + if (msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_ieee802154*, + daddr, msg->msg_name); + + ieee802154_addr_from_sa(&dst_addr, &daddr->addr); + } else { + dst_addr = ro->dst_addr; + } + + cb->secen = ro->secen; + cb->secen_override = ro->secen_override; + cb->seclevel = ro->seclevel; + cb->seclevel_override = ro->seclevel_override; + + err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, + ro->bound ? &ro->src_addr : NULL, size); + if (err < 0) + goto out_skb; + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + /* FIXME: skip headers if necessary ?! */ + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (saddr) { + saddr->family = AF_IEEE802154; + ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); + *addr_len = sizeof(*saddr); + } + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static inline bool +ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, + struct dgram_sock *ro) +{ + if (!ro->bound) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_LONG && + hw_addr == ro->src_addr.extended_addr) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && + pan_id == ro->src_addr.pan_id && + short_addr == ro->src_addr.short_addr) + return true; + + return false; +} + +static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk, *prev = NULL; + int ret = NET_RX_SUCCESS; + __le16 pan_id, short_addr; + __le64 hw_addr; + + /* Data frame processing */ + BUG_ON(dev->type != ARPHRD_IEEE802154); + + pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); + hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); + + read_lock(&dgram_lock); + sk_for_each(sk, &dgram_head) { + if (ieee802154_match_sock(hw_addr, pan_id, short_addr, + dgram_sk(sk))) { + if (prev) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + dgram_rcv_skb(prev, clone); + } + + prev = sk; + } + } + + if (prev) { + dgram_rcv_skb(prev, skb); + } else { + kfree_skb(skb); + ret = NET_RX_DROP; + } + read_unlock(&dgram_lock); + + return ret; +} + +static int dgram_getsockopt(struct sock *sk, int level, int optname, + char 
__user *optval, int __user *optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + + int val, len; + + if (level != SOL_IEEE802154) + return -EOPNOTSUPP; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + + switch (optname) { + case WPAN_WANTACK: + val = ro->want_ack; + break; + case WPAN_SECURITY: + if (!ro->secen_override) + val = WPAN_SECURITY_DEFAULT; + else if (ro->secen) + val = WPAN_SECURITY_ON; + else + val = WPAN_SECURITY_OFF; + break; + case WPAN_SECURITY_LEVEL: + if (!ro->seclevel_override) + val = WPAN_SECURITY_LEVEL_DEFAULT; + else + val = ro->seclevel; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +} + +static int dgram_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + struct net *net = sock_net(sk); + int val; + int err = 0; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case WPAN_WANTACK: + ro->want_ack = !!val; + break; + case WPAN_SECURITY: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + switch (val) { + case WPAN_SECURITY_DEFAULT: + ro->secen_override = 0; + break; + case WPAN_SECURITY_ON: + ro->secen_override = 1; + ro->secen = 1; + break; + case WPAN_SECURITY_OFF: + ro->secen_override = 1; + ro->secen = 0; + break; + default: + err = -EINVAL; + break; + } + break; + case WPAN_SECURITY_LEVEL: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + if (val < WPAN_SECURITY_LEVEL_DEFAULT || + val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { + err = -EINVAL; + } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { + ro->seclevel_override = 0; + } else { + ro->seclevel_override = 1; + ro->seclevel = val; + } + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static struct proto ieee802154_dgram_prot = { + .name = "IEEE-802.15.4-MAC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct dgram_sock), + .init = dgram_init, + .close = dgram_close, + .bind = dgram_bind, + .sendmsg = dgram_sendmsg, + .recvmsg = dgram_recvmsg, + .hash = dgram_hash, + .unhash = dgram_unhash, + .connect = dgram_connect, + .disconnect = dgram_disconnect, + .ioctl = dgram_ioctl, + .getsockopt = dgram_getsockopt, + .setsockopt = dgram_setsockopt, +}; + static const struct proto_ops ieee802154_dgram_ops = { .family = PF_IEEE802154, .owner = THIS_MODULE, diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c deleted file mode 100644 index d1930b70c4aa..000000000000 --- a/net/ieee802154/dgram.c +++ /dev/null @@ -1,549 +0,0 @@ -/* - * IEEE 802.15.4 dgram socket interface - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Written by: - * Sergey Lapin - * Dmitry Eremin-Solenikov - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "af802154.h" - -static HLIST_HEAD(dgram_head); -static DEFINE_RWLOCK(dgram_lock); - -struct dgram_sock { - struct sock sk; - - struct ieee802154_addr src_addr; - struct ieee802154_addr dst_addr; - - unsigned int bound:1; - unsigned int connected:1; - unsigned int want_ack:1; - unsigned int secen:1; - unsigned int secen_override:1; - unsigned int seclevel:3; - unsigned int seclevel_override:1; -}; - -static inline struct dgram_sock *dgram_sk(const struct sock *sk) -{ - return container_of(sk, struct dgram_sock, sk); -} - -static void dgram_hash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - sk_add_node(sk, &dgram_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&dgram_lock); -} - -static void dgram_unhash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&dgram_lock); -} - -static int dgram_init(struct sock *sk) -{ - struct dgram_sock *ro = dgram_sk(sk); - - ro->want_ack = 1; - return 0; -} - -static void dgram_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct ieee802154_addr haddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = -EINVAL; - struct net_device *dev; - - lock_sock(sk); - - ro->bound = 0; - - if (len < sizeof(*addr)) - goto out; - - if (addr->family != AF_IEEE802154) - goto out; - - ieee802154_addr_from_sa(&haddr, &addr->addr); - dev = ieee802154_get_dev(sock_net(sk), &haddr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - ro->src_addr = haddr; - - ro->bound = 1; - err = 0; -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) -{ - switch (cmd) { - case SIOCOUTQ: - { - int amount = sk_wmem_alloc_get(sk); - - return put_user(amount, (int __user *)arg); - } - - case SIOCINQ: - { - struct sk_buff *skb; - unsigned long amount; - - amount = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) { - /* We will only return the amount - * of this packet since that is all - * that will be read. 
- */ - amount = skb->len - ieee802154_hdr_length(skb); - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); - } - } - - return -ENOIOCTLCMD; -} - -/* FIXME: autobind */ -static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, - int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = 0; - - if (len < sizeof(*addr)) - return -EINVAL; - - if (addr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - if (!ro->bound) { - err = -ENETUNREACH; - goto out; - } - - ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); - ro->connected = 1; - -out: - release_sock(sk); - return err; -} - -static int dgram_disconnect(struct sock *sk, int flags) -{ - struct dgram_sock *ro = dgram_sk(sk); - - lock_sock(sk); - ro->connected = 0; - release_sock(sk); - - return 0; -} - -static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff *skb; - struct ieee802154_mac_cb *cb; - struct dgram_sock *ro = dgram_sk(sk); - struct ieee802154_addr dst_addr; - int hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - if (!ro->connected && !msg->msg_name) - return -EDESTADDRREQ; - else if (ro->connected && msg->msg_name) - return -EISCONN; - - if (!ro->bound) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EMSGSIZE; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, - &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_network_header(skb); - - cb = mac_cb_init(skb); - cb->type = IEEE802154_FC_TYPE_DATA; - cb->ackreq = ro->want_ack; - - if (msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_ieee802154*, - daddr, msg->msg_name); - - ieee802154_addr_from_sa(&dst_addr, &daddr->addr); - } else { - dst_addr = ro->dst_addr; - } - - cb->secen = ro->secen; - cb->secen_override = ro->secen_override; - cb->seclevel = ro->seclevel; - cb->seclevel_override = ro->seclevel_override; - - err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, - ro->bound ? &ro->src_addr : NULL, size); - if (err < 0) - goto out_skb; - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - /* FIXME: skip headers if necessary ?! 
*/ - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, sk, skb); - - if (saddr) { - saddr->family = AF_IEEE802154; - ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); - *addr_len = sizeof(*saddr); - } - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -static inline bool -ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, - struct dgram_sock *ro) -{ - if (!ro->bound) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_LONG && - hw_addr == ro->src_addr.extended_addr) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && - pan_id == ro->src_addr.pan_id && - short_addr == ro->src_addr.short_addr) - return true; - - return false; -} - -int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk, *prev = NULL; - int ret = NET_RX_SUCCESS; - __le16 pan_id, short_addr; - __le64 hw_addr; - - /* Data frame processing */ - BUG_ON(dev->type != ARPHRD_IEEE802154); - - pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); - hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); - - read_lock(&dgram_lock); - sk_for_each(sk, &dgram_head) { - if (ieee802154_match_sock(hw_addr, pan_id, short_addr, - dgram_sk(sk))) { - if (prev) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - dgram_rcv_skb(prev, clone); - } - - prev = sk; - } - } - - if (prev) { - dgram_rcv_skb(prev, skb); - } else { - kfree_skb(skb); - ret = NET_RX_DROP; - } - read_unlock(&dgram_lock); - - return ret; -} - -static int dgram_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - - int val, len; - - if (level != SOL_IEEE802154) - return -EOPNOTSUPP; - - if (get_user(len, optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(int)); - - switch (optname) { - case WPAN_WANTACK: - val = ro->want_ack; - break; - case WPAN_SECURITY: - if (!ro->secen_override) - val = WPAN_SECURITY_DEFAULT; - else if (ro->secen) - val = WPAN_SECURITY_ON; - else - val = WPAN_SECURITY_OFF; - break; - case WPAN_SECURITY_LEVEL: - if (!ro->seclevel_override) - val = WPAN_SECURITY_LEVEL_DEFAULT; - else - val = ro->seclevel; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &val, len)) - return -EFAULT; - return 0; -} - -static int dgram_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - struct net *net = sock_net(sk); - int val; - int err = 0; - - if (optlen < sizeof(int)) - return -EINVAL; - - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case WPAN_WANTACK: - ro->want_ack = !!val; - break; - case WPAN_SECURITY: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - switch (val) { - case WPAN_SECURITY_DEFAULT: - ro->secen_override = 0; - break; - case WPAN_SECURITY_ON: - ro->secen_override = 1; - ro->secen = 1; - 
break; - case WPAN_SECURITY_OFF: - ro->secen_override = 1; - ro->secen = 0; - break; - default: - err = -EINVAL; - break; - } - break; - case WPAN_SECURITY_LEVEL: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - if (val < WPAN_SECURITY_LEVEL_DEFAULT || - val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { - err = -EINVAL; - } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { - ro->seclevel_override = 0; - } else { - ro->seclevel_override = 1; - ro->seclevel = val; - } - break; - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -struct proto ieee802154_dgram_prot = { - .name = "IEEE-802.15.4-MAC", - .owner = THIS_MODULE, - .obj_size = sizeof(struct dgram_sock), - .init = dgram_init, - .close = dgram_close, - .bind = dgram_bind, - .sendmsg = dgram_sendmsg, - .recvmsg = dgram_recvmsg, - .hash = dgram_hash, - .unhash = dgram_unhash, - .connect = dgram_connect, - .disconnect = dgram_disconnect, - .ioctl = dgram_ioctl, - .getsockopt = dgram_getsockopt, - .setsockopt = dgram_setsockopt, -}; - diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c deleted file mode 100644 index 1674b115c891..000000000000 --- a/net/ieee802154/raw.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Raw IEEE 802.15.4 sockets - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Written by: - * Sergey Lapin - * Dmitry Eremin-Solenikov - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "af802154.h" - -static HLIST_HEAD(raw_head); -static DEFINE_RWLOCK(raw_lock); - -static void raw_hash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - sk_add_node(sk, &raw_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&raw_lock); -} - -static void raw_unhash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&raw_lock); -} - -static void raw_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) -{ - struct ieee802154_addr addr; - struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; - int err = 0; - struct net_device *dev = NULL; - - if (len < sizeof(*uaddr)) - return -EINVAL; - - uaddr = (struct sockaddr_ieee802154 *)_uaddr; - if (uaddr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - ieee802154_addr_from_sa(&addr, &uaddr->addr); - dev = ieee802154_get_dev(sock_net(sk), &addr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - sk->sk_bound_dev_if = dev->ifindex; - sk_dst_reset(sk); - -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int raw_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) -{ - return -ENOTSUPP; -} - -static int raw_disconnect(struct sock *sk, int flags) -{ - return 0; -} - -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff *skb; - int hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - lock_sock(sk); - if (!sk->sk_bound_dev_if) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); - release_sock(sk); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EINVAL; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, 
sk, skb); - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk; - - read_lock(&raw_lock); - sk_for_each(sk, &raw_head) { - bh_lock_sock(sk); - if (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == dev->ifindex) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - raw_rcv_skb(sk, clone); - } - bh_unlock_sock(sk); - } - read_unlock(&raw_lock); -} - -static int raw_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - return -EOPNOTSUPP; -} - -static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - return -EOPNOTSUPP; -} - -struct proto ieee802154_raw_prot = { - .name = "IEEE-802.15.4-RAW", - .owner = THIS_MODULE, - .obj_size = sizeof(struct sock), - .close = raw_close, - .bind = raw_bind, - .sendmsg = raw_sendmsg, - .recvmsg = raw_recvmsg, - .hash = raw_hash, - .unhash = raw_unhash, - .connect = raw_connect, - .disconnect = raw_disconnect, - .getsockopt = raw_getsockopt, - .setsockopt = raw_setsockopt, -}; -- cgit v1.2.3 From df2f65de4b2d535c5b8e905768dd5fff1bfca445 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 31 Dec 2014 19:39:11 +0100 Subject: ieee802154: socket: fix checkpatch issue This patch solves the following checkpatch issue: CHECK: Comparison to NULL could be written "skb" + if (skb != NULL) { Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/af_ieee802154.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 64beb0b3c560..2878d8ca6d3b 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -560,7 +560,7 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) amount = 0; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) { + if (skb) { /* We will only return the amount * of this packet since that is all * that will be read. -- cgit v1.2.3 From 71e36b1b01b9573f09141acb76e1b4c0809a0b4a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 31 Dec 2014 19:39:12 +0100 Subject: ieee802154: rename af_ieee802154.c to socket.c This patch renames "af_ieee802154.c" to "socket.c". This is just a cleanup to give the file a short name that describes the implementation in a more understandable way.
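For orientation, the datagram interface that now lives in socket.c can be exercised from user space roughly as follows. This is only a sketch: AF_IEEE802154 and the SOL_IEEE802154/WPAN_WANTACK values are restated locally as assumptions (they mirror the kernel headers of this era, which were not exported as UAPI), and binding, addressing and actual frame transmission are left out.

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

/* Assumed constants, mirroring linux/socket.h and the kernel's
 * af_ieee802154.h at the time of this series; verify them against the
 * running kernel before relying on them. */
#ifndef AF_IEEE802154
#define AF_IEEE802154   36
#endif
#define SOL_IEEE802154  0
#define WPAN_WANTACK    0

int main(void)
{
        int one = 1;
        /* One 802.15.4 data frame per send/receive on a DGRAM socket. */
        int fd = socket(AF_IEEE802154, SOCK_DGRAM, 0);

        if (fd < 0) {
                perror("socket");
                return 1;
        }

        /* Ask the MAC layer to request acknowledgements; this is handled
         * by dgram_setsockopt() in the code consolidated above. */
        if (setsockopt(fd, SOL_IEEE802154, WPAN_WANTACK, &one, sizeof(one)) < 0)
                perror("setsockopt");

        close(fd);
        return 0;
}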
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/Makefile | 2 +- net/ieee802154/af_ieee802154.c | 1125 ---------------------------------------- net/ieee802154/socket.c | 1125 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1126 insertions(+), 1126 deletions(-) delete mode 100644 net/ieee802154/af_ieee802154.c create mode 100644 net/ieee802154/socket.c (limited to 'net') diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index e6ddfcd73510..88bde78754c5 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ header_ops.o sysfs.o nl802154.o -ieee802154_socket-y := af_ieee802154.o +ieee802154_socket-y := socket.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c deleted file mode 100644 index 2878d8ca6d3b..000000000000 --- a/net/ieee802154/af_ieee802154.c +++ /dev/null @@ -1,1125 +0,0 @@ -/* - * IEEE802154.4 socket interface - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Written by: - * Sergey Lapin - * Maxim Gorbachyov - */ - -#include -#include -#include -#include -#include -#include /* For TIOCOUTQ/INQ */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/* Utility function for families */ -static struct net_device* -ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) -{ - struct net_device *dev = NULL; - struct net_device *tmp; - __le16 pan_id, short_addr; - u8 hwaddr[IEEE802154_ADDR_LEN]; - - switch (addr->mode) { - case IEEE802154_ADDR_LONG: - ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); - rcu_read_lock(); - dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); - if (dev) - dev_hold(dev); - rcu_read_unlock(); - break; - case IEEE802154_ADDR_SHORT: - if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || - addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || - addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) - break; - - rtnl_lock(); - - for_each_netdev(net, tmp) { - if (tmp->type != ARPHRD_IEEE802154) - continue; - - pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); - short_addr = - ieee802154_mlme_ops(tmp)->get_short_addr(tmp); - - if (pan_id == addr->pan_id && - short_addr == addr->short_addr) { - dev = tmp; - dev_hold(dev); - break; - } - } - - rtnl_unlock(); - break; - default: - pr_warn("Unsupported ieee802154 address type: %d\n", - addr->mode); - break; - } - - return dev; -} - -static int ieee802154_sock_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (sk) { - sock->sk = NULL; - sk->sk_prot->close(sk, 0); - } - return 0; -} - -static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - - return sk->sk_prot->sendmsg(iocb, sk, msg, len); -} - -static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, - int 
addr_len) -{ - struct sock *sk = sock->sk; - - if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); - - return sock_no_bind(sock, uaddr, addr_len); -} - -static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) -{ - struct sock *sk = sock->sk; - - if (addr_len < sizeof(uaddr->sa_family)) - return -EINVAL; - - if (uaddr->sa_family == AF_UNSPEC) - return sk->sk_prot->disconnect(sk, flags); - - return sk->sk_prot->connect(sk, uaddr, addr_len); -} - -static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, - unsigned int cmd) -{ - struct ifreq ifr; - int ret = -ENOIOCTLCMD; - struct net_device *dev; - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; - - dev_load(sock_net(sk), ifr.ifr_name); - dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); - - if (!dev) - return -ENODEV; - - if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) - ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); - - if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) - ret = -EFAULT; - dev_put(dev); - - return ret; -} - -static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) -{ - struct sock *sk = sock->sk; - - switch (cmd) { - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec __user *)arg); - case SIOCGIFADDR: - case SIOCSIFADDR: - return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, - cmd); - default: - if (!sk->sk_prot->ioctl) - return -ENOIOCTLCMD; - return sk->sk_prot->ioctl(sk, cmd, arg); - } -} - -/* RAW Sockets (802.15.4 created in userspace) */ -static HLIST_HEAD(raw_head); -static DEFINE_RWLOCK(raw_lock); - -static void raw_hash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - sk_add_node(sk, &raw_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&raw_lock); -} - -static void raw_unhash(struct sock *sk) -{ - write_lock_bh(&raw_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&raw_lock); -} - -static void raw_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) -{ - struct ieee802154_addr addr; - struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; - int err = 0; - struct net_device *dev = NULL; - - if (len < sizeof(*uaddr)) - return -EINVAL; - - uaddr = (struct sockaddr_ieee802154 *)_uaddr; - if (uaddr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - ieee802154_addr_from_sa(&addr, &uaddr->addr); - dev = ieee802154_get_dev(sock_net(sk), &addr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - sk->sk_bound_dev_if = dev->ifindex; - sk_dst_reset(sk); - -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int raw_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) -{ - return -ENOTSUPP; -} - -static int raw_disconnect(struct sock *sk, int flags) -{ - return 0; -} - -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff *skb; - int hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - 
lock_sock(sk); - if (!sk->sk_bound_dev_if) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); - release_sock(sk); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EINVAL; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, sk, skb); - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -static void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk; - - read_lock(&raw_lock); - sk_for_each(sk, &raw_head) { - bh_lock_sock(sk); - if (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == dev->ifindex) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - raw_rcv_skb(sk, clone); - } - bh_unlock_sock(sk); - } - read_unlock(&raw_lock); -} - -static int raw_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - return -EOPNOTSUPP; -} - -static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - return -EOPNOTSUPP; -} - -static struct proto ieee802154_raw_prot = { - .name = "IEEE-802.15.4-RAW", - .owner = THIS_MODULE, - .obj_size = sizeof(struct sock), - .close = raw_close, - .bind = raw_bind, - .sendmsg = raw_sendmsg, - .recvmsg = raw_recvmsg, - .hash = raw_hash, - .unhash = raw_unhash, - .connect = raw_connect, - .disconnect = raw_disconnect, - .getsockopt = raw_getsockopt, - .setsockopt = raw_setsockopt, -}; - -static const struct proto_ops ieee802154_raw_ops = { - .family = PF_IEEE802154, - .owner = THIS_MODULE, - .release = ieee802154_sock_release, - .bind = ieee802154_sock_bind, - .connect = ieee802154_sock_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = datagram_poll, - .ioctl = ieee802154_sock_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = 
sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = ieee802154_sock_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -/* DGRAM Sockets (802.15.4 dataframes) */ -static HLIST_HEAD(dgram_head); -static DEFINE_RWLOCK(dgram_lock); - -struct dgram_sock { - struct sock sk; - - struct ieee802154_addr src_addr; - struct ieee802154_addr dst_addr; - - unsigned int bound:1; - unsigned int connected:1; - unsigned int want_ack:1; - unsigned int secen:1; - unsigned int secen_override:1; - unsigned int seclevel:3; - unsigned int seclevel_override:1; -}; - -static inline struct dgram_sock *dgram_sk(const struct sock *sk) -{ - return container_of(sk, struct dgram_sock, sk); -} - -static void dgram_hash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - sk_add_node(sk, &dgram_head); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock_bh(&dgram_lock); -} - -static void dgram_unhash(struct sock *sk) -{ - write_lock_bh(&dgram_lock); - if (sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&dgram_lock); -} - -static int dgram_init(struct sock *sk) -{ - struct dgram_sock *ro = dgram_sk(sk); - - ro->want_ack = 1; - return 0; -} - -static void dgram_close(struct sock *sk, long timeout) -{ - sk_common_release(sk); -} - -static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct ieee802154_addr haddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = -EINVAL; - struct net_device *dev; - - lock_sock(sk); - - ro->bound = 0; - - if (len < sizeof(*addr)) - goto out; - - if (addr->family != AF_IEEE802154) - goto out; - - ieee802154_addr_from_sa(&haddr, &addr->addr); - dev = ieee802154_get_dev(sock_net(sk), &haddr); - if (!dev) { - err = -ENODEV; - goto out; - } - - if (dev->type != ARPHRD_IEEE802154) { - err = -ENODEV; - goto out_put; - } - - ro->src_addr = haddr; - - ro->bound = 1; - err = 0; -out_put: - dev_put(dev); -out: - release_sock(sk); - - return err; -} - -static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) -{ - switch (cmd) { - case SIOCOUTQ: - { - int amount = sk_wmem_alloc_get(sk); - - return put_user(amount, (int __user *)arg); - } - - case SIOCINQ: - { - struct sk_buff *skb; - unsigned long amount; - - amount = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb) { - /* We will only return the amount - * of this packet since that is all - * that will be read. 
- */ - amount = skb->len - ieee802154_hdr_length(skb); - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); - } - } - - return -ENOIOCTLCMD; -} - -/* FIXME: autobind */ -static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, - int len) -{ - struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; - struct dgram_sock *ro = dgram_sk(sk); - int err = 0; - - if (len < sizeof(*addr)) - return -EINVAL; - - if (addr->family != AF_IEEE802154) - return -EINVAL; - - lock_sock(sk); - - if (!ro->bound) { - err = -ENETUNREACH; - goto out; - } - - ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); - ro->connected = 1; - -out: - release_sock(sk); - return err; -} - -static int dgram_disconnect(struct sock *sk, int flags) -{ - struct dgram_sock *ro = dgram_sk(sk); - - lock_sock(sk); - ro->connected = 0; - release_sock(sk); - - return 0; -} - -static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) -{ - struct net_device *dev; - unsigned int mtu; - struct sk_buff *skb; - struct ieee802154_mac_cb *cb; - struct dgram_sock *ro = dgram_sk(sk); - struct ieee802154_addr dst_addr; - int hlen, tlen; - int err; - - if (msg->msg_flags & MSG_OOB) { - pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); - return -EOPNOTSUPP; - } - - if (!ro->connected && !msg->msg_name) - return -EDESTADDRREQ; - else if (ro->connected && msg->msg_name) - return -EISCONN; - - if (!ro->bound) - dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); - else - dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); - - if (!dev) { - pr_debug("no dev\n"); - err = -ENXIO; - goto out; - } - mtu = dev->mtu; - pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - - if (size > mtu) { - pr_debug("size = %Zu, mtu = %u\n", size, mtu); - err = -EMSGSIZE; - goto out_dev; - } - - hlen = LL_RESERVED_SPACE(dev); - tlen = dev->needed_tailroom; - skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, - &err); - if (!skb) - goto out_dev; - - skb_reserve(skb, hlen); - - skb_reset_network_header(skb); - - cb = mac_cb_init(skb); - cb->type = IEEE802154_FC_TYPE_DATA; - cb->ackreq = ro->want_ack; - - if (msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_ieee802154*, - daddr, msg->msg_name); - - ieee802154_addr_from_sa(&dst_addr, &daddr->addr); - } else { - dst_addr = ro->dst_addr; - } - - cb->secen = ro->secen; - cb->secen_override = ro->secen_override; - cb->seclevel = ro->seclevel; - cb->seclevel_override = ro->seclevel_override; - - err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, - ro->bound ? &ro->src_addr : NULL, size); - if (err < 0) - goto out_skb; - - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err < 0) - goto out_skb; - - skb->dev = dev; - skb->sk = sk; - skb->protocol = htons(ETH_P_IEEE802154); - - dev_put(dev); - - err = dev_queue_xmit(skb); - if (err > 0) - err = net_xmit_errno(err); - - return err ?: size; - -out_skb: - kfree_skb(skb); -out_dev: - dev_put(dev); -out: - return err; -} - -static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) -{ - size_t copied = 0; - int err = -EOPNOTSUPP; - struct sk_buff *skb; - DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); - - skb = skb_recv_datagram(sk, flags, noblock, &err); - if (!skb) - goto out; - - copied = skb->len; - if (len < copied) { - msg->msg_flags |= MSG_TRUNC; - copied = len; - } - - /* FIXME: skip headers if necessary ?! 
*/ - err = skb_copy_datagram_msg(skb, 0, msg, copied); - if (err) - goto done; - - sock_recv_ts_and_drops(msg, sk, skb); - - if (saddr) { - saddr->family = AF_IEEE802154; - ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); - *addr_len = sizeof(*saddr); - } - - if (flags & MSG_TRUNC) - copied = skb->len; -done: - skb_free_datagram(sk, skb); -out: - if (err) - return err; - return copied; -} - -static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NET_RX_DROP; - - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); - return NET_RX_DROP; - } - - return NET_RX_SUCCESS; -} - -static inline bool -ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, - struct dgram_sock *ro) -{ - if (!ro->bound) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_LONG && - hw_addr == ro->src_addr.extended_addr) - return true; - - if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && - pan_id == ro->src_addr.pan_id && - short_addr == ro->src_addr.short_addr) - return true; - - return false; -} - -static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) -{ - struct sock *sk, *prev = NULL; - int ret = NET_RX_SUCCESS; - __le16 pan_id, short_addr; - __le64 hw_addr; - - /* Data frame processing */ - BUG_ON(dev->type != ARPHRD_IEEE802154); - - pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); - hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); - - read_lock(&dgram_lock); - sk_for_each(sk, &dgram_head) { - if (ieee802154_match_sock(hw_addr, pan_id, short_addr, - dgram_sk(sk))) { - if (prev) { - struct sk_buff *clone; - - clone = skb_clone(skb, GFP_ATOMIC); - if (clone) - dgram_rcv_skb(prev, clone); - } - - prev = sk; - } - } - - if (prev) { - dgram_rcv_skb(prev, skb); - } else { - kfree_skb(skb); - ret = NET_RX_DROP; - } - read_unlock(&dgram_lock); - - return ret; -} - -static int dgram_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - - int val, len; - - if (level != SOL_IEEE802154) - return -EOPNOTSUPP; - - if (get_user(len, optlen)) - return -EFAULT; - - len = min_t(unsigned int, len, sizeof(int)); - - switch (optname) { - case WPAN_WANTACK: - val = ro->want_ack; - break; - case WPAN_SECURITY: - if (!ro->secen_override) - val = WPAN_SECURITY_DEFAULT; - else if (ro->secen) - val = WPAN_SECURITY_ON; - else - val = WPAN_SECURITY_OFF; - break; - case WPAN_SECURITY_LEVEL: - if (!ro->seclevel_override) - val = WPAN_SECURITY_LEVEL_DEFAULT; - else - val = ro->seclevel; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &val, len)) - return -EFAULT; - return 0; -} - -static int dgram_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct dgram_sock *ro = dgram_sk(sk); - struct net *net = sock_net(sk); - int val; - int err = 0; - - if (optlen < sizeof(int)) - return -EINVAL; - - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case WPAN_WANTACK: - ro->want_ack = !!val; - break; - case WPAN_SECURITY: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - switch (val) { - case WPAN_SECURITY_DEFAULT: - ro->secen_override = 0; - break; - case WPAN_SECURITY_ON: - ro->secen_override = 1; - ro->secen 
= 1; - break; - case WPAN_SECURITY_OFF: - ro->secen_override = 1; - ro->secen = 0; - break; - default: - err = -EINVAL; - break; - } - break; - case WPAN_SECURITY_LEVEL: - if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && - !ns_capable(net->user_ns, CAP_NET_RAW)) { - err = -EPERM; - break; - } - - if (val < WPAN_SECURITY_LEVEL_DEFAULT || - val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { - err = -EINVAL; - } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { - ro->seclevel_override = 0; - } else { - ro->seclevel_override = 1; - ro->seclevel = val; - } - break; - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static struct proto ieee802154_dgram_prot = { - .name = "IEEE-802.15.4-MAC", - .owner = THIS_MODULE, - .obj_size = sizeof(struct dgram_sock), - .init = dgram_init, - .close = dgram_close, - .bind = dgram_bind, - .sendmsg = dgram_sendmsg, - .recvmsg = dgram_recvmsg, - .hash = dgram_hash, - .unhash = dgram_unhash, - .connect = dgram_connect, - .disconnect = dgram_disconnect, - .ioctl = dgram_ioctl, - .getsockopt = dgram_getsockopt, - .setsockopt = dgram_setsockopt, -}; - -static const struct proto_ops ieee802154_dgram_ops = { - .family = PF_IEEE802154, - .owner = THIS_MODULE, - .release = ieee802154_sock_release, - .bind = ieee802154_sock_bind, - .connect = ieee802154_sock_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = sock_no_getname, - .poll = datagram_poll, - .ioctl = ieee802154_sock_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = ieee802154_sock_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -/* Create a socket. Initialise the socket, blank the addresses - * set the state. 
- */ -static int ieee802154_create(struct net *net, struct socket *sock, - int protocol, int kern) -{ - struct sock *sk; - int rc; - struct proto *proto; - const struct proto_ops *ops; - - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - - switch (sock->type) { - case SOCK_RAW: - proto = &ieee802154_raw_prot; - ops = &ieee802154_raw_ops; - break; - case SOCK_DGRAM: - proto = &ieee802154_dgram_prot; - ops = &ieee802154_dgram_ops; - break; - default: - rc = -ESOCKTNOSUPPORT; - goto out; - } - - rc = -ENOMEM; - sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); - if (!sk) - goto out; - rc = 0; - - sock->ops = ops; - - sock_init_data(sock, sk); - /* FIXME: sk->sk_destruct */ - sk->sk_family = PF_IEEE802154; - - /* Checksums on by default */ - sock_set_flag(sk, SOCK_ZAPPED); - - if (sk->sk_prot->hash) - sk->sk_prot->hash(sk); - - if (sk->sk_prot->init) { - rc = sk->sk_prot->init(sk); - if (rc) - sk_common_release(sk); - } -out: - return rc; -} - -static const struct net_proto_family ieee802154_family_ops = { - .family = PF_IEEE802154, - .create = ieee802154_create, - .owner = THIS_MODULE, -}; - -static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - if (!netif_running(dev)) - goto drop; - pr_debug("got frame, type %d, dev %p\n", dev->type, dev); -#ifdef DEBUG - print_hex_dump_bytes("ieee802154_rcv ", - DUMP_PREFIX_NONE, skb->data, skb->len); -#endif - - if (!net_eq(dev_net(dev), &init_net)) - goto drop; - - ieee802154_raw_deliver(dev, skb); - - if (dev->type != ARPHRD_IEEE802154) - goto drop; - - if (skb->pkt_type != PACKET_OTHERHOST) - return ieee802154_dgram_deliver(dev, skb); - -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -static struct packet_type ieee802154_packet_type = { - .type = htons(ETH_P_IEEE802154), - .func = ieee802154_rcv, -}; - -static int __init af_ieee802154_init(void) -{ - int rc = -EINVAL; - - rc = proto_register(&ieee802154_raw_prot, 1); - if (rc) - goto out; - - rc = proto_register(&ieee802154_dgram_prot, 1); - if (rc) - goto err_dgram; - - /* Tell SOCKET that we are alive */ - rc = sock_register(&ieee802154_family_ops); - if (rc) - goto err_sock; - dev_add_pack(&ieee802154_packet_type); - - rc = 0; - goto out; - -err_sock: - proto_unregister(&ieee802154_dgram_prot); -err_dgram: - proto_unregister(&ieee802154_raw_prot); -out: - return rc; -} - -static void __exit af_ieee802154_remove(void) -{ - dev_remove_pack(&ieee802154_packet_type); - sock_unregister(PF_IEEE802154); - proto_unregister(&ieee802154_dgram_prot); - proto_unregister(&ieee802154_raw_prot); -} - -module_init(af_ieee802154_init); -module_exit(af_ieee802154_remove); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_IEEE802154); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c new file mode 100644 index 000000000000..2878d8ca6d3b --- /dev/null +++ b/net/ieee802154/socket.c @@ -0,0 +1,1125 @@ +/* + * IEEE802154.4 socket interface + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Written by: + * Sergey Lapin + * Maxim Gorbachyov + */ + +#include +#include +#include +#include +#include +#include /* For TIOCOUTQ/INQ */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Utility function for families */ +static struct net_device* +ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) +{ + struct net_device *dev = NULL; + struct net_device *tmp; + __le16 pan_id, short_addr; + u8 hwaddr[IEEE802154_ADDR_LEN]; + + switch (addr->mode) { + case IEEE802154_ADDR_LONG: + ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); + rcu_read_lock(); + dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + break; + case IEEE802154_ADDR_SHORT: + if (addr->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || + addr->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) + break; + + rtnl_lock(); + + for_each_netdev(net, tmp) { + if (tmp->type != ARPHRD_IEEE802154) + continue; + + pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp); + short_addr = + ieee802154_mlme_ops(tmp)->get_short_addr(tmp); + + if (pan_id == addr->pan_id && + short_addr == addr->short_addr) { + dev = tmp; + dev_hold(dev); + break; + } + } + + rtnl_unlock(); + break; + default: + pr_warn("Unsupported ieee802154 address type: %d\n", + addr->mode); + break; + } + + return dev; +} + +static int ieee802154_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + sock->sk = NULL; + sk->sk_prot->close(sk, 0); + } + return 0; +} + +static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + + return sk->sk_prot->sendmsg(iocb, sk, msg, len); +} + +static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) +{ + struct sock *sk = sock->sk; + + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, uaddr, addr_len); + + return sock_no_bind(sock, uaddr, addr_len); +} + +static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk = sock->sk; + + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) + return sk->sk_prot->disconnect(sk, flags); + + return sk->sk_prot->connect(sk, uaddr, addr_len); +} + +static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, + unsigned int cmd) +{ + struct ifreq ifr; + int ret = -ENOIOCTLCMD; + struct net_device *dev; + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + dev_load(sock_net(sk), ifr.ifr_name); + dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); + + if (!dev) + return -ENODEV; + + if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) + ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); + + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + ret = -EFAULT; + dev_put(dev); + + return ret; +} + +static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCGSTAMP: + return sock_get_timestamp(sk, (struct timeval __user *)arg); + case SIOCGSTAMPNS: + return sock_get_timestampns(sk, (struct timespec __user *)arg); + case SIOCGIFADDR: + case SIOCSIFADDR: + return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg, + cmd); + default: + if (!sk->sk_prot->ioctl) + 
return -ENOIOCTLCMD; + return sk->sk_prot->ioctl(sk, cmd, arg); + } +} + +/* RAW Sockets (802.15.4 created in userspace) */ +static HLIST_HEAD(raw_head); +static DEFINE_RWLOCK(raw_lock); + +static void raw_hash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + sk_add_node(sk, &raw_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&raw_lock); +} + +static void raw_unhash(struct sock *sk) +{ + write_lock_bh(&raw_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&raw_lock); +} + +static void raw_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) +{ + struct ieee802154_addr addr; + struct sockaddr_ieee802154 *uaddr = (struct sockaddr_ieee802154 *)_uaddr; + int err = 0; + struct net_device *dev = NULL; + + if (len < sizeof(*uaddr)) + return -EINVAL; + + uaddr = (struct sockaddr_ieee802154 *)_uaddr; + if (uaddr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + ieee802154_addr_from_sa(&addr, &uaddr->addr); + dev = ieee802154_get_dev(sock_net(sk), &addr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + sk->sk_bound_dev_if = dev->ifindex; + sk_dst_reset(sk); + +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int raw_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + return -ENOTSUPP; +} + +static int raw_disconnect(struct sock *sk, int flags) +{ + return 0; +} + +static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + lock_sock(sk); + if (!sk->sk_bound_dev_if) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + release_sock(sk); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EINVAL; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, hlen); + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (flags & MSG_TRUNC) + copied = 
skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk; + + read_lock(&raw_lock); + sk_for_each(sk, &raw_head) { + bh_lock_sock(sk); + if (!sk->sk_bound_dev_if || + sk->sk_bound_dev_if == dev->ifindex) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + raw_rcv_skb(sk, clone); + } + bh_unlock_sock(sk); + } + read_unlock(&raw_lock); +} + +static int raw_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + return -EOPNOTSUPP; +} + +static int raw_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + return -EOPNOTSUPP; +} + +static struct proto ieee802154_raw_prot = { + .name = "IEEE-802.15.4-RAW", + .owner = THIS_MODULE, + .obj_size = sizeof(struct sock), + .close = raw_close, + .bind = raw_bind, + .sendmsg = raw_sendmsg, + .recvmsg = raw_recvmsg, + .hash = raw_hash, + .unhash = raw_unhash, + .connect = raw_connect, + .disconnect = raw_disconnect, + .getsockopt = raw_getsockopt, + .setsockopt = raw_setsockopt, +}; + +static const struct proto_ops ieee802154_raw_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +/* DGRAM Sockets (802.15.4 dataframes) */ +static HLIST_HEAD(dgram_head); +static DEFINE_RWLOCK(dgram_lock); + +struct dgram_sock { + struct sock sk; + + struct ieee802154_addr src_addr; + struct ieee802154_addr dst_addr; + + unsigned int bound:1; + unsigned int connected:1; + unsigned int want_ack:1; + unsigned int secen:1; + unsigned int secen_override:1; + unsigned int seclevel:3; + unsigned int seclevel_override:1; +}; + +static inline struct dgram_sock *dgram_sk(const struct sock *sk) +{ + return container_of(sk, struct dgram_sock, sk); +} + +static void dgram_hash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + sk_add_node(sk, &dgram_head); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + write_unlock_bh(&dgram_lock); +} + +static void dgram_unhash(struct sock *sk) +{ + write_lock_bh(&dgram_lock); + if (sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(&dgram_lock); +} + +static int dgram_init(struct sock *sk) +{ + struct dgram_sock *ro = dgram_sk(sk); + + ro->want_ack = 1; + return 0; +} + +static void dgram_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct 
ieee802154_addr haddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = -EINVAL; + struct net_device *dev; + + lock_sock(sk); + + ro->bound = 0; + + if (len < sizeof(*addr)) + goto out; + + if (addr->family != AF_IEEE802154) + goto out; + + ieee802154_addr_from_sa(&haddr, &addr->addr); + dev = ieee802154_get_dev(sock_net(sk), &haddr); + if (!dev) { + err = -ENODEV; + goto out; + } + + if (dev->type != ARPHRD_IEEE802154) { + err = -ENODEV; + goto out_put; + } + + ro->src_addr = haddr; + + ro->bound = 1; + err = 0; +out_put: + dev_put(dev); +out: + release_sock(sk); + + return err; +} + +static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + { + int amount = sk_wmem_alloc_get(sk); + + return put_user(amount, (int __user *)arg); + } + + case SIOCINQ: + { + struct sk_buff *skb; + unsigned long amount; + + amount = 0; + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb) { + /* We will only return the amount + * of this packet since that is all + * that will be read. + */ + amount = skb->len - ieee802154_hdr_length(skb); + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } + } + + return -ENOIOCTLCMD; +} + +/* FIXME: autobind */ +static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, + int len) +{ + struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; + struct dgram_sock *ro = dgram_sk(sk); + int err = 0; + + if (len < sizeof(*addr)) + return -EINVAL; + + if (addr->family != AF_IEEE802154) + return -EINVAL; + + lock_sock(sk); + + if (!ro->bound) { + err = -ENETUNREACH; + goto out; + } + + ieee802154_addr_from_sa(&ro->dst_addr, &addr->addr); + ro->connected = 1; + +out: + release_sock(sk); + return err; +} + +static int dgram_disconnect(struct sock *sk, int flags) +{ + struct dgram_sock *ro = dgram_sk(sk); + + lock_sock(sk); + ro->connected = 0; + release_sock(sk); + + return 0; +} + +static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + struct net_device *dev; + unsigned int mtu; + struct sk_buff *skb; + struct ieee802154_mac_cb *cb; + struct dgram_sock *ro = dgram_sk(sk); + struct ieee802154_addr dst_addr; + int hlen, tlen; + int err; + + if (msg->msg_flags & MSG_OOB) { + pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags); + return -EOPNOTSUPP; + } + + if (!ro->connected && !msg->msg_name) + return -EDESTADDRREQ; + else if (ro->connected && msg->msg_name) + return -EISCONN; + + if (!ro->bound) + dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); + else + dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr); + + if (!dev) { + pr_debug("no dev\n"); + err = -ENXIO; + goto out; + } + mtu = dev->mtu; + pr_debug("name = %s, mtu = %u\n", dev->name, mtu); + + if (size > mtu) { + pr_debug("size = %Zu, mtu = %u\n", size, mtu); + err = -EMSGSIZE; + goto out_dev; + } + + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, + msg->msg_flags & MSG_DONTWAIT, + &err); + if (!skb) + goto out_dev; + + skb_reserve(skb, hlen); + + skb_reset_network_header(skb); + + cb = mac_cb_init(skb); + cb->type = IEEE802154_FC_TYPE_DATA; + cb->ackreq = ro->want_ack; + + if (msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_ieee802154*, + daddr, msg->msg_name); + + ieee802154_addr_from_sa(&dst_addr, &daddr->addr); + } else { + dst_addr = ro->dst_addr; + } + + cb->secen = ro->secen; + cb->secen_override = ro->secen_override; + 
cb->seclevel = ro->seclevel; + cb->seclevel_override = ro->seclevel_override; + + err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, + ro->bound ? &ro->src_addr : NULL, size); + if (err < 0) + goto out_skb; + + err = memcpy_from_msg(skb_put(skb, size), msg, size); + if (err < 0) + goto out_skb; + + skb->dev = dev; + skb->sk = sk; + skb->protocol = htons(ETH_P_IEEE802154); + + dev_put(dev); + + err = dev_queue_xmit(skb); + if (err > 0) + err = net_xmit_errno(err); + + return err ?: size; + +out_skb: + kfree_skb(skb); +out_dev: + dev_put(dev); +out: + return err; +} + +static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + size_t copied = 0; + int err = -EOPNOTSUPP; + struct sk_buff *skb; + DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + /* FIXME: skip headers if necessary ?! */ + err = skb_copy_datagram_msg(skb, 0, msg, copied); + if (err) + goto done; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (saddr) { + saddr->family = AF_IEEE802154; + ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); + *addr_len = sizeof(*saddr); + } + + if (flags & MSG_TRUNC) + copied = skb->len; +done: + skb_free_datagram(sk, skb); +out: + if (err) + return err; + return copied; +} + +static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + + if (sock_queue_rcv_skb(sk, skb) < 0) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +static inline bool +ieee802154_match_sock(__le64 hw_addr, __le16 pan_id, __le16 short_addr, + struct dgram_sock *ro) +{ + if (!ro->bound) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_LONG && + hw_addr == ro->src_addr.extended_addr) + return true; + + if (ro->src_addr.mode == IEEE802154_ADDR_SHORT && + pan_id == ro->src_addr.pan_id && + short_addr == ro->src_addr.short_addr) + return true; + + return false; +} + +static int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) +{ + struct sock *sk, *prev = NULL; + int ret = NET_RX_SUCCESS; + __le16 pan_id, short_addr; + __le64 hw_addr; + + /* Data frame processing */ + BUG_ON(dev->type != ARPHRD_IEEE802154); + + pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); + hw_addr = ieee802154_devaddr_from_raw(dev->dev_addr); + + read_lock(&dgram_lock); + sk_for_each(sk, &dgram_head) { + if (ieee802154_match_sock(hw_addr, pan_id, short_addr, + dgram_sk(sk))) { + if (prev) { + struct sk_buff *clone; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) + dgram_rcv_skb(prev, clone); + } + + prev = sk; + } + } + + if (prev) { + dgram_rcv_skb(prev, skb); + } else { + kfree_skb(skb); + ret = NET_RX_DROP; + } + read_unlock(&dgram_lock); + + return ret; +} + +static int dgram_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + + int val, len; + + if (level != SOL_IEEE802154) + return -EOPNOTSUPP; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + + switch (optname) { + case WPAN_WANTACK: + val = ro->want_ack; + break; + case WPAN_SECURITY: + if (!ro->secen_override) + val = WPAN_SECURITY_DEFAULT; + else if 
(ro->secen) + val = WPAN_SECURITY_ON; + else + val = WPAN_SECURITY_OFF; + break; + case WPAN_SECURITY_LEVEL: + if (!ro->seclevel_override) + val = WPAN_SECURITY_LEVEL_DEFAULT; + else + val = ro->seclevel; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +} + +static int dgram_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct dgram_sock *ro = dgram_sk(sk); + struct net *net = sock_net(sk); + int val; + int err = 0; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case WPAN_WANTACK: + ro->want_ack = !!val; + break; + case WPAN_SECURITY: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + switch (val) { + case WPAN_SECURITY_DEFAULT: + ro->secen_override = 0; + break; + case WPAN_SECURITY_ON: + ro->secen_override = 1; + ro->secen = 1; + break; + case WPAN_SECURITY_OFF: + ro->secen_override = 1; + ro->secen = 0; + break; + default: + err = -EINVAL; + break; + } + break; + case WPAN_SECURITY_LEVEL: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { + err = -EPERM; + break; + } + + if (val < WPAN_SECURITY_LEVEL_DEFAULT || + val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) { + err = -EINVAL; + } else if (val == WPAN_SECURITY_LEVEL_DEFAULT) { + ro->seclevel_override = 0; + } else { + ro->seclevel_override = 1; + ro->seclevel = val; + } + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static struct proto ieee802154_dgram_prot = { + .name = "IEEE-802.15.4-MAC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct dgram_sock), + .init = dgram_init, + .close = dgram_close, + .bind = dgram_bind, + .sendmsg = dgram_sendmsg, + .recvmsg = dgram_recvmsg, + .hash = dgram_hash, + .unhash = dgram_unhash, + .connect = dgram_connect, + .disconnect = dgram_disconnect, + .ioctl = dgram_ioctl, + .getsockopt = dgram_getsockopt, + .setsockopt = dgram_setsockopt, +}; + +static const struct proto_ops ieee802154_dgram_ops = { + .family = PF_IEEE802154, + .owner = THIS_MODULE, + .release = ieee802154_sock_release, + .bind = ieee802154_sock_bind, + .connect = ieee802154_sock_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = datagram_poll, + .ioctl = ieee802154_sock_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = ieee802154_sock_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +/* Create a socket. Initialise the socket, blank the addresses + * set the state. 
+ */ +static int ieee802154_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + struct sock *sk; + int rc; + struct proto *proto; + const struct proto_ops *ops; + + if (!net_eq(net, &init_net)) + return -EAFNOSUPPORT; + + switch (sock->type) { + case SOCK_RAW: + proto = &ieee802154_raw_prot; + ops = &ieee802154_raw_ops; + break; + case SOCK_DGRAM: + proto = &ieee802154_dgram_prot; + ops = &ieee802154_dgram_ops; + break; + default: + rc = -ESOCKTNOSUPPORT; + goto out; + } + + rc = -ENOMEM; + sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); + if (!sk) + goto out; + rc = 0; + + sock->ops = ops; + + sock_init_data(sock, sk); + /* FIXME: sk->sk_destruct */ + sk->sk_family = PF_IEEE802154; + + /* Checksums on by default */ + sock_set_flag(sk, SOCK_ZAPPED); + + if (sk->sk_prot->hash) + sk->sk_prot->hash(sk); + + if (sk->sk_prot->init) { + rc = sk->sk_prot->init(sk); + if (rc) + sk_common_release(sk); + } +out: + return rc; +} + +static const struct net_proto_family ieee802154_family_ops = { + .family = PF_IEEE802154, + .create = ieee802154_create, + .owner = THIS_MODULE, +}; + +static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + if (!netif_running(dev)) + goto drop; + pr_debug("got frame, type %d, dev %p\n", dev->type, dev); +#ifdef DEBUG + print_hex_dump_bytes("ieee802154_rcv ", + DUMP_PREFIX_NONE, skb->data, skb->len); +#endif + + if (!net_eq(dev_net(dev), &init_net)) + goto drop; + + ieee802154_raw_deliver(dev, skb); + + if (dev->type != ARPHRD_IEEE802154) + goto drop; + + if (skb->pkt_type != PACKET_OTHERHOST) + return ieee802154_dgram_deliver(dev, skb); + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type ieee802154_packet_type = { + .type = htons(ETH_P_IEEE802154), + .func = ieee802154_rcv, +}; + +static int __init af_ieee802154_init(void) +{ + int rc = -EINVAL; + + rc = proto_register(&ieee802154_raw_prot, 1); + if (rc) + goto out; + + rc = proto_register(&ieee802154_dgram_prot, 1); + if (rc) + goto err_dgram; + + /* Tell SOCKET that we are alive */ + rc = sock_register(&ieee802154_family_ops); + if (rc) + goto err_sock; + dev_add_pack(&ieee802154_packet_type); + + rc = 0; + goto out; + +err_sock: + proto_unregister(&ieee802154_dgram_prot); +err_dgram: + proto_unregister(&ieee802154_raw_prot); +out: + return rc; +} + +static void __exit af_ieee802154_remove(void) +{ + dev_remove_pack(&ieee802154_packet_type); + sock_unregister(PF_IEEE802154); + proto_unregister(&ieee802154_dgram_prot); + proto_unregister(&ieee802154_raw_prot); +} + +module_init(af_ieee802154_init); +module_exit(af_ieee802154_remove); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_IEEE802154); -- cgit v1.2.3 From 9dc52d49e28c05411ac855cf9223c48ed819507b Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 31 Dec 2014 19:39:13 +0100 Subject: ieee802154: handle config as menuconfig This patch handles the IEEE802154 Kconfig entry as menuconfig. Furthermore we move this entry out of "Network Options" and put it into "Networking" where the other networking subsystems are. This requires a menuconfig entry like all other networking subsystems. 
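As a usage-level aside on the AF_IEEE802154 socket interface that now lives in net/ieee802154/socket.c above: the sketch below opens a DGRAM socket, enables MAC-level acknowledgements via the SOL_IEEE802154/WPAN_WANTACK option handled by dgram_setsockopt(), and sends one frame through the dgram_sendmsg() path. This is a minimal illustration, not part of any patch in this series; the sockaddr_ieee802154 member names (addr.addr_type, addr.pan_id, addr.short_addr), the IEEE802154_ADDR_SHORT constant, the header include and the address values are assumptions based on the userspace copy of af_ieee802154.h rather than anything shown in the patches here.

/*
 * Hedged example only: exercises the dgram_setsockopt()/dgram_sendmsg()
 * paths implemented above.  The struct sockaddr_ieee802154 field names and
 * the include path are assumptions (userspace normally carries its own copy
 * of af_ieee802154.h); the PAN id and short address are made-up values.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include "af_ieee802154.h"	/* assumed: local copy of the kernel header */

int main(void)
{
	struct sockaddr_ieee802154 dst;
	int sk, one = 1;

	sk = socket(AF_IEEE802154, SOCK_DGRAM, 0);
	if (sk < 0) {
		perror("socket");
		return 1;
	}

	/* Request MAC-level ACKs; maps to WPAN_WANTACK in dgram_setsockopt(). */
	if (setsockopt(sk, SOL_IEEE802154, WPAN_WANTACK, &one, sizeof(one)) < 0)
		perror("setsockopt");

	/* Destination uses short addressing; the values are purely illustrative. */
	memset(&dst, 0, sizeof(dst));
	dst.family = AF_IEEE802154;
	dst.addr.addr_type = IEEE802154_ADDR_SHORT;
	dst.addr.pan_id = 0x0001;
	dst.addr.short_addr = 0x0002;

	/* Unconnected send: dgram_sendmsg() picks the first 802.15.4 device. */
	if (sendto(sk, "hello", 5, 0, (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("sendto");

	close(sk);
	return 0;
}

Binding the socket first with a source sockaddr_ieee802154 would instead make dgram_sendmsg() use the bound device, as in dgram_bind() above.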
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/Kconfig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 98a190dea97d..9ea0af49a4ea 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -1,4 +1,4 @@ -config IEEE802154 +menuconfig IEEE802154 tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support" ---help--- IEEE Std 802.15.4 defines a low data rate, low power and low @@ -10,9 +10,10 @@ config IEEE802154 Say Y here to compile LR-WPAN support into the kernel or say M to compile it as modules. +if IEEE802154 + config IEEE802154_SOCKET tristate "IEEE 802.15.4 socket interface" - depends on IEEE802154 default y ---help--- Socket interface for IEEE 802.15.4. Contains DGRAM sockets interface @@ -21,6 +22,8 @@ config IEEE802154_SOCKET config IEEE802154_6LOWPAN tristate "6lowpan support over IEEE 802.15.4" - depends on IEEE802154 && 6LOWPAN + depends on 6LOWPAN ---help--- IPv6 compression over IEEE 802.15.4. + +endif -- cgit v1.2.3 From 36cf942adf78c215f381554dfbac0ef8d05a6c21 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 2 Jan 2015 15:49:41 +0100 Subject: mac802154: fix kbuild test robot warning This patch fixes the following kbuild test robot warning: coccinelle warnings: (new ones prefixed by >>) >> net/mac802154/cfg.c:53:1-3: WARNING: PTR_ERR_OR_ZERO can be used Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/cfg.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index 7d31da503dcf..5d9f68c75e5f 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -51,10 +51,7 @@ ieee802154_add_iface(struct wpan_phy *phy, const char *name, struct net_device *err; err = ieee802154_if_add(local, name, type, extended_addr); - if (IS_ERR(err)) - return PTR_ERR(err); - - return 0; + return PTR_ERR_OR_ZERO(err); } static int -- cgit v1.2.3 From 8d24c0b43125ec26cc80e04588477a9a2afc025c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:14 +0100 Subject: rhashtable: Do hashing inside of rhashtable_lookup_compare() Hash the key inside of rhashtable_lookup_compare() like rhashtable_lookup() does. This allows us to simplify the hashing functions and keep them private. Signed-off-by: Thomas Graf Cc: netfilter-devel@vger.kernel.org Signed-off-by: David S.
Miller --- include/linux/rhashtable.h | 5 +-- lib/rhashtable.c | 91 +++++++++++++++------------------------------- net/netfilter/nft_hash.c | 46 ++++++++++++++--------- net/netlink/af_netlink.c | 5 +-- 4 files changed, 61 insertions(+), 86 deletions(-) (limited to 'net') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index b93fd89b2e5e..1b51221c6bbd 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -96,9 +96,6 @@ static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); -u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len); -u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr); - void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, @@ -111,7 +108,7 @@ int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(const struct rhashtable *ht, const void *key); -void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, +void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); void rhashtable_destroy(const struct rhashtable *ht); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6c3c723e902b..1ee0eb636ca3 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -42,69 +42,39 @@ static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) return (void *) he - ht->p.head_offset; } -static u32 __hashfn(const struct rhashtable *ht, const void *key, - u32 len, u32 hsize) +static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) { - u32 h; - - h = ht->p.hashfn(key, len, ht->p.hash_rnd); - - return h & (hsize - 1); -} - -/** - * rhashtable_hashfn - compute hash for key of given length - * @ht: hash table to compute for - * @key: pointer to key - * @len: length of key - * - * Computes the hash value using the hash function provided in the 'hashfn' - * of struct rhashtable_params. The returned value is guaranteed to be - * smaller than the number of buckets in the hash table. - */ -u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len) -{ - struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - return __hashfn(ht, key, len, tbl->size); + return hash & (tbl->size - 1); } -EXPORT_SYMBOL_GPL(rhashtable_hashfn); -static u32 obj_hashfn(const struct rhashtable *ht, const void *ptr, u32 hsize) +static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) { - if (unlikely(!ht->p.key_len)) { - u32 h; - - h = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + u32 hash; - return h & (hsize - 1); - } + if (unlikely(!ht->p.key_len)) + hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + else + hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, + ht->p.hash_rnd); - return __hashfn(ht, ptr + ht->p.key_offset, ht->p.key_len, hsize); + return hash; } -/** - * rhashtable_obj_hashfn - compute hash for hashed object - * @ht: hash table to compute for - * @ptr: pointer to hashed object - * - * Computes the hash value using the hash function `hashfn` respectively - * 'obj_hashfn' depending on whether the hash table is set up to work with - * a fixed length key. The returned value is guaranteed to be smaller than - * the number of buckets in the hash table. 
- */ -u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr) +static u32 key_hashfn(const struct rhashtable *ht, const void *key, u32 len) { struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + u32 hash; + + hash = ht->p.hashfn(key, len, ht->p.hash_rnd); - return obj_hashfn(ht, ptr, tbl->size); + return rht_bucket_index(tbl, hash); } -EXPORT_SYMBOL_GPL(rhashtable_obj_hashfn); static u32 head_hashfn(const struct rhashtable *ht, - const struct rhash_head *he, u32 hsize) + const struct bucket_table *tbl, + const struct rhash_head *he) { - return obj_hashfn(ht, rht_obj(ht, he), hsize); + return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he))); } static struct bucket_table *bucket_table_alloc(size_t nbuckets) @@ -170,9 +140,9 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, * reaches a node that doesn't hash to the same bucket as the * previous node p. Call the previous node p; */ - h = head_hashfn(ht, p, new_tbl->size); + h = head_hashfn(ht, new_tbl, p); rht_for_each(he, p->next, ht) { - if (head_hashfn(ht, he, new_tbl->size) != h) + if (head_hashfn(ht, new_tbl, he) != h) break; p = he; } @@ -184,7 +154,7 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, next = NULL; if (he) { rht_for_each(he, he->next, ht) { - if (head_hashfn(ht, he, new_tbl->size) == h) { + if (head_hashfn(ht, new_tbl, he) == h) { next = he; break; } @@ -237,9 +207,9 @@ int rhashtable_expand(struct rhashtable *ht) * single imprecise chain. */ for (i = 0; i < new_tbl->size; i++) { - h = i & (old_tbl->size - 1); + h = rht_bucket_index(old_tbl, i); rht_for_each(he, old_tbl->buckets[h], ht) { - if (head_hashfn(ht, he, new_tbl->size) == i) { + if (head_hashfn(ht, new_tbl, he) == i) { RCU_INIT_POINTER(new_tbl->buckets[i], he); break; } @@ -353,7 +323,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) ASSERT_RHT_MUTEX(ht); - hash = head_hashfn(ht, obj, tbl->size); + hash = head_hashfn(ht, tbl, obj); RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); rcu_assign_pointer(tbl->buckets[hash], obj); ht->nelems++; @@ -413,7 +383,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) ASSERT_RHT_MUTEX(ht); - h = head_hashfn(ht, obj, tbl->size); + h = head_hashfn(ht, tbl, obj); pprev = &tbl->buckets[h]; rht_for_each(he, tbl->buckets[h], ht) { @@ -452,7 +422,7 @@ void *rhashtable_lookup(const struct rhashtable *ht, const void *key) BUG_ON(!ht->p.key_len); - h = __hashfn(ht, key, ht->p.key_len, tbl->size); + h = key_hashfn(ht, key, ht->p.key_len); rht_for_each_rcu(he, tbl->buckets[h], ht) { if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, ht->p.key_len)) @@ -467,7 +437,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup); /** * rhashtable_lookup_compare - search hash table with compare function * @ht: hash table - * @hash: hash value of desired entry + * @key: the pointer to the key * @compare: compare function, must return true on match * @arg: argument passed on to compare function * @@ -479,15 +449,14 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup); * * Returns the first entry on which the compare function returned true. 
 */ -void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, +void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg) { const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); struct rhash_head *he; + u32 hash; - if (unlikely(hash >= tbl->size)) - return NULL; - + hash = key_hashfn(ht, key, ht->p.key_len); rht_for_each_rcu(he, tbl->buckets[hash], ht) { if (!compare(rht_obj(ht, he), arg)) continue; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 1e316ce4cb5d..614ee099ba36 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -94,28 +94,40 @@ static void nft_hash_remove(const struct nft_set *set, kfree(he); } +struct nft_compare_arg { + const struct nft_set *set; + struct nft_set_elem *elem; +}; + +static bool nft_hash_compare(void *ptr, void *arg) +{ + struct nft_hash_elem *he = ptr; + struct nft_compare_arg *x = arg; + + if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) { + x->elem->cookie = &he->node; + x->elem->flags = 0; + if (x->set->flags & NFT_SET_MAP) + nft_data_copy(&x->elem->data, he->data); + + return true; + } + + return false; +} + static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv); - struct rhash_head __rcu * const *pprev; - struct nft_hash_elem *he; - u32 h; - - h = rhashtable_hashfn(priv, &elem->key, set->klen); - pprev = &tbl->buckets[h]; - rht_for_each_entry_rcu(he, tbl->buckets[h], node) { - if (nft_data_cmp(&he->key, &elem->key, set->klen)) { - pprev = &he->node.next; - continue; - } + struct nft_compare_arg arg = { + .set = set, + .elem = elem, + }; - elem->cookie = (void *)pprev; - elem->flags = 0; - if (set->flags & NFT_SET_MAP) - nft_data_copy(&elem->data, he->data); + if (rhashtable_lookup_compare(priv, &elem->key, + &nft_hash_compare, &arg)) return 0; - } + return -ENOENT; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 84ea76ca3f1f..a5d7ed627563 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1002,11 +1002,8 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, .net = net, .portid = portid, }; - u32 hash; - hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid)); - - return rhashtable_lookup_compare(&table->hash, hash, + return rhashtable_lookup_compare(&table->hash, &portid, + &netlink_compare, &arg); } -- cgit v1.2.3 From 88d6ed15acff1cb44b1d1f3c0a393b7f7744957a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:16 +0100 Subject: rhashtable: Convert bucket iterators to take table and index This patch is in preparation for introducing per-bucket spinlocks. It extends all iterator macros to take the bucket table and bucket index. It also introduces a new rht_dereference_bucket() to handle protected accesses to buckets. It introduces a barrier() to the RCU iterators to prevent the compiler from caching the first element. The lockdep verifier is introduced as a stub which always succeeds and is properly implemented in the next patch when the locks are introduced. Signed-off-by: Thomas Graf Signed-off-by: David S.
Miller --- include/linux/rhashtable.h | 173 ++++++++++++++++++++++++++++++--------------- lib/rhashtable.c | 30 +++++--- net/netfilter/nft_hash.c | 12 ++-- net/netlink/af_netlink.c | 12 ++-- net/netlink/diag.c | 4 +- 5 files changed, 152 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 1b51221c6bbd..b54e24a08806 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -87,11 +87,18 @@ struct rhashtable { #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(const struct rhashtable *ht); +int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); #else static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) { return 1; } + +static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, + u32 hash) +{ + return 1; +} #endif /* CONFIG_PROVE_LOCKING */ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); @@ -119,92 +126,144 @@ void rhashtable_destroy(const struct rhashtable *ht); #define rht_dereference_rcu(p, ht) \ rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) -#define rht_entry(ptr, type, member) container_of(ptr, type, member) -#define rht_entry_safe(ptr, type, member) \ -({ \ - typeof(ptr) __ptr = (ptr); \ - __ptr ? rht_entry(__ptr, type, member) : NULL; \ -}) +#define rht_dereference_bucket(p, tbl, hash) \ + rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) -#define rht_next_entry_safe(pos, ht, member) \ -({ \ - pos ? rht_entry_safe(rht_dereference((pos)->member.next, ht), \ - typeof(*(pos)), member) : NULL; \ -}) +#define rht_dereference_bucket_rcu(p, tbl, hash) \ + rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) + +#define rht_entry(tpos, pos, member) \ + ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) /** - * rht_for_each - iterate over hash chain - * @pos: &struct rhash_head to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable + * rht_for_each_continue - continue iterating over hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index */ -#define rht_for_each(pos, head, ht) \ - for (pos = rht_dereference(head, ht); \ +#define rht_for_each_continue(pos, head, tbl, hash) \ + for (pos = rht_dereference_bucket(head, tbl, hash); \ pos; \ - pos = rht_dereference((pos)->next, ht)) + pos = rht_dereference_bucket((pos)->next, tbl, hash)) + +/** + * rht_for_each - iterate over hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + */ +#define rht_for_each(pos, tbl, hash) \ + rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) + +/** + * rht_for_each_entry_continue - continue iterating over hash chain + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. 
+ */ +#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ + for (pos = rht_dereference_bucket(head, tbl, hash); \ + pos && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** * rht_for_each_entry - iterate over hash chain of given type - * @pos: type * to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable - * @member: name of the rhash_head within the hashable struct. + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. */ -#define rht_for_each_entry(pos, head, ht, member) \ - for (pos = rht_entry_safe(rht_dereference(head, ht), \ - typeof(*(pos)), member); \ - pos; \ - pos = rht_next_entry_safe(pos, ht, member)) +#define rht_for_each_entry(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ + tbl, hash, member) /** * rht_for_each_entry_safe - safely iterate over hash chain of given type - * @pos: type * to use as a loop cursor. - * @n: type * to use for temporary next object storage - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable - * @member: name of the rhash_head within the hashable struct. + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @next: the &struct rhash_head to use as next in loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ -#define rht_for_each_entry_safe(pos, n, head, ht, member) \ - for (pos = rht_entry_safe(rht_dereference(head, ht), \ - typeof(*(pos)), member), \ - n = rht_next_entry_safe(pos, ht, member); \ - pos; \ - pos = n, \ - n = rht_next_entry_safe(pos, ht, member)) +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ + next = pos ? rht_dereference_bucket(pos->next, tbl, hash) \ + : NULL; \ + pos && rht_entry(tpos, pos, member); \ + pos = next) + +/** + * rht_for_each_rcu_continue - continue iterating over rcu hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_rcu_continue(pos, head, tbl, hash) \ + for (({barrier(); }), \ + pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos; \ + pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain - * @pos: &struct rhash_head to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable + * @pos: the &struct rhash_head to use as a loop cursor. 
+ * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index * * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu fkht mutation primitives such as rht_insert() as long as the + * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_rcu(pos, head, ht) \ - for (pos = rht_dereference_rcu(head, ht); \ - pos; \ - pos = rht_dereference_rcu((pos)->next, ht)) +#define rht_for_each_rcu(pos, tbl, hash) \ + rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) + +/** + * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ + for (({barrier(); }), \ + pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) /** * rht_for_each_entry_rcu - iterate over rcu hash chain of given type - * @pos: type * to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @member: name of the rhash_head within the hashable struct. + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu fkht mutation primitives such as rht_insert() as long as the + * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu(pos, head, member) \ - for (pos = rht_entry_safe(rcu_dereference_raw(head), \ - typeof(*(pos)), member); \ - pos; \ - pos = rht_entry_safe(rcu_dereference_raw((pos)->member.next), \ - typeof(*(pos)), member)) +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ + tbl, hash, member) #endif /* _LINUX_RHASHTABLE_H */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b658245826a1..ce450d095fdf 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -35,6 +35,12 @@ int lockdep_rht_mutex_is_held(const struct rhashtable *ht) return ht->p.mutex_is_held(ht->p.parent); } EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); + +int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) +{ + return 1; +} +EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #endif static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) @@ -141,7 +147,7 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, * previous node p. 
Call the previous node p; */ h = head_hashfn(ht, new_tbl, p); - rht_for_each(he, p->next, ht) { + rht_for_each_continue(he, p->next, old_tbl, n) { if (head_hashfn(ht, new_tbl, he) != h) break; p = he; @@ -153,7 +159,7 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, */ next = NULL; if (he) { - rht_for_each(he, he->next, ht) { + rht_for_each_continue(he, he->next, old_tbl, n) { if (head_hashfn(ht, new_tbl, he) == h) { next = he; break; @@ -208,7 +214,7 @@ int rhashtable_expand(struct rhashtable *ht) */ for (i = 0; i < new_tbl->size; i++) { h = rht_bucket_index(old_tbl, i); - rht_for_each(he, old_tbl->buckets[h], ht) { + rht_for_each(he, old_tbl, h) { if (head_hashfn(ht, new_tbl, he) == i) { RCU_INIT_POINTER(new_tbl->buckets[i], he); break; @@ -286,7 +292,7 @@ int rhashtable_shrink(struct rhashtable *ht) * to the new bucket. */ for (pprev = &ntbl->buckets[i]; *pprev != NULL; - pprev = &rht_dereference(*pprev, ht)->next) + pprev = &rht_dereference_bucket(*pprev, ntbl, i)->next) ; RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); } @@ -386,7 +392,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) h = head_hashfn(ht, tbl, obj); pprev = &tbl->buckets[h]; - rht_for_each(he, tbl->buckets[h], ht) { + rht_for_each(he, tbl, h) { if (he != obj) { pprev = &he->next; continue; @@ -423,7 +429,7 @@ void *rhashtable_lookup(const struct rhashtable *ht, const void *key) BUG_ON(!ht->p.key_len); h = key_hashfn(ht, key, ht->p.key_len); - rht_for_each_rcu(he, tbl->buckets[h], ht) { + rht_for_each_rcu(he, tbl, h) { if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, ht->p.key_len)) continue; @@ -457,7 +463,7 @@ void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, u32 hash; hash = key_hashfn(ht, key, ht->p.key_len); - rht_for_each_rcu(he, tbl->buckets[hash], ht) { + rht_for_each_rcu(he, tbl, hash) { if (!compare(rht_obj(ht, he), arg)) continue; return rht_obj(ht, he); @@ -625,6 +631,7 @@ static int __init test_rht_lookup(struct rhashtable *ht) static void test_bucket_stats(struct rhashtable *ht, bool quiet) { unsigned int cnt, rcu_cnt, i, total = 0; + struct rhash_head *pos; struct test_obj *obj; struct bucket_table *tbl; @@ -635,14 +642,14 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet) if (!quiet) pr_info(" [%#4x/%zu]", i, tbl->size); - rht_for_each_entry_rcu(obj, tbl->buckets[i], node) { + rht_for_each_entry_rcu(obj, pos, tbl, i, node) { cnt++; total++; if (!quiet) pr_cont(" [%p],", obj); } - rht_for_each_entry_rcu(obj, tbl->buckets[i], node) + rht_for_each_entry_rcu(obj, pos, tbl, i, node) rcu_cnt++; if (rcu_cnt != cnt) @@ -664,7 +671,8 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet) static int __init test_rhashtable(struct rhashtable *ht) { struct bucket_table *tbl; - struct test_obj *obj, *next; + struct test_obj *obj; + struct rhash_head *pos, *next; int err; unsigned int i; @@ -733,7 +741,7 @@ static int __init test_rhashtable(struct rhashtable *ht) error: tbl = rht_dereference_rcu(ht->tbl, ht); for (i = 0; i < tbl->size; i++) - rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node) + rht_for_each_entry_safe(obj, pos, next, tbl, i, node) kfree(obj); return err; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 614ee099ba36..d93f1f4c22a9 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -142,7 +142,9 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, tbl = rht_dereference_rcu(priv->tbl, priv); for (i = 0; i < 
tbl->size; i++) { - rht_for_each_entry_rcu(he, tbl->buckets[i], node) { + struct rhash_head *pos; + + rht_for_each_entry_rcu(he, pos, tbl, i, node) { if (iter->count < iter->skip) goto cont; @@ -197,15 +199,13 @@ static void nft_hash_destroy(const struct nft_set *set) { const struct rhashtable *priv = nft_set_priv(set); const struct bucket_table *tbl = priv->tbl; - struct nft_hash_elem *he, *next; + struct nft_hash_elem *he; + struct rhash_head *pos, *next; unsigned int i; for (i = 0; i < tbl->size; i++) { - for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node); - he != NULL; he = next) { - next = rht_entry(he->node.next, struct nft_hash_elem, node); + rht_for_each_entry_safe(he, pos, next, tbl, i, node) nft_hash_elem_destroy(set, he); - } } rhashtable_destroy(priv); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a5d7ed627563..57449b6089c2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2898,7 +2898,9 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (j = 0; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { + struct rhash_head *node; + + rht_for_each_entry_rcu(nlk, node, tbl, j, node) { s = (struct sock *)nlk; if (sock_net(s) != seq_file_net(seq)) @@ -2926,6 +2928,8 @@ static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct rhashtable *ht; + const struct bucket_table *tbl; + struct rhash_head *node; struct netlink_sock *nlk; struct nl_seq_iter *iter; struct net *net; @@ -2942,17 +2946,17 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) i = iter->link; ht = &nl_table[i].hash; - rht_for_each_entry(nlk, nlk->node.next, ht, node) + tbl = rht_dereference_rcu(ht->tbl, ht); + rht_for_each_entry_rcu_continue(nlk, node, nlk->node.next, tbl, iter->hash_idx, node) if (net_eq(sock_net((struct sock *)nlk), net)) return nlk; j = iter->hash_idx + 1; do { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (; j < tbl->size; j++) { - rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { + rht_for_each_entry_rcu(nlk, node, tbl, j, node) { if (net_eq(sock_net((struct sock *)nlk), net)) { iter->link = i; iter->hash_idx = j; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index de8c74a3c061..fcca36d81a62 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -113,7 +113,9 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, req = nlmsg_data(cb->nlh); for (i = 0; i < htbl->size; i++) { - rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) { + struct rhash_head *pos; + + rht_for_each_entry(nlsk, pos, htbl, i, node) { sk = (struct sock *)nlsk; if (!net_eq(sock_net(sk), net)) -- cgit v1.2.3 From 897362e446436d245972e72c6bc5b33bd7a5c659 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:18 +0100 Subject: nft_hash: Remove rhashtable_remove_pprev() The removal function of nft_hash currently stores a reference to the previous element during lookup which is used to optimize removal later on. This was possible because a lock is held throughout calling rhashtable_lookup() and rhashtable_remove(). With the introdution of deferred table resizing in parallel to lookups and insertions, the nftables lock will no longer synchronize all table mutations and the stored pprev may become invalid. 
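As a rough sketch of the caller pattern this moves to (my_obj, my_cmp and my_delete are illustrative names, not code from the patch, and the table is assumed to have been initialised with key_offset, key_len and head_offset pointing at the fields below): the lookup hands back the object itself, and its embedded rhash_head goes straight to rhashtable_remove(), so nothing has to remember a previous-pointer slot across the two calls.

	#include <linux/rhashtable.h>

	struct my_obj {
		int key;			/* matches key_offset/key_len */
		struct rhash_head node;		/* matches head_offset */
	};

	static bool my_cmp(void *ptr, void *arg)
	{
		const struct my_obj *obj = ptr;

		return obj->key == *(const int *)arg;
	}

	/* Caller still serializes table mutations as before; only the cached
	 * pprev shortcut goes away.
	 */
	static bool my_delete(struct rhashtable *ht, int key)
	{
		struct my_obj *obj;

		obj = rhashtable_lookup_compare(ht, &key, my_cmp, &key);
		if (!obj)
			return false;

		return rhashtable_remove(ht, &obj->node);
	}
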
Removing this optimization makes removal slightly more expensive on average but allows taking the resize cost out of the insert and remove path. Signed-off-by: Thomas Graf Cc: netfilter-devel@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 -- lib/rhashtable.c | 34 +++++++--------------------------- net/netfilter/nft_hash.c | 11 +++-------- 3 files changed, 10 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index b54e24a08806..f624d4b5045f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -105,8 +105,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); -void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev); bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 0bd29c178910..e6b85c4a5828 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -344,32 +344,6 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) } EXPORT_SYMBOL_GPL(rhashtable_insert); -/** - * rhashtable_remove_pprev - remove object from hash table given previous element - * @ht: hash table - * @obj: pointer to hash head inside object - * @pprev: pointer to previous element - * - * Identical to rhashtable_remove() but caller is alreayd aware of the element - * in front of the element to be deleted. This is in particular useful for - * deletion when combined with walking or lookup. 
- */ -void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev) -{ - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); - - ASSERT_RHT_MUTEX(ht); - - RCU_INIT_POINTER(*pprev, obj->next); - ht->nelems--; - - if (ht->p.shrink_decision && - ht->p.shrink_decision(ht, tbl->size)) - rhashtable_shrink(ht); -} -EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); - /** * rhashtable_remove - remove object from hash table * @ht: hash table @@ -403,7 +377,13 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) continue; } - rhashtable_remove_pprev(ht, he, pprev); + RCU_INIT_POINTER(*pprev, he->next); + ht->nelems--; + + if (ht->p.shrink_decision && + ht->p.shrink_decision(ht, tbl->size)) + rhashtable_shrink(ht); + return true; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index d93f1f4c22a9..7f903cf9a1b9 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -83,15 +83,10 @@ static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct rhashtable *priv = nft_set_priv(set); - struct rhash_head *he, __rcu **pprev; - - pprev = elem->cookie; - he = rht_dereference((*pprev), priv); - - rhashtable_remove_pprev(priv, he, pprev); + rhashtable_remove(priv, elem->cookie); synchronize_rcu(); - kfree(he); + kfree(elem->cookie); } struct nft_compare_arg { @@ -105,7 +100,7 @@ static bool nft_hash_compare(void *ptr, void *arg) struct nft_compare_arg *x = arg; if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) { - x->elem->cookie = &he->node; + x->elem->cookie = he; x->elem->flags = 0; if (x->set->flags & NFT_SET_MAP) nft_data_copy(&x->elem->data, he->data); -- cgit v1.2.3 From 97defe1ecf868b8127f8e62395499d6a06e4c4b1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:20 +0100 Subject: rhashtable: Per bucket locks & deferred expansion/shrinking Introduces an array of spinlocks to protect bucket mutations. The number of spinlocks per CPU is configurable and selected based on the hash of the bucket. This allows for parallel insertions and removals of entries which do not share a lock. The patch also defers expansion and shrinking to a worker queue which allows insertion and removal from atomic context. Insertions and deletions may occur in parallel to it and are only held up briefly while the particular bucket is linked or unzipped. Mutations of the bucket table pointer is protected by a new mutex, read access is RCU protected. In the event of an expansion or shrinking, the new bucket table allocated is exposed as a so called future table as soon as the resize process starts. Lookups, deletions, and insertions will briefly use both tables. The future table becomes the main table after an RCU grace period and initial linking of the old to the new table was performed. Optimization of the chains to make use of the new number of buckets follows only the new table is in use. The side effect of this is that during that RCU grace period, a bucket traversal using any rht_for_each() variant on the main table will not see any insertions performed during the RCU grace period which would at that point land in the future table. The lookup will see them as it searches both tables if needed. Having multiple insertions and removals occur in parallel requires nelems to become an atomic counter. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/rhashtable.h | 37 ++-- lib/rhashtable.c | 458 ++++++++++++++++++++++++++++++++++----------- net/netfilter/nft_hash.c | 27 ++- net/netlink/af_netlink.c | 15 +- 4 files changed, 384 insertions(+), 153 deletions(-) (limited to 'net') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f624d4b5045f..a1688f0a6193 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -19,6 +19,7 @@ #define _LINUX_RHASHTABLE_H #include +#include struct rhash_head { struct rhash_head __rcu *next; @@ -26,8 +27,17 @@ struct rhash_head { #define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) +/** + * struct bucket_table - Table of hash buckets + * @size: Number of hash buckets + * @locks_mask: Mask to apply before accessing locks[] + * @locks: Array of spinlocks protecting individual buckets + * @buckets: size * hash buckets + */ struct bucket_table { size_t size; + unsigned int locks_mask; + spinlock_t *locks; struct rhash_head __rcu *buckets[]; }; @@ -45,11 +55,11 @@ struct rhashtable; * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking + * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand * @shrink_decision: If defined, may return true if table should shrink - * @mutex_is_held: Must return true if protecting mutex is held */ struct rhashtable_params { size_t nelem_hint; @@ -59,37 +69,42 @@ struct rhashtable_params { u32 hash_rnd; size_t max_shift; size_t min_shift; + size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; bool (*grow_decision)(const struct rhashtable *ht, size_t new_size); bool (*shrink_decision)(const struct rhashtable *ht, size_t new_size); -#ifdef CONFIG_PROVE_LOCKING - int (*mutex_is_held)(void *parent); - void *parent; -#endif }; /** * struct rhashtable - Hash table handle * @tbl: Bucket table + * @future_tbl: Table under construction during expansion/shrinking * @nelems: Number of elements in table * @shift: Current size (1 << shift) * @p: Configuration parameters + * @run_work: Deferred worker to expand/shrink asynchronously + * @mutex: Mutex to protect current/future table swapping + * @being_destroyed: True if table is set up for destruction */ struct rhashtable { struct bucket_table __rcu *tbl; - size_t nelems; + struct bucket_table __rcu *future_tbl; + atomic_t nelems; size_t shift; struct rhashtable_params p; + struct delayed_work run_work; + struct mutex mutex; + bool being_destroyed; }; #ifdef CONFIG_PROVE_LOCKING -int lockdep_rht_mutex_is_held(const struct rhashtable *ht); +int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); #else -static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht) { return 1; } @@ -112,11 +127,11 @@ bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); -void *rhashtable_lookup(const struct rhashtable *ht, const void *key); -void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, +void *rhashtable_lookup(struct rhashtable *ht, const void *key); +void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void 
*), void *arg); -void rhashtable_destroy(const struct rhashtable *ht); +void rhashtable_destroy(struct rhashtable *ht); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e6b85c4a5828..312e3437c7bc 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -26,19 +26,42 @@ #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4UL +#define BUCKET_LOCKS_PER_CPU 128UL + +enum { + RHT_LOCK_NORMAL, + RHT_LOCK_NESTED, + RHT_LOCK_NESTED2, +}; + +/* The bucket lock is selected based on the hash and protects mutations + * on a group of hash buckets. + * + * IMPORTANT: When holding the bucket lock of both the old and new table + * during expansions and shrinking, the old bucket lock must always be + * acquired first. + */ +static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash) +{ + return &tbl->locks[hash & tbl->locks_mask]; +} #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) +#define ASSERT_BUCKET_LOCK(TBL, HASH) \ + BUG_ON(!lockdep_rht_bucket_is_held(TBL, HASH)) #ifdef CONFIG_PROVE_LOCKING -int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +int lockdep_rht_mutex_is_held(struct rhashtable *ht) { - return ht->p.mutex_is_held(ht->p.parent); + return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1; } EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { - return 1; + spinlock_t *lock = bucket_lock(tbl, hash); + + return (debug_locks) ? lockdep_is_held(lock) : 1; } EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #endif @@ -66,7 +89,7 @@ static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) return hash; } -static u32 key_hashfn(const struct rhashtable *ht, const void *key, u32 len) +static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) { struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); u32 hash; @@ -95,7 +118,49 @@ static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n) return pprev; } -static struct bucket_table *bucket_table_alloc(size_t nbuckets) +static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl) +{ + unsigned int i, size; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + + nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); + + /* Never allocate more than one lock per bucket */ + size = min_t(unsigned int, size, tbl->size); + + if (sizeof(spinlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(spinlock_t) > PAGE_SIZE) + tbl->locks = vmalloc(size * sizeof(spinlock_t)); + else +#endif + tbl->locks = kmalloc_array(size, sizeof(spinlock_t), + GFP_KERNEL); + if (!tbl->locks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&tbl->locks[i]); + } + tbl->locks_mask = size - 1; + + return 0; +} + +static void bucket_table_free(const struct bucket_table *tbl) +{ + if (tbl) + kvfree(tbl->locks); + + kvfree(tbl); +} + +static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, + size_t nbuckets) { struct bucket_table *tbl; size_t size; @@ -110,12 +175,12 @@ static struct bucket_table *bucket_table_alloc(size_t nbuckets) tbl->size = nbuckets; - return tbl; -} + if (alloc_bucket_locks(ht, tbl) < 0) { + bucket_table_free(tbl); + return NULL; + } -static void bucket_table_free(const struct bucket_table *tbl) -{ - kvfree(tbl); + return tbl; } /** @@ -126,7 
+191,7 @@ static void bucket_table_free(const struct bucket_table *tbl) bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) { /* Expand table when exceeding 75% load */ - return ht->nelems > (new_size / 4 * 3); + return atomic_read(&ht->nelems) > (new_size / 4 * 3); } EXPORT_SYMBOL_GPL(rht_grow_above_75); @@ -138,41 +203,59 @@ EXPORT_SYMBOL_GPL(rht_grow_above_75); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) { /* Shrink table beneath 30% load */ - return ht->nelems < (new_size * 3 / 10); + return atomic_read(&ht->nelems) < (new_size * 3 / 10); } EXPORT_SYMBOL_GPL(rht_shrink_below_30); static void hashtable_chain_unzip(const struct rhashtable *ht, const struct bucket_table *new_tbl, - struct bucket_table *old_tbl, size_t n) + struct bucket_table *old_tbl, + size_t old_hash) { struct rhash_head *he, *p, *next; - unsigned int h; + spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL; + unsigned int new_hash, new_hash2; + + ASSERT_BUCKET_LOCK(old_tbl, old_hash); /* Old bucket empty, no work needed. */ - p = rht_dereference(old_tbl->buckets[n], ht); + p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, + old_hash); if (!p) return; + new_hash = new_hash2 = head_hashfn(ht, new_tbl, p); + new_bucket_lock = bucket_lock(new_tbl, new_hash); + /* Advance the old bucket pointer one or more times until it * reaches a node that doesn't hash to the same bucket as the * previous node p. Call the previous node p; */ - h = head_hashfn(ht, new_tbl, p); - rht_for_each_continue(he, p->next, old_tbl, n) { - if (head_hashfn(ht, new_tbl, he) != h) + rht_for_each_continue(he, p->next, old_tbl, old_hash) { + new_hash2 = head_hashfn(ht, new_tbl, he); + if (new_hash != new_hash2) break; p = he; } - RCU_INIT_POINTER(old_tbl->buckets[n], p->next); + rcu_assign_pointer(old_tbl->buckets[old_hash], p->next); + + spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); + + /* If we have encountered an entry that maps to a different bucket in + * the new table, lock down that bucket as well as we might cut off + * the end of the chain. + */ + new_bucket_lock2 = bucket_lock(new_tbl, new_hash); + if (new_bucket_lock != new_bucket_lock2) + spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2); /* Find the subsequent node which does hash to the same * bucket as node P, or NULL if no such node exists. */ next = NULL; if (he) { - rht_for_each_continue(he, he->next, old_tbl, n) { - if (head_hashfn(ht, new_tbl, he) == h) { + rht_for_each_continue(he, he->next, old_tbl, old_hash) { + if (head_hashfn(ht, new_tbl, he) == new_hash) { next = he; break; } @@ -182,7 +265,23 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, /* Set p's next pointer to that subsequent node pointer, * bypassing the nodes which do not hash to p's bucket */ - RCU_INIT_POINTER(p->next, next); + rcu_assign_pointer(p->next, next); + + if (new_bucket_lock != new_bucket_lock2) + spin_unlock_bh(new_bucket_lock2); + spin_unlock_bh(new_bucket_lock); +} + +static void link_old_to_new(struct bucket_table *new_tbl, + unsigned int new_hash, struct rhash_head *entry) +{ + spinlock_t *new_bucket_lock; + + new_bucket_lock = bucket_lock(new_tbl, new_hash); + + spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); + rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry); + spin_unlock_bh(new_bucket_lock); } /** @@ -195,43 +294,59 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, * This function may only be called in a context where it is safe to call * synchronize_rcu(), e.g. 
not within a rcu_read_lock() section. * - * The caller must ensure that no concurrent table mutations take place. - * It is however valid to have concurrent lookups if they are RCU protected. + * The caller must ensure that no concurrent resizing occurs by holding + * ht->mutex. + * + * It is valid to have concurrent insertions and deletions protected by per + * bucket locks or concurrent RCU protected lookups and traversals. */ int rhashtable_expand(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); struct rhash_head *he; - unsigned int i, h; - bool complete; + spinlock_t *old_bucket_lock; + unsigned int new_hash, old_hash; + bool complete = false; ASSERT_RHT_MUTEX(ht); if (ht->p.max_shift && ht->shift >= ht->p.max_shift) return 0; - new_tbl = bucket_table_alloc(old_tbl->size * 2); + new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); if (new_tbl == NULL) return -ENOMEM; ht->shift++; - /* For each new bucket, search the corresponding old bucket - * for the first entry that hashes to the new bucket, and - * link the new bucket to that entry. Since all the entries - * which will end up in the new bucket appear in the same - * old bucket, this constructs an entirely valid new hash - * table, but with multiple buckets "zipped" together into a - * single imprecise chain. + /* Make insertions go into the new, empty table right away. Deletions + * and lookups will be attempted in both tables until we synchronize. + * The synchronize_rcu() guarantees for the new table to be picked up + * so no new additions go into the old table while we relink. + */ + rcu_assign_pointer(ht->future_tbl, new_tbl); + synchronize_rcu(); + + /* For each new bucket, search the corresponding old bucket for the + * first entry that hashes to the new bucket, and link the end of + * newly formed bucket chain (containing entries added to future + * table) to that entry. Since all the entries which will end up in + * the new bucket appear in the same old bucket, this constructs an + * entirely valid new hash table, but with multiple buckets + * "zipped" together into a single imprecise chain. */ - for (i = 0; i < new_tbl->size; i++) { - h = rht_bucket_index(old_tbl, i); - rht_for_each(he, old_tbl, h) { - if (head_hashfn(ht, new_tbl, he) == i) { - RCU_INIT_POINTER(new_tbl->buckets[i], he); + for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { + old_hash = rht_bucket_index(old_tbl, new_hash); + old_bucket_lock = bucket_lock(old_tbl, old_hash); + + spin_lock_bh(old_bucket_lock); + rht_for_each(he, old_tbl, old_hash) { + if (head_hashfn(ht, new_tbl, he) == new_hash) { + link_old_to_new(new_tbl, new_hash, he); break; } } + spin_unlock_bh(old_bucket_lock); } /* Publish the new table pointer. Lookups may now traverse @@ -241,7 +356,7 @@ int rhashtable_expand(struct rhashtable *ht) rcu_assign_pointer(ht->tbl, new_tbl); /* Unzip interleaved hash chains */ - do { + while (!complete && !ht->being_destroyed) { /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. @@ -253,12 +368,17 @@ int rhashtable_expand(struct rhashtable *ht) * table): ... 
*/ complete = true; - for (i = 0; i < old_tbl->size; i++) { - hashtable_chain_unzip(ht, new_tbl, old_tbl, i); - if (old_tbl->buckets[i] != NULL) + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { + old_bucket_lock = bucket_lock(old_tbl, old_hash); + spin_lock_bh(old_bucket_lock); + + hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash); + if (old_tbl->buckets[old_hash] != NULL) complete = false; + + spin_unlock_bh(old_bucket_lock); } - } while (!complete); + } bucket_table_free(old_tbl); return 0; @@ -272,38 +392,65 @@ EXPORT_SYMBOL_GPL(rhashtable_expand); * This function may only be called in a context where it is safe to call * synchronize_rcu(), e.g. not within a rcu_read_lock() section. * + * The caller must ensure that no concurrent resizing occurs by holding + * ht->mutex. + * * The caller must ensure that no concurrent table mutations take place. * It is however valid to have concurrent lookups if they are RCU protected. + * + * It is valid to have concurrent insertions and deletions protected by per + * bucket locks or concurrent RCU protected lookups and traversals. */ int rhashtable_shrink(struct rhashtable *ht) { - struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); - unsigned int i; + struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht); + spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2; + unsigned int new_hash; ASSERT_RHT_MUTEX(ht); if (ht->shift <= ht->p.min_shift) return 0; - ntbl = bucket_table_alloc(tbl->size / 2); - if (ntbl == NULL) + new_tbl = bucket_table_alloc(ht, tbl->size / 2); + if (new_tbl == NULL) return -ENOMEM; - ht->shift--; + rcu_assign_pointer(ht->future_tbl, new_tbl); + synchronize_rcu(); - /* Link each bucket in the new table to the first bucket - * in the old table that contains entries which will hash - * to the new bucket. + /* Link the first entry in the old bucket to the end of the + * bucket in the new table. As entries are concurrently being + * added to the new table, lock down the new bucket. As we + * always divide the size in half when shrinking, each bucket + * in the new table maps to exactly two buckets in the old + * table. + * + * As removals can occur concurrently on the old table, we need + * to lock down both matching buckets in the old table. */ - for (i = 0; i < ntbl->size; i++) { - ntbl->buckets[i] = tbl->buckets[i]; - RCU_INIT_POINTER(*bucket_tail(ntbl, i), - tbl->buckets[i + ntbl->size]); - + for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { + old_bucket_lock1 = bucket_lock(tbl, new_hash); + old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size); + new_bucket_lock = bucket_lock(new_tbl, new_hash); + + spin_lock_bh(old_bucket_lock1); + spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED); + spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2); + + rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), + tbl->buckets[new_hash]); + rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), + tbl->buckets[new_hash + new_tbl->size]); + + spin_unlock_bh(new_bucket_lock); + spin_unlock_bh(old_bucket_lock2); + spin_unlock_bh(old_bucket_lock1); } /* Publish the new, valid hash table */ - rcu_assign_pointer(ht->tbl, ntbl); + rcu_assign_pointer(ht->tbl, new_tbl); + ht->shift--; /* Wait for readers. No new readers will have references to the * old hash table. 
@@ -316,31 +463,63 @@ int rhashtable_shrink(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_shrink); +static void rht_deferred_worker(struct work_struct *work) +{ + struct rhashtable *ht; + struct bucket_table *tbl; + + ht = container_of(work, struct rhashtable, run_work.work); + mutex_lock(&ht->mutex); + tbl = rht_dereference(ht->tbl, ht); + + if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + rhashtable_expand(ht); + else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) + rhashtable_shrink(ht); + + mutex_unlock(&ht->mutex); +} + /** * rhashtable_insert - insert object into hash hash table * @ht: hash table * @obj: pointer to hash head inside object * - * Will automatically grow the table via rhashtable_expand() if the the - * grow_decision function specified at rhashtable_init() returns true. + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. * - * The caller must ensure that no concurrent table mutations occur. It is - * however valid to have concurrent lookups if they are RCU protected. + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). */ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); - u32 hash; + struct bucket_table *tbl; + spinlock_t *lock; + unsigned hash; - ASSERT_RHT_MUTEX(ht); + rcu_read_lock(); + tbl = rht_dereference_rcu(ht->future_tbl, ht); hash = head_hashfn(ht, tbl, obj); + lock = bucket_lock(tbl, hash); + + spin_lock_bh(lock); RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); rcu_assign_pointer(tbl->buckets[hash], obj); - ht->nelems++; + spin_unlock_bh(lock); - if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) - rhashtable_expand(ht); + atomic_inc(&ht->nelems); + + /* Only grow the table if no resizing is currently in progress. 
*/ + if (ht->tbl != ht->future_tbl && + ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + schedule_delayed_work(&ht->run_work, 0); + + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rhashtable_insert); @@ -361,32 +540,56 @@ EXPORT_SYMBOL_GPL(rhashtable_insert); */ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *tbl; struct rhash_head __rcu **pprev; struct rhash_head *he; - u32 h; + spinlock_t *lock; + unsigned int hash; - ASSERT_RHT_MUTEX(ht); + rcu_read_lock(); + tbl = rht_dereference_rcu(ht->tbl, ht); + hash = head_hashfn(ht, tbl, obj); - h = head_hashfn(ht, tbl, obj); + lock = bucket_lock(tbl, hash); + spin_lock_bh(lock); - pprev = &tbl->buckets[h]; - rht_for_each(he, tbl, h) { +restart: + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { if (he != obj) { pprev = &he->next; continue; } - RCU_INIT_POINTER(*pprev, he->next); - ht->nelems--; + rcu_assign_pointer(*pprev, obj->next); + atomic_dec(&ht->nelems); - if (ht->p.shrink_decision && + spin_unlock_bh(lock); + + if (ht->tbl != ht->future_tbl && + ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) - rhashtable_shrink(ht); + schedule_delayed_work(&ht->run_work, 0); + + rcu_read_unlock(); return true; } + if (tbl != rht_dereference_rcu(ht->tbl, ht)) { + spin_unlock_bh(lock); + + tbl = rht_dereference_rcu(ht->tbl, ht); + hash = head_hashfn(ht, tbl, obj); + + lock = bucket_lock(tbl, hash); + spin_lock_bh(lock); + goto restart; + } + + spin_unlock_bh(lock); + rcu_read_unlock(); + return false; } EXPORT_SYMBOL_GPL(rhashtable_remove); @@ -402,25 +605,35 @@ EXPORT_SYMBOL_GPL(rhashtable_remove); * This lookup function may only be used for fixed key hash table (key_len * paramter set). It will BUG() if used inappropriately. * - * Lookups may occur in parallel with hash mutations as long as the lookup is - * guarded by rcu_read_lock(). The caller must take care of this. + * Lookups may occur in parallel with hashtable mutations and resizing. */ -void *rhashtable_lookup(const struct rhashtable *ht, const void *key) +void *rhashtable_lookup(struct rhashtable *ht, const void *key) { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + const struct bucket_table *tbl, *old_tbl; struct rhash_head *he; - u32 h; + u32 hash; BUG_ON(!ht->p.key_len); - h = key_hashfn(ht, key, ht->p.key_len); - rht_for_each_rcu(he, tbl, h) { + rcu_read_lock(); + old_tbl = rht_dereference_rcu(ht->tbl, ht); + tbl = rht_dereference_rcu(ht->future_tbl, ht); + hash = key_hashfn(ht, key, ht->p.key_len); +restart: + rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) { if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, ht->p.key_len)) continue; + rcu_read_unlock(); return rht_obj(ht, he); } + if (unlikely(tbl != old_tbl)) { + tbl = old_tbl; + goto restart; + } + + rcu_read_unlock(); return NULL; } EXPORT_SYMBOL_GPL(rhashtable_lookup); @@ -435,25 +648,36 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup); * Traverses the bucket chain behind the provided hash value and calls the * specified compare function for each entry. * - * Lookups may occur in parallel with hash mutations as long as the lookup is - * guarded by rcu_read_lock(). The caller must take care of this. + * Lookups may occur in parallel with hashtable mutations and resizing. * * Returns the first entry on which the compare function returned true. 
*/ -void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, +void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg) { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + const struct bucket_table *tbl, *old_tbl; struct rhash_head *he; u32 hash; + rcu_read_lock(); + + old_tbl = rht_dereference_rcu(ht->tbl, ht); + tbl = rht_dereference_rcu(ht->future_tbl, ht); hash = key_hashfn(ht, key, ht->p.key_len); - rht_for_each_rcu(he, tbl, hash) { +restart: + rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) { if (!compare(rht_obj(ht, he), arg)) continue; + rcu_read_unlock(); return rht_obj(ht, he); } + if (unlikely(tbl != old_tbl)) { + tbl = old_tbl; + goto restart; + } + rcu_read_unlock(); + return NULL; } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); @@ -485,9 +709,6 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), * .hashfn = jhash, - * #ifdef CONFIG_PROVE_LOCKING - * .mutex_is_held = &my_mutex_is_held, - * #endif * }; * * Configuration Example 2: Variable length keys @@ -507,9 +728,6 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .head_offset = offsetof(struct test_obj, node), * .hashfn = jhash, * .obj_hashfn = my_hash_fn, - * #ifdef CONFIG_PROVE_LOCKING - * .mutex_is_held = &my_mutex_is_held, - * #endif * }; */ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) @@ -529,18 +747,29 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (params->nelem_hint) size = rounded_hashtable_size(params); - tbl = bucket_table_alloc(size); + memset(ht, 0, sizeof(*ht)); + mutex_init(&ht->mutex); + memcpy(&ht->p, params, sizeof(*params)); + + if (params->locks_mul) + ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); + else + ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; + + tbl = bucket_table_alloc(ht, size); if (tbl == NULL) return -ENOMEM; - memset(ht, 0, sizeof(*ht)); ht->shift = ilog2(tbl->size); - memcpy(&ht->p, params, sizeof(*params)); RCU_INIT_POINTER(ht->tbl, tbl); + RCU_INIT_POINTER(ht->future_tbl, tbl); if (!ht->p.hash_rnd) get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); + if (ht->p.grow_decision || ht->p.shrink_decision) + INIT_DEFERRABLE_WORK(&ht->run_work, rht_deferred_worker); + return 0; } EXPORT_SYMBOL_GPL(rhashtable_init); @@ -553,9 +782,16 @@ EXPORT_SYMBOL_GPL(rhashtable_init); * has to make sure that no resizing may happen by unpublishing the hashtable * and waiting for the quiescent cycle before releasing the bucket array. 
*/ -void rhashtable_destroy(const struct rhashtable *ht) +void rhashtable_destroy(struct rhashtable *ht) { - bucket_table_free(ht->tbl); + ht->being_destroyed = true; + + mutex_lock(&ht->mutex); + + cancel_delayed_work(&ht->run_work); + bucket_table_free(rht_dereference(ht->tbl, ht)); + + mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_destroy); @@ -570,13 +806,6 @@ EXPORT_SYMBOL_GPL(rhashtable_destroy); #define TEST_PTR ((void *) 0xdeadbeef) #define TEST_NEXPANDS 4 -#ifdef CONFIG_PROVE_LOCKING -static int test_mutex_is_held(void *parent) -{ - return 1; -} -#endif - struct test_obj { void *ptr; int value; @@ -646,10 +875,10 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet) i, tbl->buckets[i], cnt); } - pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", - total, ht->nelems, TEST_ENTRIES); + pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d\n", + total, atomic_read(&ht->nelems), TEST_ENTRIES); - if (total != ht->nelems || total != TEST_ENTRIES) + if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES) pr_warn("Test failed: Total count mismatch ^^^"); } @@ -688,7 +917,9 @@ static int __init test_rhashtable(struct rhashtable *ht) for (i = 0; i < TEST_NEXPANDS; i++) { pr_info(" Table expansion iteration %u...\n", i); + mutex_lock(&ht->mutex); rhashtable_expand(ht); + mutex_unlock(&ht->mutex); rcu_read_lock(); pr_info(" Verifying lookups...\n"); @@ -698,7 +929,9 @@ static int __init test_rhashtable(struct rhashtable *ht) for (i = 0; i < TEST_NEXPANDS; i++) { pr_info(" Table shrinkage iteration %u...\n", i); + mutex_lock(&ht->mutex); rhashtable_shrink(ht); + mutex_unlock(&ht->mutex); rcu_read_lock(); pr_info(" Verifying lookups...\n"); @@ -741,9 +974,6 @@ static int __init test_rht_init(void) .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(int), .hashfn = jhash, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = &test_mutex_is_held, -#endif .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, }; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 7f903cf9a1b9..75887d7d2c6a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -33,7 +33,7 @@ static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, struct nft_data *data) { - const struct rhashtable *priv = nft_set_priv(set); + struct rhashtable *priv = nft_set_priv(set); const struct nft_hash_elem *he; he = rhashtable_lookup(priv, key); @@ -113,7 +113,7 @@ static bool nft_hash_compare(void *ptr, void *arg) static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { - const struct rhashtable *priv = nft_set_priv(set); + struct rhashtable *priv = nft_set_priv(set); struct nft_compare_arg arg = { .set = set, .elem = elem, @@ -129,7 +129,7 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - const struct rhashtable *priv = nft_set_priv(set); + struct rhashtable *priv = nft_set_priv(set); const struct bucket_table *tbl; const struct nft_hash_elem *he; struct nft_set_elem elem; @@ -162,13 +162,6 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) return sizeof(struct rhashtable); } -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nfnl_lock_is_held(void *parent) -{ - return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); -} -#endif - static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc 
*desc, const struct nlattr * const tb[]) @@ -182,9 +175,6 @@ static int nft_hash_init(const struct nft_set *set, .hashfn = jhash, .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nfnl_lock_is_held, -#endif }; return rhashtable_init(priv, ¶ms); @@ -192,16 +182,23 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_destroy(const struct nft_set *set) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = priv->tbl; + struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl; struct nft_hash_elem *he; struct rhash_head *pos, *next; unsigned int i; + /* Stop an eventual async resizing */ + priv->being_destroyed = true; + mutex_lock(&priv->mutex); + + tbl = rht_dereference(priv->tbl, priv); for (i = 0; i < tbl->size; i++) { rht_for_each_entry_safe(he, pos, next, tbl, i, node) nft_hash_elem_destroy(set, he); } + mutex_unlock(&priv->mutex); + rhashtable_destroy(priv); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 57449b6089c2..738c3bfaa564 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -114,15 +114,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); DEFINE_MUTEX(nl_sk_hash_lock); EXPORT_SYMBOL_GPL(nl_sk_hash_lock); -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nl_sk_hash_is_held(void *parent) -{ - if (debug_locks) - return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); - return 1; -} -#endif - static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -1063,7 +1054,8 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) goto err; err = -ENOMEM; - if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX)) + if (BITS_PER_LONG > 32 && + unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; nlk_sk(sk)->portid = portid; @@ -3122,9 +3114,6 @@ static int __init netlink_proto_init(void) .max_shift = 16, /* 64K */ .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nl_sk_hash_is_held, -#endif }; if (err != 0) -- cgit v1.2.3 From 21e4902aea80ef35afc00ee8d2abdea4f519b7f7 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:22 +0100 Subject: netlink: Lockless lookup with RCU grace period in socket release Defers the release of the socket reference using call_rcu() to allow using an RCU read-side protected call to rhashtable_lookup() This restores behaviour and performance gains as previously introduced by e341694 ("netlink: Convert netlink_lookup() to use RCU protected hash table") without the side effect of severely delayed socket destruction. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 32 ++++++++++++++++---------------- net/netlink/af_netlink.h | 1 + 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 738c3bfaa564..298e1df7132a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -97,12 +97,12 @@ static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: - * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock - * combined with an RCU read-side lock. 
Insertion and removal are protected - * with nl_sk_hash_lock while using RCU list modification primitives and may - * run in parallel to nl_table_lock protected lookups. Destruction of the - * Netlink socket may only occur *after* nl_table_lock has been acquired - * either during or after the socket has been removed from the list. + * Lookup and traversal are protected with an RCU read-side lock. Insertion + * and removal are protected with nl_sk_hash_lock while using RCU list + * modification primitives and may run in parallel to RCU protected lookups. + * Destruction of the Netlink socket may only occur *after* nl_table_lock has + * been acquired * either during or after the socket has been removed from + * the list and after an RCU grace period. */ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); @@ -1003,13 +1003,11 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) struct netlink_table *table = &nl_table[protocol]; struct sock *sk; - read_lock(&nl_table_lock); rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); - read_unlock(&nl_table_lock); return sk; } @@ -1183,6 +1181,13 @@ out_module: goto out; } +static void deferred_put_nlk_sk(struct rcu_head *head) +{ + struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + + sock_put(&nlk->sk); +} + static int netlink_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -1248,7 +1253,7 @@ static int netlink_release(struct socket *sock) local_bh_disable(); sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); local_bh_enable(); - sock_put(sk); + call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@ -1263,7 +1268,6 @@ static int netlink_autobind(struct socket *sock) retry: cond_resched(); - netlink_table_grab(); rcu_read_lock(); if (__netlink_lookup(table, portid, net)) { /* Bind collision, search negative portid values. */ @@ -1271,11 +1275,9 @@ retry: if (rover > -4097) rover = -4097; rcu_read_unlock(); - netlink_table_ungrab(); goto retry; } rcu_read_unlock(); - netlink_table_ungrab(); err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) @@ -2910,9 +2912,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) } static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(nl_table_lock) __acquires(RCU) + __acquires(RCU) { - read_lock(&nl_table_lock); rcu_read_lock(); return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2964,10 +2965,9 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) __releases(nl_table_lock) + __releases(RCU) { rcu_read_unlock(); - read_unlock(&nl_table_lock); } diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index f123a88496f8..fd96fa76202a 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -50,6 +50,7 @@ struct netlink_sock { #endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; + struct rcu_head rcu; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) -- cgit v1.2.3 From 04422da990b33bf814944a3dd5d49c920eae4f7b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 2 Jan 2015 23:35:18 -0800 Subject: Bluetooth: Remove dead code for manufacturer inquiry mode quirks There are some old Bluetooth modules from Silicon Wave and Broadcom which support Inquiry Result with RSSI, but do not advertise it. The core has quirks in the code to enable that inquiry mode. 
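Condensed into a hedged sketch (old_inquiry_mode_path is an illustrative name, not a function from the tree), the pre-patch control flow already guaranteed that those quirk branches could never run, because the only call site was gated on the very capability the quirks were meant to compensate for:

	#include <net/bluetooth/hci_core.h>

	static u8 old_inquiry_mode_path(struct hci_dev *hdev)
	{
		/* hci_init2_req() only called hci_setup_inquiry_mode() when
		 * the RSSI capability bit was already advertised.
		 */
		if (!lmp_inq_rssi_capable(hdev))
			return 0x00;

		if (lmp_ext_inq_capable(hdev))
			return 0x02;

		/* With the RSSI bit required at the call site, the
		 * manufacturer/hci_rev/lmp_subver fallbacks that followed in
		 * hci_get_inquiry_mode() were unreachable.
		 */
		return 0x01;
	}
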
However as it stands right now, that code is not even executed since entering the function to determine which inquiry mode requires that the device has the feature bit for Inquiry Result with RSSI set in the first place. So this makes this dead code that hasn't work for a long time. In conclusion, just remove these extra quirks and simplify the setup of the inquiry mode to be inline and with that a lot easier to read and understand. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 50 +++++++++++------------------------------------- 1 file changed, 11 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6c12110b75a7..ebac859e1258 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -497,43 +497,6 @@ static void le_setup(struct hci_request *req) set_bit(HCI_LE_ENABLED, &hdev->dev_flags); } -static u8 hci_get_inquiry_mode(struct hci_dev *hdev) -{ - if (lmp_ext_inq_capable(hdev)) - return 0x02; - - if (lmp_inq_rssi_capable(hdev)) - return 0x01; - - if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && - hdev->lmp_subver == 0x0757) - return 0x01; - - if (hdev->manufacturer == 15) { - if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) - return 0x01; - if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) - return 0x01; - if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) - return 0x01; - } - - if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && - hdev->lmp_subver == 0x1805) - return 0x01; - - return 0x00; -} - -static void hci_setup_inquiry_mode(struct hci_request *req) -{ - u8 mode; - - mode = hci_get_inquiry_mode(req->hdev); - - hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); -} - static void hci_setup_event_mask(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -658,8 +621,17 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) } } - if (lmp_inq_rssi_capable(hdev)) - hci_setup_inquiry_mode(req); + if (lmp_inq_rssi_capable(hdev)) { + u8 mode; + + /* If Extended Inquiry Result events are supported, then + * they are clearly preferred over Inquiry Result with RSSI + * events. + */ + mode = lmp_ext_inq_capable(hdev) ? 0x02 : 0x01; + + hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); + } if (lmp_inq_tx_pwr_capable(hdev)) hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); -- cgit v1.2.3 From 043ec9bf7b9d7cdce84d2e8d3df9b9eb520d929e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 2 Jan 2015 23:35:19 -0800 Subject: Bluetooth: Introduce HCI_QUIRK_FIXUP_INQUIRY_MODE option The HCI_QUIRK_FIXUP_INQUIRY_MODE option allows to force Inquiry Result with RSSI setting on controllers that do not indicate support for it, but where it is known to be fully functional. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 12 ++++++++++++ net/bluetooth/hci_core.c | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index aee16bf5d34f..d0bca316b43b 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -102,6 +102,18 @@ enum { */ HCI_QUIRK_FIXUP_BUFFER_SIZE, + /* When this quirk is set, then a controller that does not + * indicate support for Inquiry Result with RSSI is assumed to + * support it anyway. Some early Bluetooth 1.2 controllers had + * wrongly configured local features that will require forcing + * them to enable this mode. 
Getting RSSI information with the + * inquiry responses is preferred since it allows for a better + * user expierence. + * + * This quirk must be set before hci_register_dev is called. + */ + HCI_QUIRK_FIXUP_INQUIRY_MODE, + /* When this quirk is set, then the HCI Read Local Supported * Commands command is not supported. In general Bluetooth 1.2 * and later controllers should support this command. However diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ebac859e1258..bc5486ea5411 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -621,7 +621,8 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) } } - if (lmp_inq_rssi_capable(hdev)) { + if (lmp_inq_rssi_capable(hdev) || + test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) { u8 mode; /* If Extended Inquiry Result events are supported, then -- cgit v1.2.3 From 61f3cade763dca46127146a52d829e30b8f48921 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:02 -0800 Subject: geneve: Remove workqueue. The work queue is used only to free the UDP socket upon destruction. This is not necessary with Geneve and generally makes the code more difficult to reason about. It also introduces nondeterministic behavior such as when a socket is rapidly deleted and recreated, which could fail as the the deletion happens asynchronously. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/net/geneve.h | 1 - net/ipv4/geneve.c | 21 ++------------------- 2 files changed, 2 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/net/geneve.h b/include/net/geneve.h index 112132cf8e2e..56c7e1ac216a 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -71,7 +71,6 @@ struct geneve_sock { struct hlist_node hlist; geneve_rcv_t *rcv; void *rcv_data; - struct work_struct del_work; struct socket *sock; struct rcu_head rcu; atomic_t refcnt; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 19e256e1dd92..136a829e8746 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -61,8 +61,6 @@ struct geneve_net { static int geneve_net_id; -static struct workqueue_struct *geneve_wq; - static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); @@ -307,15 +305,6 @@ error: return 1; } -static void geneve_del_work(struct work_struct *work) -{ - struct geneve_sock *gs = container_of(work, struct geneve_sock, - del_work); - - udp_tunnel_sock_release(gs->sock); - kfree_rcu(gs, rcu); -} - static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port) { @@ -356,8 +345,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, if (!gs) return ERR_PTR(-ENOMEM); - INIT_WORK(&gs->del_work, geneve_del_work); - sock = geneve_create_sock(net, ipv6, port); if (IS_ERR(sock)) { kfree(gs); @@ -430,7 +417,8 @@ void geneve_sock_release(struct geneve_sock *gs) geneve_notify_del_rx_port(gs); spin_unlock(&gn->sock_lock); - queue_work(geneve_wq, &gs->del_work); + udp_tunnel_sock_release(gs->sock); + kfree_rcu(gs, rcu); } EXPORT_SYMBOL_GPL(geneve_sock_release); @@ -458,10 +446,6 @@ static int __init geneve_init_module(void) { int rc; - geneve_wq = alloc_workqueue("geneve", 0, 0); - if (!geneve_wq) - return -ENOMEM; - rc = register_pernet_subsys(&geneve_net_ops); if (rc) return rc; @@ -474,7 +458,6 @@ late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { - destroy_workqueue(geneve_wq); unregister_pernet_subsys(&geneve_net_ops); } 
module_exit(geneve_cleanup_module); -- cgit v1.2.3 From 829a3ada9cc7d4c30fa61f8033403fb6c8f8092a Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:03 -0800 Subject: geneve: Simplify locking. The existing Geneve locking scheme was pulled over directly from VXLAN. However, VXLAN has a number of built in mechanisms which make the locking more complex and are unlikely to be necessary with Geneve. This simplifies the locking to use a basic scheme of a mutex when doing updates plus RCU on receive. In addition to making the code easier to read, this also avoids the possibility of a race when creating or destroying sockets since UDP sockets and the list of Geneve sockets are protected by different locks. After this change, the entire operation is atomic. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/net/geneve.h | 2 +- net/ipv4/geneve.c | 59 +++++++++++++++++++++++----------------------------- 2 files changed, 27 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/include/net/geneve.h b/include/net/geneve.h index 56c7e1ac216a..b40f4affc4cb 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -73,7 +73,7 @@ struct geneve_sock { void *rcv_data; struct socket *sock; struct rcu_head rcu; - atomic_t refcnt; + int refcnt; struct udp_offload udp_offloads; }; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 136a829e8746..ad8dbae11d01 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -50,13 +51,15 @@ #include #endif +/* Protects sock_list and refcounts. */ +static DEFINE_MUTEX(geneve_mutex); + #define PORT_HASH_BITS 8 #define PORT_HASH_SIZE (1<sock->sk)->inet_sport == port) return gs; } @@ -336,7 +339,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; @@ -352,7 +354,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, } gs->sock = sock; - atomic_set(&gs->refcnt, 1); + gs->refcnt = 1; gs->rcv = rcv; gs->rcv_data = data; @@ -360,11 +362,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, gs->udp_offloads.port = port; gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; - - spin_lock(&gn->sock_lock); - hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); geneve_notify_add_rx_port(gs); - spin_unlock(&gn->sock_lock); /* Mark socket as an encapsulation socket */ tunnel_cfg.sk_user_data = gs; @@ -373,6 +371,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); + hlist_add_head(&gs->hlist, gs_head(net, port)); + return gs; } @@ -380,25 +380,21 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool no_share, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - gs = geneve_socket_create(net, port, rcv, data, ipv6); - if (!IS_ERR(gs)) - return gs; - - if (no_share) /* Return error if sharing is not allowed. 
*/ - return ERR_PTR(-EINVAL); + mutex_lock(&geneve_mutex); - spin_lock(&gn->sock_lock); gs = geneve_find_sock(net, port); - if (gs && ((gs->rcv != rcv) || - !atomic_add_unless(&gs->refcnt, 1, 0))) + if (gs) { + if (!no_share && gs->rcv == rcv) + gs->refcnt++; + else gs = ERR_PTR(-EBUSY); - spin_unlock(&gn->sock_lock); + } else { + gs = geneve_socket_create(net, port, rcv, data, ipv6); + } - if (!gs) - gs = ERR_PTR(-EINVAL); + mutex_unlock(&geneve_mutex); return gs; } @@ -406,19 +402,18 @@ EXPORT_SYMBOL_GPL(geneve_sock_add); void geneve_sock_release(struct geneve_sock *gs) { - struct net *net = sock_net(gs->sock->sk); - struct geneve_net *gn = net_generic(net, geneve_net_id); + mutex_lock(&geneve_mutex); - if (!atomic_dec_and_test(&gs->refcnt)) - return; + if (--gs->refcnt) + goto unlock; - spin_lock(&gn->sock_lock); - hlist_del_rcu(&gs->hlist); + hlist_del(&gs->hlist); geneve_notify_del_rx_port(gs); - spin_unlock(&gn->sock_lock); - udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); + +unlock: + mutex_unlock(&geneve_mutex); } EXPORT_SYMBOL_GPL(geneve_sock_release); @@ -427,8 +422,6 @@ static __net_init int geneve_init_net(struct net *net) struct geneve_net *gn = net_generic(net, geneve_net_id); unsigned int h; - spin_lock_init(&gn->sock_lock); - for (h = 0; h < PORT_HASH_SIZE; ++h) INIT_HLIST_HEAD(&gn->sock_list[h]); @@ -454,7 +447,7 @@ static int __init geneve_init_module(void) return 0; } -late_initcall(geneve_init_module); +module_init(geneve_init_module); static void __exit geneve_cleanup_module(void) { -- cgit v1.2.3 From df5dba8e52be50e615e03ef73b34611d82587f42 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:04 -0800 Subject: geneve: Remove socket hash table. The hash table for open Geneve ports is used only on creation and deletion time. It is not performance critical and is not likely to grow to a large number of items. Therefore, this can be changed to use a simple linked list. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/net/geneve.h | 2 +- net/ipv4/geneve.c | 26 +++++++------------------- 2 files changed, 8 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/net/geneve.h b/include/net/geneve.h index b40f4affc4cb..03aa2adb5bab 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -68,7 +68,7 @@ struct geneve_sock; typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb); struct geneve_sock { - struct hlist_node hlist; + struct list_head list; geneve_rcv_t *rcv; void *rcv_data; struct socket *sock; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index ad8dbae11d01..4fe5a592821c 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -54,12 +53,9 @@ /* Protects sock_list and refcounts. 
*/ static DEFINE_MUTEX(geneve_mutex); -#define PORT_HASH_BITS 8 -#define PORT_HASH_SIZE (1<sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; -} - /* Find geneve socket based on network namespace and UDP port */ static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) { + struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - hlist_for_each_entry(gs, gs_head(net, port), hlist) { + list_for_each_entry(gs, &gn->sock_list, list) { if (inet_sk(gs->sock->sk)->inet_sport == port) return gs; } @@ -339,6 +329,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool ipv6) { + struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; @@ -371,7 +362,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); - hlist_add_head(&gs->hlist, gs_head(net, port)); + list_add(&gs->list, &gn->sock_list); return gs; } @@ -407,7 +398,7 @@ void geneve_sock_release(struct geneve_sock *gs) if (--gs->refcnt) goto unlock; - hlist_del(&gs->hlist); + list_del(&gs->list); geneve_notify_del_rx_port(gs); udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); @@ -420,17 +411,14 @@ EXPORT_SYMBOL_GPL(geneve_sock_release); static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); - unsigned int h; - for (h = 0; h < PORT_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&gn->sock_list[h]); + INIT_LIST_HEAD(&gn->sock_list); return 0; } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit = NULL, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; -- cgit v1.2.3 From 46b1e4f9115d48d17bb92f612e4fa45a521a0537 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:05 -0800 Subject: geneve: Check family when reusing sockets. When searching for an existing socket to reuse, the address family is not taken into account - only port number. This means that an IPv4 socket could be used for IPv6 traffic and vice versa, which is sure to cause problems when passing packets. It is not possible to trigger this problem currently because the only user of Geneve creates just IPv4 sockets. However, that is likely to change in the near future. Signed-off-by: Jesse Gross Signed-off-by: David S. 
Miller --- net/ipv4/geneve.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 4fe5a592821c..5b52046ec7a2 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -65,14 +65,15 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) return (struct genevehdr *)(udp_hdr(skb) + 1); } -/* Find geneve socket based on network namespace and UDP port */ -static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) +static struct geneve_sock *geneve_find_sock(struct net *net, + sa_family_t family, __be16 port) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; list_for_each_entry(gs, &gn->sock_list, list) { - if (inet_sk(gs->sock->sk)->inet_sport == port) + if (inet_sk(gs->sock->sk)->inet_sport == port && + inet_sk(gs->sock->sk)->sk.sk_family == family) return gs; } @@ -375,7 +376,7 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, mutex_lock(&geneve_mutex); - gs = geneve_find_sock(net, port); + gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); if (gs) { if (!no_share && gs->rcv == rcv) gs->refcnt++; -- cgit v1.2.3 From 0f8162326fafb266ad21ca33b2ee0e9703dd653b Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Wed, 10 Dec 2014 17:21:59 +0800 Subject: netfilter: nfnetlink: remove redundant variable nskb Actually after netlink_skb_clone() is called, the nskb and skb will point to the same thing, but they are used just like they are different, sometimes this is confusing, so i think there is no necessary to keep nskb anymore. Signed-off-by: Duan Jiong Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 13c2e17bbe27..a7a7e8299d5a 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -272,7 +272,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { - struct sk_buff *nskb, *oskb = skb; + struct sk_buff *oskb = skb; struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; @@ -283,12 +283,11 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL); replay: - nskb = netlink_skb_clone(oskb, GFP_KERNEL); - if (!nskb) + skb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!skb) return netlink_ack(oskb, nlh, -ENOMEM); - nskb->sk = oskb->sk; - skb = nskb; + skb->sk = oskb->sk; nfnl_lock(subsys_id); ss = rcu_dereference_protected(table[subsys_id].subsys, @@ -305,7 +304,7 @@ replay: { nfnl_unlock(subsys_id); netlink_ack(skb, nlh, -EOPNOTSUPP); - return kfree_skb(nskb); + return kfree_skb(skb); } } @@ -385,7 +384,7 @@ replay: nfnl_err_reset(&err_list); ss->abort(oskb); nfnl_unlock(subsys_id); - kfree_skb(nskb); + kfree_skb(skb); goto replay; } } @@ -426,7 +425,7 @@ done: nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); - kfree_skb(nskb); + kfree_skb(skb); } static void nfnetlink_rcv(struct sk_buff *skb) -- cgit v1.2.3 From b18c5d15e8714336365d9d51782d5b53afa0443c Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 24 Dec 2014 23:04:54 +0800 Subject: netfilter: nfnetlink_cthelper: Remove 'const' and '&' to avoid warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The related code can be simplified, and also can avoid related warnings (with allmodconfig under parisc): CC [M] net/netfilter/nfnetlink_cthelper.o net/netfilter/nfnetlink_cthelper.c: In function ‘nfnl_cthelper_from_nlattr’: net/netfilter/nfnetlink_cthelper.c:97:9: warning: passing argument 1 o ‘memcpy’ discards ‘const’ qualifier from pointer target type [-Wdiscarded-array-qualifiers] memcpy(&help->data, nla_data(attr), help->helper->data_len); ^ In file included from include/linux/string.h:17:0, from include/uapi/linux/uuid.h:25, from include/linux/uuid.h:23, from include/linux/mod_devicetable.h:12, from ./arch/parisc/include/asm/hardware.h:4, from ./arch/parisc/include/asm/processor.h:15, from ./arch/parisc/include/asm/spinlock.h:6, from ./arch/parisc/include/asm/atomic.h:21, from include/linux/atomic.h:4, from ./arch/parisc/include/asm/bitops.h:12, from include/linux/bitops.h:36, from include/linux/kernel.h:10, from include/linux/list.h:8, from include/linux/module.h:9, from net/netfilter/nfnetlink_cthelper.c:11: ./arch/parisc/include/asm/string.h:8:8: note: expected ‘void *’ but argument is of type ‘const char (*)[]’ void * memcpy(void * dest,const void *src,size_t count); ^ Signed-off-by: Chen Gang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 9e287cb56a04..a5599fc51a6f 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -86,7 +86,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, static int nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) { - const struct nf_conn_help *help = nfct_help(ct); + struct nf_conn_help *help = nfct_help(ct); if (attr == NULL) return -EINVAL; @@ -94,7 +94,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) if (help->helper->data_len == 0) return -EINVAL; - memcpy(&help->data, nla_data(attr), help->helper->data_len); + memcpy(help->data, nla_data(attr), help->helper->data_len); return 0; } -- cgit v1.2.3 From b44b565cf548d78c1fa80321b14bf6b5308f337a Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 29 Dec 2014 16:22:11 +0800 Subject: netfilter: nf_ct_seqadj: print ack seq in the right host byte order new_start_seq and new_end_seq are network byte order, print the host byte order in debug message and print seq number as the type of unsigned int. 
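To make the byte-order pitfall concrete, here is a minimal stand-alone user-space sketch (an illustration only, not part of this patch): a sequence number held in network byte order prints as garbage unless it is converted back with ntohl(), and a signed %d misrenders sequence numbers above 2^31.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t host_seq = 3000000000u;	/* deliberately larger than INT32_MAX */
	uint32_t wire_seq = htonl(host_seq);	/* network byte order, as carried in the TCP header */

	/* Wrong: prints the raw big-endian value, and %d misrenders large sequence numbers */
	printf("wrong: %d\n", (int)wire_seq);

	/* Right: convert back to host byte order and print as unsigned, as the patch does */
	printf("right: %u\n", ntohl(wire_seq));

	return 0;
}
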
Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_seqadj.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index f6e2ae91a80b..ce3e840c8704 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -98,9 +98,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, new_end_seq = htonl(ntohl(sack->end_seq) - seq->offset_before); - pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); + pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n", + ntohl(sack->start_seq), ntohl(new_start_seq), + ntohl(sack->end_seq), ntohl(new_end_seq)); inet_proto_csum_replace4(&tcph->check, skb, sack->start_seq, new_start_seq, 0); -- cgit v1.2.3 From 224d019c4fbba242041e9b25a926ba873b7da1e2 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Jan 2015 13:56:14 -0800 Subject: ip: Move checksum convert defines to inet Move convert_csum from udp_sock to inet_sock. This allows the possibility that we can use convert checksum for different types of sockets and also allows convert checksum to be enabled from inet layer (what we'll want to do when enabling IP_CHECKSUM cmsg). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/udp.h | 16 +--------------- include/net/inet_sock.h | 17 +++++++++++++++++ net/ipv4/fou.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv4/udp_tunnel.c | 2 +- net/ipv6/udp.c | 2 +- 6 files changed, 22 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index ee3277593222..247cfdcc4b08 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -49,11 +49,7 @@ struct udp_sock { unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - convert_csum:1;/* On receive, convert checksum - * unnecessary to checksum complete - * if possible. - */ + no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. 
@@ -102,16 +98,6 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } -static inline void udp_set_convert_csum(struct sock *sk, bool val) -{ - udp_sk(sk)->convert_csum = val; -} - -static inline bool udp_get_convert_csum(struct sock *sk) -{ - return udp_sk(sk)->convert_csum; -} - #define udp_portaddr_for_each_entry(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a829b77523cf..360b110b3e36 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -184,6 +184,7 @@ struct inet_sock { mc_all:1, nodefrag:1; __u8 rcv_tos; + __u8 convert_csum; int uc_index; int mc_index; __be32 mc_addr; @@ -250,4 +251,20 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) return flags; } +static inline void inet_inc_convert_csum(struct sock *sk) +{ + inet_sk(sk)->convert_csum++; +} + +static inline void inet_dec_convert_csum(struct sock *sk) +{ + if (inet_sk(sk)->convert_csum > 0) + inet_sk(sk)->convert_csum--; +} + +static inline bool inet_get_convert_csum(struct sock *sk) +{ + return !!inet_sk(sk)->convert_csum; +} + #endif /* _INET_SOCK_H */ diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index b986298a7ba3..2197c36f722f 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -490,7 +490,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_user_data = fou; fou->sock = sock; - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); sk->sk_allocation = GFP_ATOMIC; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 13b4dcf86ef6..53358d88f110 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1806,7 +1806,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, inet_compute_pseudo); diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 1671263e5fa0..9996e63ed304 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -63,7 +63,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, inet_sk(sk)->mc_loop = 0; /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); rcu_assign_sk_user_data(sk, cfg->sk_user_data); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 189dc4ae3eca..e41f017cd479 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -909,7 +909,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, ip6_compute_pseudo); -- cgit v1.2.3 From c44d13d6f341ca59f3d6646f2337d4d3c8a814a6 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Jan 2015 13:56:15 -0800 Subject: ip: IP cmsg cleanup Move the IP_CMSG_* constants from ip_sockglue.c to inet_sock.h so that they can be referenced in other source files. Restructure ip_cmsg_recv to not go through flags using shift, check for flags by 'and'. This eliminates both the shift and a conditional per flag check. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/inet_sock.h | 11 ++++++++- net/ipv4/ip_sockglue.c | 64 ++++++++++++++++++++++++++++--------------------- 2 files changed, 47 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 360b110b3e36..605ca421d5ab 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -16,7 +16,7 @@ #ifndef _INET_SOCK_H #define _INET_SOCK_H - +#include #include #include #include @@ -195,6 +195,15 @@ struct inet_sock { #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ #define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ +/* cmsg flags for inet */ +#define IP_CMSG_PKTINFO BIT(0) +#define IP_CMSG_TTL BIT(1) +#define IP_CMSG_TOS BIT(2) +#define IP_CMSG_RECVOPTS BIT(3) +#define IP_CMSG_RETOPTS BIT(4) +#define IP_CMSG_PASSSEC BIT(5) +#define IP_CMSG_ORIGDSTADDR BIT(6) + static inline struct inet_sock *inet_sk(const struct sock *sk) { return (struct inet_sock *)sk; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8a89c738b7a3..80f78565b41b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -45,14 +45,6 @@ #include #include -#define IP_CMSG_PKTINFO 1 -#define IP_CMSG_TTL 2 -#define IP_CMSG_TOS 4 -#define IP_CMSG_RECVOPTS 8 -#define IP_CMSG_RETOPTS 16 -#define IP_CMSG_PASSSEC 32 -#define IP_CMSG_ORIGDSTADDR 64 - /* * SOL_IP control messages. */ @@ -150,37 +142,55 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ - if (flags & 1) + if (flags & IP_CMSG_PKTINFO) { ip_cmsg_recv_pktinfo(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PKTINFO; + if (!flags) + return; + } + + if (flags & IP_CMSG_TTL) { ip_cmsg_recv_ttl(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TTL; + if (!flags) + return; + } + + if (flags & IP_CMSG_TOS) { ip_cmsg_recv_tos(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TOS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RECVOPTS) { ip_cmsg_recv_opts(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RECVOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RETOPTS) { ip_cmsg_recv_retopts(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RETOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_PASSSEC) { ip_cmsg_recv_security(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PASSSEC; + if (!flags) + return; + } + + if (flags & IP_CMSG_ORIGDSTADDR) ip_cmsg_recv_dstaddr(msg, skb); } -- cgit v1.2.3 From 5961de9f199bef6ff437d7d85fe69b6a1964739b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Jan 2015 13:56:16 -0800 Subject: ip: Add offset parameter to ip_cmsg_recv Add ip_cmsg_recv_offset function which takes an offset argument that indicates the starting offset in skb where data is being received from. This will be useful in the case of UDP and provided checksum to user space. ip_cmsg_recv is an inline call to ip_cmsg_recv_offset with offset of zero. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/ip.h | 7 ++++++- net/ipv4/ip_sockglue.c | 5 +++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index 0bb620702929..0e5a0bae187f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -537,7 +537,7 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -557,6 +557,11 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, u32 info); +static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) +{ + ip_cmsg_recv_offset(msg, skb, 0); +} + bool icmp_global_allow(void); extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 80f78565b41b..513d506ffebb 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -136,7 +136,8 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); } -void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, + int offset) { struct inet_sock *inet = inet_sk(skb->sk); unsigned int flags = inet->cmsg_flags; @@ -194,7 +195,7 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) ip_cmsg_recv_dstaddr(msg, skb); } -EXPORT_SYMBOL(ip_cmsg_recv); +EXPORT_SYMBOL(ip_cmsg_recv_offset); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6) -- cgit v1.2.3 From ad6f939ab193750cc94a265f58e007fb598c97b7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Jan 2015 13:56:17 -0800 Subject: ip: Add offset parameter to ip_cmsg_recv Add ip_cmsg_recv_offset function which takes an offset argument that indicates the starting offset in skb where data is being received from. This will be useful in the case of UDP and provided checksum to user space. ip_cmsg_recv is an inline call to ip_cmsg_recv_offset with offset of zero. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/inet_sock.h | 1 + include/uapi/linux/in.h | 1 + net/ipv4/ip_sockglue.c | 41 ++++++++++++++++++++++++++++++++++++++++- net/ipv4/udp.c | 2 +- 4 files changed, 43 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 605ca421d5ab..eb16c7beed1e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -203,6 +203,7 @@ struct inet_sock { #define IP_CMSG_RETOPTS BIT(4) #define IP_CMSG_PASSSEC BIT(5) #define IP_CMSG_ORIGDSTADDR BIT(6) +#define IP_CMSG_CHECKSUM BIT(7) static inline struct inet_sock *inet_sk(const struct sock *sk) { diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index c33a65e3d62c..589ced069e8a 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -109,6 +109,7 @@ struct in_addr { #define IP_MINTTL 21 #define IP_NODEFRAG 22 +#define IP_CHECKSUM 23 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 513d506ffebb..a317797b3cd0 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -37,6 +37,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -96,6 +97,20 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, + int offset) +{ + __wsum csum = skb->csum; + + if (skb->ip_summed != CHECKSUM_COMPLETE) + return; + + if (offset != 0) + csum = csum_sub(csum, csum_partial(skb->data, offset, 0)); + + put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); +} + static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; @@ -191,9 +206,16 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, return; } - if (flags & IP_CMSG_ORIGDSTADDR) + if (flags & IP_CMSG_ORIGDSTADDR) { ip_cmsg_recv_dstaddr(msg, skb); + flags &= ~IP_CMSG_ORIGDSTADDR; + if (!flags) + return; + } + + if (flags & IP_CMSG_CHECKSUM) + ip_cmsg_recv_checksum(msg, skb, offset); } EXPORT_SYMBOL(ip_cmsg_recv_offset); @@ -533,6 +555,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_MULTICAST_ALL: case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: + case IP_CHECKSUM: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -630,6 +653,19 @@ static int do_ip_setsockopt(struct sock *sk, int level, else inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; break; + case IP_CHECKSUM: + if (val) { + if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) { + inet_inc_convert_csum(sk); + inet->cmsg_flags |= IP_CMSG_CHECKSUM; + } + } else { + if (inet->cmsg_flags & IP_CMSG_CHECKSUM) { + inet_dec_convert_csum(sk); + inet->cmsg_flags &= ~IP_CMSG_CHECKSUM; + } + } + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -1233,6 +1269,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_RECVORIGDSTADDR: val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; break; + case IP_CHECKSUM: + val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; + break; case IP_TOS: val = inet->tos; break; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 53358d88f110..97ef1f8b7be8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1329,7 +1329,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, skb, 
sizeof(struct udphdr)); err = copied; if (flags & MSG_TRUNC) -- cgit v1.2.3 From 6cb69742daa1770fe1ce54cf45f8951376518176 Mon Sep 17 00:00:00 2001 From: Hubert Sokolowski Date: Mon, 5 Jan 2015 17:29:21 +0000 Subject: net: Do not call ndo_dflt_fdb_dump if ndo_fdb_dump is defined Add checking whether the call to ndo_dflt_fdb_dump is needed. It is not expected to call ndo_dflt_fdb_dump unconditionally by some drivers (i.e. qlcnic or macvlan) that defines own ndo_fdb_dump. Other drivers define own ndo_fdb_dump and don't want ndo_dflt_fdb_dump to be called at all. At the same time it is desirable to call the default dump function on a bridge device. Fix attributes that are passed to dev->netdev_ops->ndo_fdb_dump. Add extra checking in br_fdb_dump to avoid duplicate entries as now filter_dev can be NULL. Following tests for filtering have been performed before the change and after the patch was applied to make sure they are the same and it doesn't break the filtering algorithm. [root@localhost ~]# cd /root/iproute2-3.18.0/bridge [root@localhost bridge]# modprobe dummy [root@localhost bridge]# ./bridge fdb add f1:f2:f3:f4:f5:f6 dev dummy0 [root@localhost bridge]# brctl addbr br0 [root@localhost bridge]# brctl addif br0 dummy0 [root@localhost bridge]# ip link set dev br0 address 02:00:00:12:01:04 [root@localhost bridge]# # show all [root@localhost bridge]# ./bridge fdb show 33:33:00:00:00:01 dev p2p1 self permanent 01:00:5e:00:00:01 dev p2p1 self permanent 33:33:ff:ac:ce:32 dev p2p1 self permanent 33:33:00:00:02:02 dev p2p1 self permanent 01:00:5e:00:00:fb dev p2p1 self permanent 33:33:00:00:00:01 dev p7p1 self permanent 01:00:5e:00:00:01 dev p7p1 self permanent 33:33:ff:79:50:53 dev p7p1 self permanent 33:33:00:00:02:02 dev p7p1 self permanent 01:00:5e:00:00:fb dev p7p1 self permanent f2:46:50:85:6d:d9 dev dummy0 master br0 permanent f2:46:50:85:6d:d9 dev dummy0 vlan 1 master br0 permanent 33:33:00:00:00:01 dev dummy0 self permanent f1:f2:f3:f4:f5:f6 dev dummy0 self permanent 33:33:00:00:00:01 dev br0 self permanent 02:00:00:12:01:04 dev br0 vlan 1 master br0 permanent 02:00:00:12:01:04 dev br0 master br0 permanent [root@localhost bridge]# # filter by bridge [root@localhost bridge]# ./bridge fdb show br br0 f2:46:50:85:6d:d9 dev dummy0 master br0 permanent f2:46:50:85:6d:d9 dev dummy0 vlan 1 master br0 permanent 33:33:00:00:00:01 dev dummy0 self permanent f1:f2:f3:f4:f5:f6 dev dummy0 self permanent 33:33:00:00:00:01 dev br0 self permanent 02:00:00:12:01:04 dev br0 vlan 1 master br0 permanent 02:00:00:12:01:04 dev br0 master br0 permanent [root@localhost bridge]# # filter by port [root@localhost bridge]# ./bridge fdb show brport dummy0 f2:46:50:85:6d:d9 master br0 permanent f2:46:50:85:6d:d9 vlan 1 master br0 permanent 33:33:00:00:00:01 self permanent f1:f2:f3:f4:f5:f6 self permanent [root@localhost bridge]# # filter by port + bridge [root@localhost bridge]# ./bridge fdb show br br0 brport dummy0 f2:46:50:85:6d:d9 master br0 permanent f2:46:50:85:6d:d9 vlan 1 master br0 permanent 33:33:00:00:00:01 self permanent f1:f2:f3:f4:f5:f6 self permanent [root@localhost bridge]# Signed-off-by: Hubert Sokolowski Signed-off-by: David S. 
Miller --- net/bridge/br_fdb.c | 7 ++++++- net/core/rtnetlink.c | 5 +++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index cc36e59db7d7..e6e0372bc3cd 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -686,6 +686,9 @@ int br_fdb_dump(struct sk_buff *skb, if (!(dev->priv_flags & IFF_EBRIDGE)) goto out; + if (!filter_dev) + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; @@ -697,7 +700,7 @@ int br_fdb_dump(struct sk_buff *skb, (!f->dst || f->dst->dev != filter_dev)) { if (filter_dev != dev) goto skip; - /* !f->dst is a speacial case for bridge + /* !f->dst is a special case for bridge * It means the MAC belongs to the bridge * Therefore need a little more filtering * we only want to dump the !f->dst case @@ -705,6 +708,8 @@ int br_fdb_dump(struct sk_buff *skb, if (f->dst) goto skip; } + if (!filter_dev && f->dst) + goto skip; if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9cf6fe9ddc0c..da983d4bac02 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2698,10 +2698,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) idx); } - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev, + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, idx); + else + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); cops = NULL; } -- cgit v1.2.3 From 0409c9a5a7546043d32e8c46df0dcaf65f97f659 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Jan 2015 23:57:43 +0100 Subject: net: fib6: fib6_commit_metrics: fix potential NULL pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When IPv6 host routes with metrics attached are being added, we fetch the metrics store from the dst via COW through dst_metrics_write_ptr(), added through commit e5fd387ad5b3. One remaining problem here is that we actually call into inet_getpeer() and may end up allocating/creating a new peer from the kmemcache, which may fail. Example trace from perf probe (inet_getpeer:41) where create is 1: ip 6877 [002] 4221.391591: probe:inet_getpeer: (ffffffff8165e293) 85e294 inet_getpeer.part.7 (<- kmem_cache_alloc()) 85e578 inet_getpeer 8eb333 ipv6_cow_metrics 8f10ff fib6_commit_metrics Therefore, a check for NULL on the return of dst_metrics_write_ptr() is necessary here. Joint work with Florian Westphal. Fixes: e5fd387ad5b3 ("ipv6: do not overwrite inetpeer metrics prematurely") Cc: Michal Kubeček Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b2d1838897c9..db4984e13f2f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -633,18 +633,17 @@ static bool rt6_qualify_for_ecmp(struct rt6_info *rt) static int fib6_commit_metrics(struct dst_entry *dst, struct nlattr *mx, int mx_len) { + bool dst_host = dst->flags & DST_HOST; struct nlattr *nla; int remaining; u32 *mp; - if (dst->flags & DST_HOST) { - mp = dst_metrics_write_ptr(dst); - } else { - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); - if (!mp) - return -ENOMEM; + mp = dst_host ? 
dst_metrics_write_ptr(dst) : + kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + if (unlikely(!mp)) + return -ENOMEM; + if (!dst_host) dst_init_metrics(dst, mp, 0); - } nla_for_each_attr(nla, mx, mx_len, remaining) { int type = nla_type(nla); -- cgit v1.2.3 From e715b6d3a5ef55834778d49224e60e8ccb5bf45f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 Jan 2015 23:57:44 +0100 Subject: net: fib6: convert cfg metric to u32 outside of table write lock Do the nla validation earlier, outside the write lock. This is needed by followup patch which needs to be able to call request_module (which can sleep) if needed. Joint work with Daniel Borkmann. Signed-off-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 10 +++++--- net/ipv6/ip6_fib.c | 69 +++++++++++++++++++++++++++------------------------ net/ipv6/route.c | 57 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 90 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 8eea35d32a75..20e80fa7bbdd 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -74,6 +74,11 @@ struct fib6_node { #define FIB6_SUBTREE(fn) ((fn)->subtree) #endif +struct mx6_config { + const u32 *mx; + DECLARE_BITMAP(mx_valid, RTAX_MAX); +}; + /* * routing information * @@ -291,9 +296,8 @@ struct fib6_node *fib6_locate(struct fib6_node *root, void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), void *arg); -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len); - +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nl_info *info, struct mx6_config *mxc); int fib6_del(struct rt6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index db4984e13f2f..03c520a4ebeb 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -630,31 +630,35 @@ static bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } -static int fib6_commit_metrics(struct dst_entry *dst, - struct nlattr *mx, int mx_len) +static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) { - bool dst_host = dst->flags & DST_HOST; - struct nlattr *nla; - int remaining; - u32 *mp; + int i; - mp = dst_host ? dst_metrics_write_ptr(dst) : - kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); - if (unlikely(!mp)) - return -ENOMEM; - if (!dst_host) - dst_init_metrics(dst, mp, 0); + for (i = 0; i < RTAX_MAX; i++) { + if (test_bit(i, mxc->mx_valid)) + mp[i] = mxc->mx[i]; + } +} + +static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) +{ + if (!mxc->mx) + return 0; - nla_for_each_attr(nla, mx, mx_len, remaining) { - int type = nla_type(nla); + if (dst->flags & DST_HOST) { + u32 *mp = dst_metrics_write_ptr(dst); - if (type) { - if (type > RTAX_MAX) - return -EINVAL; + if (unlikely(!mp)) + return -ENOMEM; - mp[type - 1] = nla_get_u32(nla); - } + fib6_copy_metrics(mp, mxc); + } else { + dst_init_metrics(dst, mxc->mx, false); + + /* We've stolen mx now. 
*/ + mxc->mx = NULL; } + return 0; } @@ -663,7 +667,7 @@ static int fib6_commit_metrics(struct dst_entry *dst, */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct nlattr *mx, int mx_len) + struct nl_info *info, struct mx6_config *mxc) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -772,11 +776,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; @@ -796,11 +799,11 @@ add: pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + *ins = rt; rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; @@ -837,8 +840,8 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nl_info *info, struct mx6_config *mxc) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -933,7 +936,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, } #endif - err = fib6_add_rt2node(fn, rt, info, mx, mx_len); + err = fib6_add_rt2node(fn, rt, info, mxc); if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c91083156edb..454771d20b21 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -853,14 +853,14 @@ EXPORT_SYMBOL(rt6_lookup); */ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) + struct mx6_config *mxc) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); + err = fib6_add(&table->tb6_root, rt, info, mxc); write_unlock_bh(&table->tb6_lock); return err; @@ -868,10 +868,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, int ip6_ins_rt(struct rt6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; - return __ip6_ins_rt(rt, &info, NULL, 0); + struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; + struct mx6_config mxc = { .mx = NULL, }; + + return __ip6_ins_rt(rt, &info, &mxc); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -1470,9 +1470,39 @@ out: return entries > rt_max_size; } -/* - * - */ +static int ip6_convert_metrics(struct mx6_config *mxc, + const struct fib6_config *cfg) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (cfg->fc_mx == NULL) + return 0; + + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (unlikely(!mp)) + return -ENOMEM; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + if (unlikely(type > RTAX_MAX)) + goto err; + + mp[type - 1] = nla_get_u32(nla); + __set_bit(type - 1, mxc->mx_valid); + } + } + + mxc->mx = mp; + + return 0; + err: + kfree(mp); + return -EINVAL; +} int ip6_route_add(struct fib6_config *cfg) { @@ -1482,6 +1512,7 @@ int ip6_route_add(struct fib6_config *cfg) struct net_device *dev = NULL; struct inet6_dev 
*idev = NULL; struct fib6_table *table; + struct mx6_config mxc = { .mx = NULL, }; int addr_type; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) @@ -1677,8 +1708,14 @@ install_route: cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); + err = ip6_convert_metrics(&mxc, cfg); + if (err) + goto out; + + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); + kfree(mxc.mx); + return err; out: if (dev) dev_put(dev); -- cgit v1.2.3 From 29ba4fffd396f4beefe34d24d7bcd86cb5c3e492 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Jan 2015 23:57:45 +0100 Subject: net: tcp: refactor reinitialization of congestion control We can just move this to an extra function and make the code a bit more readable, no functional change. Joint work with Florian Westphal. Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 27ead0dd16bc..38f2f8aa4ceb 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -107,6 +107,18 @@ void tcp_init_congestion_control(struct sock *sk) icsk->icsk_ca_ops->init(sk); } +static void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + + if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); +} + /* Manage refcounts on socket close. */ void tcp_cleanup_congestion_control(struct sock *sk) { @@ -262,21 +274,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) #endif if (!ca) err = -ENOENT; - else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) err = -EPERM; - else if (!try_module_get(ca->owner)) err = -EBUSY; - - else { - tcp_cleanup_congestion_control(sk); - icsk->icsk_ca_ops = ca; - - if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); - } + else + tcp_reinit_congestion_control(sk, ca); out: rcu_read_unlock(); return err; -- cgit v1.2.3 From c5c6a8ab45ec0f18733afb4aaade0d4a139d80b3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Jan 2015 23:57:46 +0100 Subject: net: tcp: add key management to congestion control This patch adds necessary infrastructure to the congestion control framework for later per route congestion control support. For a per route congestion control possibility, our aim is to store a unique u32 key identifier into dst metrics, which can then be mapped into a tcp_congestion_ops struct. We argue that having a RTAX key entry is the most simple, generic and easy way to manage, and also keeps the memory footprint of dst entries lower on 64 bit than with storing a pointer directly, for example. Having a unique key id also allows for decoupling actual TCP congestion control module management from the FIB layer, i.e. we don't have to care about expensive module refcounting inside the FIB at this point. We first thought of using an IDR store for the realization, which takes over dynamic assignment of unused key space and also performs the key to pointer mapping in RCU. 
While doing so, we stumbled upon the issue that due to the nature of dynamic key distribution, it just so happens, arguably in very rare occasions, that excessive module loads and unloads can lead to a possible reuse of previously used key space. Thus, previously stale keys in the dst metric are now being reassigned to a different congestion control algorithm, which might lead to unexpected behaviour. One way to resolve this would have been to walk FIBs on the actually rare occasion of a module unload and reset the metric keys for each FIB in each netns, but that's just very costly. Therefore, we argue a better solution is to reuse the unique congestion control algorithm name member and map that into u32 key space through jhash. For that, we split the flags attribute (as it currently uses 2 bits only anyway) into two u32 attributes, flags and key, so that we can keep the cacheline boundary of 2 cachelines on x86_64 and cache the precalculated key at registration time for the fast path. On average we might expect 2 - 4 modules being loaded worst case perhaps 15, so a key collision possibility is extremely low, and guaranteed collision-free on LE/BE for all in-tree modules. Overall this results in much simpler code, and all without the overhead of an IDR. Due to the deterministic nature, modules can now be unloaded, the congestion control algorithm for a specific but unloaded key will fall back to the default one, and on module reload time it will switch back to the expected algorithm transparently. Joint work with Florian Westphal. Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 3 +- include/net/tcp.h | 9 +++- net/ipv4/tcp_cong.c | 97 +++++++++++++++++++++++++++++++------- 3 files changed, 91 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 848e85cb5c61..5976bdecf58b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -98,7 +98,8 @@ struct inet_connection_sock { const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); - __u8 icsk_ca_state; + __u8 icsk_ca_state:7, + icsk_ca_dst_locked:1; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; diff --git a/include/net/tcp.h b/include/net/tcp.h index f50f29faf76f..135b70c9a734 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -787,6 +787,8 @@ enum tcp_ca_ack_event_flags { #define TCP_CA_MAX 128 #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) +#define TCP_CA_UNSPEC 0 + /* Algorithm can be set on socket without CAP_NET_ADMIN privileges */ #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ @@ -794,7 +796,8 @@ enum tcp_ca_ack_event_flags { struct tcp_congestion_ops { struct list_head list; - unsigned long flags; + u32 key; + u32 flags; /* initialize private data (optional) */ void (*init)(struct sock *sk); @@ -841,6 +844,10 @@ u32 tcp_reno_ssthresh(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; +struct tcp_congestion_ops *tcp_ca_find_key(u32 key); +u32 tcp_ca_get_key_by_name(const char *name); +char *tcp_ca_get_name_by_key(u32 key, char *buffer); + static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_cong.c 
b/net/ipv4/tcp_cong.c index 38f2f8aa4ceb..63c29dba68a8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -13,6 +13,7 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(tcp_cong_list_lock); @@ -31,6 +32,34 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) return NULL; } +/* Must be called with rcu lock held */ +static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) +{ + const struct tcp_congestion_ops *ca = tcp_ca_find(name); +#ifdef CONFIG_MODULES + if (!ca && capable(CAP_NET_ADMIN)) { + rcu_read_unlock(); + request_module("tcp_%s", name); + rcu_read_lock(); + ca = tcp_ca_find(name); + } +#endif + return ca; +} + +/* Simple linear search, not much in here. */ +struct tcp_congestion_ops *tcp_ca_find_key(u32 key) +{ + struct tcp_congestion_ops *e; + + list_for_each_entry_rcu(e, &tcp_cong_list, list) { + if (e->key == key) + return e; + } + + return NULL; +} + /* * Attach new congestion control algorithm to the list * of available options. @@ -45,9 +74,12 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) return -EINVAL; } + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); + spin_lock(&tcp_cong_list_lock); - if (tcp_ca_find(ca->name)) { - pr_notice("%s already registered\n", ca->name); + if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { + pr_notice("%s already registered or non-unique key\n", + ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); @@ -70,9 +102,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) spin_lock(&tcp_cong_list_lock); list_del_rcu(&ca->list); spin_unlock(&tcp_cong_list_lock); + + /* Wait for outstanding readers to complete before the + * module gets removed entirely. + * + * A try_module_get() should fail by now as our module is + * in "going" state since no refs are held anymore and + * module_exit() handler being called. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); +u32 tcp_ca_get_key_by_name(const char *name) +{ + const struct tcp_congestion_ops *ca; + u32 key; + + might_sleep(); + + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + key = ca ? ca->key : TCP_CA_UNSPEC; + rcu_read_unlock(); + + return key; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name); + +char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + const struct tcp_congestion_ops *ca; + char *ret = NULL; + + rcu_read_lock(); + ca = tcp_ca_find_key(key); + if (ca) + ret = strncpy(buffer, ca->name, + TCP_CA_NAME_MAX); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); + /* Assign choice of congestion control. 
*/ void tcp_assign_congestion_control(struct sock *sk) { @@ -253,25 +326,17 @@ out: int tcp_set_congestion_control(struct sock *sk, const char *name) { struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_congestion_ops *ca; + const struct tcp_congestion_ops *ca; int err = 0; - rcu_read_lock(); - ca = tcp_ca_find(name); + if (icsk->icsk_ca_dst_locked) + return -EPERM; - /* no change asking for existing value */ + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) goto out; - -#ifdef CONFIG_MODULES - /* not found attempt to autoload module */ - if (!ca && capable(CAP_NET_ADMIN)) { - rcu_read_unlock(); - request_module("tcp_%s", name); - rcu_read_lock(); - ca = tcp_ca_find(name); - } -#endif if (!ca) err = -ENOENT; else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || -- cgit v1.2.3 From ea697639992d96da98016b8934e68a73876a2264 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Jan 2015 23:57:47 +0100 Subject: net: tcp: add RTAX_CC_ALGO fib handling This patch adds the minimum necessary for the RTAX_CC_ALGO congestion control metric to be set up and dumped back to user space. While the internal representation of RTAX_CC_ALGO is handled as a u32 key, we avoided exposing this implementation detail to user space; instead, we chose the netlink attribute that is exchanged with user space to be the actual congestion control algorithm name, similar to the setsockopt(2) API, in order to allow for maximum flexibility, even for 3rd party modules. It is a bit unfortunate that RTAX_QUICKACK used up a whole RTAX slot, as it should have been stored in RTAX_FEATURES instead; we first thought about reusing it for the congestion control key, but it brings more complications and/or confusion than it is worth. Joint work with Florian Westphal. Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Signed-off-by: David S.
Miller --- include/net/tcp.h | 7 +++++++ include/uapi/linux/rtnetlink.h | 2 ++ net/core/rtnetlink.c | 15 +++++++++++++-- net/decnet/dn_fib.c | 3 ++- net/decnet/dn_table.c | 4 +++- net/ipv4/fib_semantics.c | 14 ++++++++++++-- net/ipv6/route.c | 17 +++++++++++++++-- 7 files changed, 54 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 135b70c9a734..95bb237152e0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -846,7 +846,14 @@ extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find_key(u32 key); u32 tcp_ca_get_key_by_name(const char *name); +#ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); +#else +static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + return NULL; +} +#endif static inline bool tcp_ca_needs_ecn(const struct sock *sk) { diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 9c9b8b4480cd..d81f22d5b390 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -389,6 +389,8 @@ enum { #define RTAX_INITRWND RTAX_INITRWND RTAX_QUICKACK, #define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO __RTAX_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index da983d4bac02..6a6cdade1676 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) for (i = 0; i < RTAX_MAX; i++) { if (metrics[i]) { + if (i == RTAX_CC_ALGO - 1) { + char tmp[TCP_CA_NAME_MAX], *name; + + name = tcp_ca_get_name_by_key(metrics[i], tmp); + if (!name) + continue; + if (nla_put_string(skb, i + 1, name)) + goto nla_put_failure; + } else { + if (nla_put_u32(skb, i + 1, metrics[i])) + goto nla_put_failure; + } valid++; - if (nla_put_u32(skb, i+1, metrics[i])) - goto nla_put_failure; } } diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index d332aefb0846..df4803437888 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -298,7 +298,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att int type = nla_type(attr); if (type) { - if (type > RTAX_MAX || nla_len(attr) < 4) + if (type > RTAX_MAX || type == RTAX_CC_ALGO || + nla_len(attr) < 4) goto err_inval; fi->fib_metrics[type-1] = nla_get_u32(attr); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 86e3807052e9..3f19fcbf126d 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -29,6 +29,7 @@ #include /* RTF_xxx */ #include #include +#include #include #include #include @@ -273,7 +274,8 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(2) /* RTA_DST */ - + nla_total_size(4); /* RTA_PRIORITY */ + + nla_total_size(4) /* RTA_PRIORITY */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f99f41bd15b8..d2b7b5521b1b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -360,7 +360,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_DST */ + nla_total_size(4) /* RTA_PRIORITY */ - + nla_total_size(4); /* RTA_PREFSRC */ + + nla_total_size(4) /* RTA_PREFSRC 
*/ + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); @@ -859,7 +860,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (type > RTAX_MAX) goto err_inval; - val = nla_get_u32(nla); + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err_inval; + } else { + val = nla_get_u32(nla); + } if (type == RTAX_ADVMSS && val > 65535 - 40) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 454771d20b21..34dcbb59df75 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1488,10 +1488,22 @@ static int ip6_convert_metrics(struct mx6_config *mxc, int type = nla_type(nla); if (type) { + u32 val; + if (unlikely(type > RTAX_MAX)) goto err; + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err; + } else { + val = nla_get_u32(nla); + } - mp[type - 1] = nla_get_u32(nla); + mp[type - 1] = val; __set_bit(type - 1, mxc->mx_valid); } } @@ -2571,7 +2583,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_OIF */ + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ - + nla_total_size(sizeof(struct rta_cacheinfo)); + + nla_total_size(sizeof(struct rta_cacheinfo)) + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ } static int rt6_fill_node(struct net *net, -- cgit v1.2.3 From 81164413ad096bafe8ad1068f3f095a7dd081d8b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Jan 2015 23:57:48 +0100 Subject: net: tcp: add per route congestion control This work adds the possibility to define a per route/destination congestion control algorithm. Generally, this opens up the possibility for a machine with different links to enforce specific congestion control algorithms with optimal strategies for each of them based on their network characteristics, even transparently for a single application listening on all links. For our specific use case, this additionally facilitates deployment of DCTCP; for example, applications can easily serve internal traffic/dsts with DCTCP and external traffic with CUBIC. Other scenarios would also allow for utilizing e.g. long living, low priority background flows for certain destinations/routes while still allowing normal traffic to utilize the default congestion control algorithm. We also thought about a per netns setting (where different defaults are possible), but given it's actually a link-specific property, we argue that a per route/destination setting is the most natural and flexible. The administrator can utilize this through ip-route(8) by appending "congctl [lock] NAME", where NAME denotes the name of a congestion control algorithm and the optional lock parameter allows enforcing the given algorithm so that applications in user space would not be allowed to overwrite that algorithm for that destination. The dst metric lookups are being done when a dst entry is already available, in order to avoid a costly lookup, and still before the algorithms are being initialized, thus overhead is very low when the feature is not being used. While the client side would need to drop the current reference on the module, on the server side this can actually even be avoided as we just got a flat-copied socket clone.
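For illustration only (not part of the original patch; the prefix and gateway are made-up documentation addresses), pinning DCTCP for a single destination with the syntax described above could look like:

  # ip route add 192.0.2.0/24 via 192.0.2.1 congctl lock dctcp

Here the optional "lock" keyword additionally prevents applications from overriding DCTCP for that destination via setsockopt(2).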
Joint work with Florian Westphal. Suggested-by: Hannes Frederic Sowa Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++++++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_minisocks.c | 30 ++++++++++++++++++++++++++---- net/ipv4/tcp_output.c | 21 +++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 2 ++ 5 files changed, 57 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 95bb237152e0..b8fdc6bab3f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -448,6 +448,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb); +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst); struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); @@ -636,6 +637,11 @@ static inline u32 tcp_rto_min_us(struct sock *sk) return jiffies_to_usecs(tcp_rto_min(sk)); } +static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) +{ + return dst_metric_locked(dst, RTAX_CC_ALGO); +} + /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a3f72d7fc06c..ad3e65bdd368 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1340,6 +1340,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d2680b65db..bc9216dc9de1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -399,6 +399,32 @@ static void tcp_ecn_openreq_child(struct tcp_sock *tp, tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + bool ca_got_dst = false; + + if (ca_key != TCP_CA_UNSPEC) { + const struct tcp_congestion_ops *ca; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + ca_got_dst = true; + } + rcu_read_unlock(); + } + + if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + tcp_assign_congestion_control(sk); + + tcp_set_ca_state(sk, TCP_CA_Open); +} +EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. 
-DaveM * @@ -451,10 +477,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - if (!try_module_get(newicsk->icsk_ca_ops->owner)) - tcp_assign_congestion_control(newsk); - - tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7f18262e2326..dc30cb563e4f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2939,6 +2939,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } EXPORT_SYMBOL(tcp_make_synack); +static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_congestion_ops *ca; + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + + if (ca_key == TCP_CA_UNSPEC) + return; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + module_put(icsk->icsk_ca_ops->owner); + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + } + rcu_read_unlock(); +} + /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { @@ -2964,6 +2983,8 @@ static void tcp_connect_init(struct sock *sk) tcp_mtup_init(sk); tcp_sync_mss(sk, dst_mtu(dst)); + tcp_ca_dst_init(sk, dst); + if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric_advmss(dst); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9c0b54e87b47..5d46832c6f72 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1199,6 +1199,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && -- cgit v1.2.3 From d75bb06b61cb69ee6223d791d3bb230e68623b20 Mon Sep 17 00:00:00 2001 From: Gautam Kumar Shukla Date: Tue, 23 Dec 2014 16:55:19 +0100 Subject: cfg80211: add extensible feature flag attribute With the wiphy::features flag being used up this patch adds a new field wiphy::ext_features. Considering extensibility this new field is declared as a byte array. This extensible flag is exposed to user-space by NL80211_ATTR_EXT_FEATURES. Cc: Avinash Patil Signed-off-by: Gautam (Gautam Kumar) Shukla Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 39 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 22 ++++++++++++++++++++++ net/wireless/nl80211.c | 5 +++++ 3 files changed, 66 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bd672ea08c9a..f38645fb83b9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3016,6 +3016,8 @@ struct wiphy_vendor_command { * @regulatory_flags: wiphy regulatory flags, see * &enum ieee80211_regulatory_flags * @features: features advertised to nl80211, see &enum nl80211_feature_flags. + * @ext_features: extended features advertised to nl80211, see + * &enum nl80211_ext_feature_index. 
* @bss_priv_size: each BSS struct has private data allocated with it, * this variable determines its size * @max_scan_ssids: maximum number of SSIDs the device can scan for in @@ -3125,6 +3127,7 @@ struct wiphy { u16 max_acl_mac_addrs; u32 flags, regulatory_flags, features; + u8 ext_features[DIV_ROUND_UP(NUM_NL80211_EXT_FEATURES, 8)]; u32 ap_sme_capa; @@ -5052,6 +5055,42 @@ void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, */ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy); +/** + * wiphy_ext_feature_set - set the extended feature flag + * + * @wiphy: the wiphy to modify. + * @ftidx: extended feature bit index. + * + * The extended features are flagged in multiple bytes (see + * &struct wiphy.@ext_features) + */ +static inline void wiphy_ext_feature_set(struct wiphy *wiphy, + enum nl80211_ext_feature_index ftidx) +{ + u8 *ft_byte; + + ft_byte = &wiphy->ext_features[ftidx / 8]; + *ft_byte |= BIT(ftidx % 8); +} + +/** + * wiphy_ext_feature_isset - check the extended feature flag + * + * @wiphy: the wiphy to modify. + * @ftidx: extended feature bit index. + * + * The extended features are flagged in multiple bytes (see + * &struct wiphy.@ext_features) + */ +static inline bool +wiphy_ext_feature_isset(struct wiphy *wiphy, + enum nl80211_ext_feature_index ftidx) +{ + u8 ft_byte; + + ft_byte = wiphy->ext_features[ftidx / 8]; + return (ft_byte & BIT(ftidx % 8)) != 0; +} /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 54f391141351..f95d35483086 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1713,6 +1713,13 @@ enum nl80211_commands { * obtained from it is coming from the device's wiphy and not the global * cfg80211 regdomain. * + * @NL80211_ATTR_EXT_FEATURES: extended feature flags contained in a byte + * array. The feature flags are identified by their bit index (see &enum + * nl80211_ext_feature_index). The bit index is ordered starting at the + * least-significant bit of the first byte in the array, ie. bit index 0 + * is located at bit 0 of byte 0. bit index 25 would be located at bit 1 + * of byte 3 (u8 array). + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2072,6 +2079,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_SELF_MANAGED_REG, + NL80211_ATTR_EXT_FEATURES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4223,6 +4232,19 @@ enum nl80211_feature_flags { NL80211_FEATURE_ND_RANDOM_MAC_ADDR = 1 << 31, }; +/** + * enum nl80211_ext_feature_index - bit index of extended features. + * + * @NUM_NL80211_EXT_FEATURES: number of extended features. + * @MAX_NL80211_EXT_FEATURES: highest extended feature index. + */ +enum nl80211_ext_feature_index { + + /* add new features before the definition below */ + NUM_NL80211_EXT_FEATURES, + MAX_NL80211_EXT_FEATURES = NUM_NL80211_EXT_FEATURES - 1 +}; + /** * enum nl80211_probe_resp_offload_support_attr - optional supported * protocols for probe-response offloading by the driver/FW. 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 702920134b34..689e1a8fd60a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1706,6 +1706,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; + if (nla_put(msg, NL80211_ATTR_EXT_FEATURES, + sizeof(rdev->wiphy.ext_features), + rdev->wiphy.ext_features)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; -- cgit v1.2.3 From 71b836eca7f380fbd4c025f8c4371f9a071bc909 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 23 Dec 2014 17:17:38 +0100 Subject: nl80211: define multicast group names in header Put the group names into the userspace API header file so that userspace clients can use symbolic names from there instead of hardcoding the actual names. This doesn't really change much, but seems somewhat cleaner. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ net/wireless/nl80211.c | 12 ++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f95d35483086..7ba9404b290d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -29,6 +29,13 @@ #define NL80211_GENL_NAME "nl80211" +#define NL80211_MULTICAST_GROUP_CONFIG "config" +#define NL80211_MULTICAST_GROUP_SCAN "scan" +#define NL80211_MULTICAST_GROUP_REG "regulatory" +#define NL80211_MULTICAST_GROUP_MLME "mlme" +#define NL80211_MULTICAST_GROUP_VENDOR "vendor" +#define NL80211_MULTICAST_GROUP_TESTMODE "testmode" + /** * DOC: Station handling * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 689e1a8fd60a..049f505e5660 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -59,13 +59,13 @@ enum nl80211_multicast_groups { }; static const struct genl_multicast_group nl80211_mcgrps[] = { - [NL80211_MCGRP_CONFIG] = { .name = "config", }, - [NL80211_MCGRP_SCAN] = { .name = "scan", }, - [NL80211_MCGRP_REGULATORY] = { .name = "regulatory", }, - [NL80211_MCGRP_MLME] = { .name = "mlme", }, - [NL80211_MCGRP_VENDOR] = { .name = "vendor", }, + [NL80211_MCGRP_CONFIG] = { .name = NL80211_MULTICAST_GROUP_CONFIG }, + [NL80211_MCGRP_SCAN] = { .name = NL80211_MULTICAST_GROUP_SCAN }, + [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, + [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, + [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, #ifdef CONFIG_NL80211_TESTMODE - [NL80211_MCGRP_TESTMODE] = { .name = "testmode", } + [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif }; -- cgit v1.2.3 From fa44b988d2da58334ecbcf3e00f9acccbdeace6c Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 1 Jan 2015 13:43:17 +0200 Subject: mac80211: skip disabled channels in VHT check The patch "40a11ca mac80211: check if channels allow 80 MHz for VHT probe requests" considered disabled channels as VHT enabled, and mistakenly sent out probe-requests with the VHT IE. 
Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- net/mac80211/util.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0f9bf479952e..ad8cb4fb441e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1470,10 +1470,12 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, /* Check if any channel in this sband supports at least 80 MHz */ for (i = 0; i < sband->n_channels; i++) { - if (!(sband->channels[i].flags & IEEE80211_CHAN_NO_80MHZ)) { - have_80mhz = true; - break; - } + if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_NO_80MHZ)) + continue; + + have_80mhz = true; + break; } if (sband->vht_cap.vht_supported && have_80mhz) { -- cgit v1.2.3 From 2f4383667d57d1c719070db86b14277277752841 Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Fri, 2 Jan 2015 17:27:56 -0800 Subject: ethtool: Extend ethtool plugin module eeprom API to phylib This patch extends the ethtool plugin module eeprom API to support cards whose phy support is delegated to a separate driver. The handlers for ETHTOOL_GMODULEINFO and ETHTOOL_GMODULEEEPROM call the module_info and module_eeprom functions if the phy driver provides them; otherwise the handlers call the equivalent ethtool_ops functions provided by network drivers with built-in phy support. Signed-off-by: Ed Swierk Signed-off-by: David S. Miller --- include/linux/phy.h | 9 +++++++++ net/core/ethtool.c | 45 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/phy.h b/include/linux/phy.h index 22af8f8f5802..9c189a1fa3a2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -565,6 +565,15 @@ struct phy_driver { void (*write_mmd_indirect)(struct phy_device *dev, int ptrad, int devnum, int regnum, u32 val); + /* Get the size and type of the eeprom contained within a plug-in + * module */ + int (*module_info)(struct phy_device *dev, + struct ethtool_modinfo *modinfo); + + /* Get the eeprom information from the plug-in module */ + int (*module_eeprom)(struct phy_device *dev, + struct ethtool_eeprom *ee, u8 *data); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 550892cd6b3f..91f74f3eb204 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1597,20 +1597,31 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) return err; } +static int __ethtool_get_module_info(struct net_device *dev, + struct ethtool_modinfo *modinfo) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_info) + return phydev->drv->module_info(phydev, modinfo); + + if (ops->get_module_info) + return ops->get_module_info(dev, modinfo); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_info(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_module_info) - return -EOPNOTSUPP; if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) return -EFAULT; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; @@ -1620,21 +1631,33 @@ static int ethtool_get_module_info(struct net_device *dev, return 0; } +static int __ethtool_get_module_eeprom(struct 
net_device *dev, + struct ethtool_eeprom *ee, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_eeprom) + return phydev->drv->module_eeprom(phydev, ee, data); + + if (ops->get_module_eeprom) + return ops->get_module_eeprom(dev, ee, data); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_eeprom(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_module_info || !ops->get_module_eeprom) - return -EOPNOTSUPP; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; - return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom, + return ethtool_get_any_eeprom(dev, useraddr, + __ethtool_get_module_eeprom, modinfo.eeprom_len); } -- cgit v1.2.3 From cc72f6e227b8091e0b8297a6be266bedcb20a5aa Mon Sep 17 00:00:00 2001 From: John Linville Date: Tue, 6 Jan 2015 14:39:33 -0500 Subject: mac80211: uninitialized return val in __ieee80211_sta_handle_tspec_ac_params The return value should be initialized to false so that there's a valid return value when there are no sessions that need work to be done on them. Luckily, the side effect of using the uninitialized value is an extra harmless driver call. Coverity: CID 1260096 Fixes: 02219b3abca59 ("mac80211: add WMM admission control support") Signed-off-by: John W. Linville [extend commit message] Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2c36c4765f47..837a406a9dd6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1643,7 +1643,7 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - bool ret; + bool ret = false; int ac; if (local->hw.queues < IEEE80211_NUM_ACS) -- cgit v1.2.3 From db12847ca84b7a315a3ba77c939c9d08df17d54f Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Tue, 6 Jan 2015 08:39:02 -0500 Subject: mac80211: Re-fix accounting of the tailroom-needed counter When hw acceleration is enabled, the GENERATE_IV or PUT_IV_SPACE flags only require headroom space. Therefore, the tailroom-needed counter can safely be decremented for most drivers. The older incarnation of this patch (ca34e3b5) assumed that the above holds true for all drivers. As reported by Christopher Chavez and researched by Christian Lamparter and Larry Finger, this isn't a valid assumption for p54 and cw1200. Drivers that still require tailroom for ICV/MIC even when HW encryption is enabled can use IEEE80211_KEY_FLAG_RESERVE_TAILROOM to indicate it. 
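As an illustrative sketch only (not taken from the patch), the rule that the tailroom accounting in the key.c changes below implements can be written as a single predicate: tailroom must still be reserved when software Michael MIC generation is needed or when the driver explicitly opts in via the new flag:

/* sketch only; assumes net/mac80211.h for ieee80211_key_conf and the key flags */
static bool key_needs_tailroom(const struct ieee80211_key_conf *conf)
{
	/* true if skbs for this key must keep tailroom for ICV/MIC */
	return (conf->flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
	       (conf->flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM);
}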
Signed-off-by: Ido Yariv Cc: Christopher Chavez Cc: Christian Lamparter Cc: Larry Finger Cc: Solomon Peachy Signed-off-by: Johannes Berg --- drivers/net/wireless/cw1200/sta.c | 3 ++- drivers/net/wireless/p54/main.c | 2 ++ include/net/mac80211.h | 11 +++++++++-- net/mac80211/key.c | 9 +++------ 4 files changed, 16 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c index a1e3237c0be8..4a47c7f8a246 100644 --- a/drivers/net/wireless/cw1200/sta.c +++ b/drivers/net/wireless/cw1200/sta.c @@ -709,7 +709,8 @@ int cw1200_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, if (sta) peer_addr = sta->addr; - key->flags |= IEEE80211_KEY_FLAG_PUT_IV_SPACE; + key->flags |= IEEE80211_KEY_FLAG_PUT_IV_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM; switch (key->cipher) { case WLAN_CIPHER_SUITE_WEP40: diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 97aeff0edb84..13a30c4a27f2 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -575,6 +575,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->hw_key_idx = 0xff; goto out_unlock; } + + key->flags |= IEEE80211_KEY_FLAG_RESERVE_TAILROOM; } else { slot = key->hw_key_idx; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ece1a546587e..555a845ad51e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1281,7 +1281,8 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * * @IEEE80211_KEY_FLAG_GENERATE_IV: This flag should be set by the * driver to indicate that it requires IV generation for this - * particular key. + * particular key. Setting this flag does not necessarily mean that SKBs + * will have sufficient tailroom for ICV or MIC. * @IEEE80211_KEY_FLAG_GENERATE_MMIC: This flag should be set by * the driver for a TKIP key if it requires Michael MIC * generation in software. @@ -1293,7 +1294,9 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * @IEEE80211_KEY_FLAG_PUT_IV_SPACE: This flag should be set by the driver * if space should be prepared for the IV, but the IV * itself should not be generated. Do not set together with - * @IEEE80211_KEY_FLAG_GENERATE_IV on the same key. + * @IEEE80211_KEY_FLAG_GENERATE_IV on the same key. Setting this flag does + * not necessarily mean that SKBs will have sufficient tailroom for ICV or + * MIC. * @IEEE80211_KEY_FLAG_RX_MGMT: This key will be used to decrypt received * management frames. The flag can help drivers that have a hardware * crypto implementation that doesn't deal with management frames @@ -1304,6 +1307,9 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * @IEEE80211_KEY_FLAG_GENERATE_IV_MGMT: This flag should be set by the * driver for a CCMP key to indicate that is requires IV generation * only for managment frames (MFP). + * @IEEE80211_KEY_FLAG_RESERVE_TAILROOM: This flag should be set by the + * driver for a key to indicate that sufficient tailroom must always + * be reserved for ICV or MIC, even when HW encryption is enabled. 
*/ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), @@ -1313,6 +1319,7 @@ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_SW_MGMT_TX = BIT(4), IEEE80211_KEY_FLAG_PUT_IV_SPACE = BIT(5), IEEE80211_KEY_FLAG_RX_MGMT = BIT(6), + IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7), }; /** diff --git a/net/mac80211/key.c b/net/mac80211/key.c index bd4e46ec32bd..f8d9f0ee59bf 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -141,8 +141,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || - (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) sdata->crypto_tx_tailroom_needed_cnt--; WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && @@ -191,8 +190,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sdata = key->sdata; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || - (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(sdata); ret = drv_set_key(key->local, DISABLE_KEY, sdata, @@ -889,8 +887,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || - (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(key->sdata); } -- cgit v1.2.3 From 20658702e08ecd693236b443837d28863b93e872 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 29 Dec 2014 11:59:59 +0200 Subject: cfg80211: fix deadlock during reg chan check If a P2P GO is active, the cfg80211_reg_can_beacon function will take the wdev lock, in its call to cfg80211_go_permissive_chan. But the wdev lock is already taken by the parent channel-checking function, causing a deadlock. Split the checking code into two parts. The first part will check if the wdev is active and saves the channel under the wdev lock. The second part will check actual channel validity according to type. 
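As a minimal sketch of the resulting pattern (illustration only, simplified from the actual change that follows; the helper name is hypothetical), the wdev state is snapshotted under the wdev lock and the lock is dropped before calling helpers that may take it again:

static bool reg_chan_valid_sketch(struct wiphy *wiphy, struct wireless_dev *wdev)
{
	struct cfg80211_chan_def chandef;
	enum nl80211_iftype iftype;

	wdev_lock(wdev);
	chandef = wdev->chandef;	/* snapshot state under the lock */
	iftype = wdev->iftype;
	wdev_unlock(wdev);

	/* cfg80211_reg_can_beacon() may itself take the wdev lock,
	 * so it is only called after wdev_unlock()
	 */
	return cfg80211_reg_can_beacon(wiphy, &chandef, iftype);
}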
Signed-off-by: Arik Nemtsov Reviewed-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/reg.c | 56 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7b8309840d4e..d39d1cbc86b1 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1530,45 +1530,40 @@ static void reg_call_notifier(struct wiphy *wiphy, static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) { - struct ieee80211_channel *ch; struct cfg80211_chan_def chandef; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - bool ret = true; + enum nl80211_iftype iftype; wdev_lock(wdev); + iftype = wdev->iftype; + /* make sure the interface is active */ if (!wdev->netdev || !netif_running(wdev->netdev)) - goto out; + goto wdev_inactive_unlock; - switch (wdev->iftype) { + switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (!wdev->beacon_interval) - goto out; - - ret = cfg80211_reg_can_beacon(wiphy, - &wdev->chandef, wdev->iftype); + goto wdev_inactive_unlock; + chandef = wdev->chandef; break; case NL80211_IFTYPE_ADHOC: if (!wdev->ssid_len) - goto out; - - ret = cfg80211_reg_can_beacon(wiphy, - &wdev->chandef, wdev->iftype); + goto wdev_inactive_unlock; + chandef = wdev->chandef; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (!wdev->current_bss || !wdev->current_bss->pub.channel) - goto out; + goto wdev_inactive_unlock; - ch = wdev->current_bss->pub.channel; - if (rdev->ops->get_channel && - !rdev_get_channel(rdev, wdev, &chandef)) - ret = cfg80211_chandef_usable(wiphy, &chandef, - IEEE80211_CHAN_DISABLED); - else - ret = !(ch->flags & IEEE80211_CHAN_DISABLED); + if (!rdev->ops->get_channel || + rdev_get_channel(rdev, wdev, &chandef)) + cfg80211_chandef_create(&chandef, + wdev->current_bss->pub.channel, + NL80211_CHAN_NO_HT); break; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -1581,9 +1576,26 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) break; } -out: wdev_unlock(wdev); - return ret; + + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_ADHOC: + return cfg80211_reg_can_beacon(wiphy, &chandef, iftype); + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + return cfg80211_chandef_usable(wiphy, &chandef, + IEEE80211_CHAN_DISABLED); + default: + break; + } + + return true; + +wdev_inactive_unlock: + wdev_unlock(wdev); + return true; } static void reg_leave_invalid_chans(struct wiphy *wiphy) -- cgit v1.2.3 From 193924472a2514f7d9e4d08cf184e9a5f2b9309d Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 21 Jul 2014 10:03:59 +0200 Subject: batman-adv: avoid useless return in void functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Linus Lüssing Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/debugfs.c | 1 - net/batman-adv/multicast.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index a12e25efaf6f..14850918ff12 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -233,7 +233,6 @@ static int batadv_debug_log_setup(struct batadv_priv *bat_priv) static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) { - return; } #endif diff --git a/net/batman-adv/multicast.h 
b/net/batman-adv/multicast.h index 73b5d45819c1..3a44ebdb43cb 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -50,7 +50,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv) { - return; } static inline enum batadv_forw_mode @@ -67,12 +66,10 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv) static inline void batadv_mcast_free(struct batadv_priv *bat_priv) { - return; } static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node) { - return; } #endif /* CONFIG_BATMAN_ADV_MCAST */ -- cgit v1.2.3 From 320b936678f57107524063e4fe3c29d1d4795810 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 16 Jul 2014 12:23:10 +0200 Subject: batman-adv: remove obsolete variable primary_iface from orig_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This variable became obsolete when changing to the new bonding mechanism based on the multi interface optimization. Since its not used anywhere, remove it. Reported-by: Linus Lüssing Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8854c05622a9..462a70c0db8f 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -199,7 +199,6 @@ struct batadv_orig_bat_iv { /** * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh * @orig: originator ethernet address - * @primary_addr: hosts primary interface address * @ifinfo_list: list for routers per outgoing interface * @last_bonding_candidate: pointer to last ifinfo of last used router * @batadv_dat_addr_t: address of the orig node in the distributed hash @@ -244,7 +243,6 @@ struct batadv_orig_bat_iv { */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; - uint8_t primary_addr[ETH_ALEN]; struct hlist_head ifinfo_list; struct batadv_orig_ifinfo *last_bonding_candidate; #ifdef CONFIG_BATMAN_ADV_DAT -- cgit v1.2.3 From 95298a91f989f69921b59269f3b87c0ac9073d96 Mon Sep 17 00:00:00 2001 From: Martin Hundebøll Date: Tue, 15 Jul 2014 09:41:05 +0200 Subject: batman-adv: kernel doc fixes for bat_iv_ogm.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Martin Hundebøll Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1e8053976e83..73209f388fcb 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -28,7 +28,7 @@ /** - * batadv_dup_status - duplicate status + * enum batadv_dup_status - duplicate status * @BATADV_NO_DUP: the packet is a duplicate * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the * neighbor) @@ -517,7 +517,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @packet_len: (total) length of the OGM * @send_time: timestamp (jiffies) when the packet is to be sent - * @direktlink: true if this is a direct link packet + * @directlink: true if this is a direct link packet * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @forw_packet: the forwarded packet which should be checked @@ 
-1366,6 +1366,7 @@ out: /** * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if * @skb: the skb containing the OGM + * @ogm_offset: offset from skb->data to start of ogm header * @orig_node: the (cached) orig node for the originator of this OGM * @if_incoming: the interface where this packet was received * @if_outgoing: the interface for which the packet should be considered @@ -1902,10 +1903,10 @@ out: * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than * neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison - * @if_outgoing: outgoing interface for the first neighbor + * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor - + * * Returns true if the metric via neigh1 is equally good or better than * the metric via neigh2, false otherwise. */ -- cgit v1.2.3 From e33571898832fb1ee44df15b1f98cc728497072d Mon Sep 17 00:00:00 2001 From: Martin Hundebøll Date: Tue, 15 Jul 2014 09:41:06 +0200 Subject: batman-adv: kernel doc fixes for bridge_loop_avoidance.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Martin Hundebøll Acked-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index a957c8140721..0f0ca433ce3c 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -252,7 +252,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) /** * batadv_bla_send_claim - sends a claim frame according to the provided info * @bat_priv: the bat priv with all the soft interface information - * @orig: the mac address to be announced within the claim + * @mac: the mac address to be announced within the claim * @vid: the VLAN ID * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ @@ -364,6 +364,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @orig: the mac address of the originator * @vid: the VLAN ID + * @own_backbone: set if the requested backbone is local * * searches for the backbone gw or creates a new one if it could not * be found. 
@@ -454,6 +455,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, /** * batadv_bla_answer_request - answer a bla request by sending own claims * @bat_priv: the bat priv with all the soft interface information + * @primary_if: interface where the request came on * @vid: the vid where the request came on * * Repeat all of our own claims, and finally send an ANNOUNCE frame @@ -775,6 +777,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv, /** * batadv_check_claim_group * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary interface of this batman interface * @hw_src: the Hardware source in the ARP Header * @hw_dst: the Hardware destination in the ARP Header * @ethhdr: pointer to the Ethernet header of the claim frame @@ -850,6 +853,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, /** * batadv_bla_process_claim * @bat_priv: the bat priv with all the soft interface information + * @primary_if: the primary hard interface of this batman soft interface * @skb: the frame to be checked * * Check if this is a claim frame, and process it accordingly. -- cgit v1.2.3 From ba97abb86302d7380bc6738aa9313a1c460c4f3c Mon Sep 17 00:00:00 2001 From: Martin Hundebøll Date: Tue, 15 Jul 2014 09:41:07 +0200 Subject: batman-adv: kernel doc fix for distributed-arp-table.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Martin Hundebøll Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/distributed-arp-table.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index d76e1d06c5b5..2fe0764c64be 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -25,9 +25,7 @@ #include -/** - * BATADV_DAT_ADDR_MAX - maximum address value in the DHT space - */ +/* BATADV_DAT_ADDR_MAX - maximum address value in the DHT space */ #define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0) void batadv_dat_status_update(struct net_device *net_dev); -- cgit v1.2.3 From a0e287750562969db8a891e9805fef55aeb05df0 Mon Sep 17 00:00:00 2001 From: Martin Hundebøll Date: Tue, 15 Jul 2014 09:41:08 +0200 Subject: batman-adv: kernel doc fixes for main.{c, h} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Martin Hundebøll Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 4 ++-- net/batman-adv/main.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d1183e882167..2cdd25a200bb 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -651,7 +651,7 @@ static struct batadv_tvlv_handler /** * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and * possibly free it - * @tvlv_handler: the tvlv container to free + * @tvlv: the tvlv container to free */ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) { @@ -800,7 +800,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, * requested packet size * @packet_buff: packet buffer * @packet_buff_len: packet buffer size - * @packet_min_len: requested packet minimum size + * @min_packet_len: requested packet minimum size * @additional_packet_len: requested additional packet size 
on top of minimum * size * diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index a1fcd884f0b1..2a59b0455e0a 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -92,9 +92,8 @@ /* numbers of originator to contact for any PUT/GET DHT operation */ #define BATADV_DAT_CANDIDATES_NUM 3 -/** - * BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ - * at most from the primary one in order to be still considered acceptable +/* BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ + * at most from the primary one in order to be still considered acceptable */ #define BATADV_TQ_SIMILARITY_THRESHOLD 50 -- cgit v1.2.3 From 24820df1449d4d7cf71bb66e7885ab7394c7b4fb Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 1 Sep 2014 14:37:25 +0200 Subject: batman-adv: checkpatch - else is not generally useful after a break or return Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bitarray.h | 3 +-- net/batman-adv/fragmentation.h | 3 +-- net/batman-adv/network-coding.c | 3 +-- net/batman-adv/translation-table.c | 5 ++--- 4 files changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index cc2407351d36..2acaafe60188 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -29,8 +29,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits, diff = last_seqno - curr_seqno; if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) return 0; - else - return test_bit(diff, seq_bits) != 0; + return test_bit(diff, seq_bits) != 0; } /* turn corresponding bit on, so we can remember that we got the packet */ diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 5d7a0e66a22b..d848cf6676a2 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -41,8 +41,7 @@ batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) if (!hlist_empty(&frags_entry->head) && batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT)) return true; - else - return false; + return false; } #endif /* _NET_BATMAN_ADV_FRAGMENTATION_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index fab47f1f3ef9..127cc4d7380a 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1212,8 +1212,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, { if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src)) return false; - else - return true; + return true; } /** diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5f59e7f899a0..38a804e1db0a 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2769,9 +2769,8 @@ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, { if (batadv_is_my_mac(bat_priv, req_dst)) return batadv_send_my_tt_response(bat_priv, tt_data, req_src); - else - return batadv_send_other_tt_response(bat_priv, tt_data, - req_src, req_dst); + return batadv_send_other_tt_response(bat_priv, tt_data, req_src, + req_dst); } static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, -- cgit v1.2.3 From 9687a31c84d7eca038a5335d8d804c11bd5d8653 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 1 Sep 2014 14:37:26 +0200 Subject: batman-adv: checkpatch - No space is necessary after a cast Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/main.h | 8 ++++---- 1 
file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 2a59b0455e0a..18286f8b4d26 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -312,10 +312,10 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, * - when adding 128 - it is neither a predecessor nor a successor, * - after adding more than 127 to the starting value - it is a successor */ -#define batadv_seq_before(x, y) ({typeof(x) _d1 = (x); \ - typeof(y) _d2 = (y); \ - typeof(x) _dummy = (_d1 - _d2); \ - (void) (&_d1 == &_d2); \ +#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \ + typeof(y)_d2 = (y); \ + typeof(x)_dummy = (_d1 - _d2); \ + (void)(&_d1 == &_d2); \ _dummy > batadv_smallest_signed_int(_dummy); }) #define batadv_seq_after(x, y) batadv_seq_before(y, x) -- cgit v1.2.3 From aa143d2837325d66150e1566b22b61f0c7a843c9 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 1 Sep 2014 14:37:27 +0200 Subject: batman-adv: checkpatch - Please use a blank line after declarations Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/debugfs.c | 1 + net/batman-adv/distributed-arp-table.c | 1 + net/batman-adv/gateway_client.c | 1 + net/batman-adv/packet.h | 1 + 4 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 14850918ff12..a4972874c056 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -404,6 +404,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \ .release = single_release, \ }, \ } + static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO, batadv_originators_hardif_open); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b5981113c9a7..aad022dd15df 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1100,6 +1100,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); } + /** * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local * DAT storage only diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index e0bcf9e84273..27649e85f3f6 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -775,6 +775,7 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, return ret; } + /** * batadv_gw_out_of_range - check if the dhcp request destination is the best gw * @bat_priv: the bat priv with all the soft interface information diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 34e096d2dce1..facd1feddd4e 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -198,6 +198,7 @@ struct batadv_bla_claim_dst { uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ }; + #pragma pack() /** -- cgit v1.2.3 From 8a3f8b6ac5c5791a04d0d30636190bfc68f2e37b Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 1 Sep 2014 14:37:28 +0200 Subject: batman-adv: checkpatch - Please don't use multiple blank lines Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 2 -- net/batman-adv/bitarray.c | 1 - net/batman-adv/bridge_loop_avoidance.c | 7 ------- net/batman-adv/fragmentation.c | 1 - net/batman-adv/main.c | 1 - net/batman-adv/originator.c | 1 - net/batman-adv/originator.h | 1 - 
net/batman-adv/routing.c | 1 - net/batman-adv/soft-interface.c | 1 - net/batman-adv/sysfs.c | 1 - net/batman-adv/translation-table.c | 1 - 11 files changed, 18 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 73209f388fcb..4dd7c407b4cc 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -26,7 +26,6 @@ #include "bat_algo.h" #include "network-coding.h" - /** * enum batadv_dup_status - duplicate status * @BATADV_NO_DUP: the packet is a duplicate @@ -1362,7 +1361,6 @@ out: return ret; } - /** * batadv_iv_ogm_process_per_outif - process a batman iv OGM for an outgoing if * @skb: the skb containing the OGM diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 9586750022f5..e3da07a64026 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -29,7 +29,6 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, int32_t n) bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE); } - /* receive and process one packet within the sequence number window. * * returns: diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 0f0ca433ce3c..4fc6cab7ed46 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -69,7 +69,6 @@ static inline uint32_t batadv_choose_backbone_gw(const void *data, return hash % size; } - /* compares address and vid of two backbone gws */ static int batadv_compare_backbone_gw(const struct hlist_node *node, const void *data2) @@ -662,7 +661,6 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, if (unlikely(!backbone_gw)) return 1; - /* handle as ANNOUNCE frame */ backbone_gw->lasttime = jiffies; crc = ntohs(*((__be16 *)(&an_addr[4]))); @@ -849,7 +847,6 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, return 2; } - /** * batadv_bla_process_claim * @bat_priv: the bat priv with all the soft interface information @@ -1347,8 +1344,6 @@ out: return ret; } - - /** * batadv_bla_is_backbone_gw_orig * @bat_priv: the bat priv with all the soft interface information @@ -1390,7 +1385,6 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig, return false; } - /** * batadv_bla_is_backbone_gw * @skb: the frame to be checked @@ -1480,7 +1474,6 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, if (!atomic_read(&bat_priv->bridge_loop_avoidance)) goto allow; - if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 00f9e144cc97..3d1dcaa3e8b5 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -23,7 +23,6 @@ #include "hard-interface.h" #include "soft-interface.h" - /** * batadv_frag_clear_chain - delete entries in the fragment buffer chain * @head: head of chain with entries. 
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 2cdd25a200bb..d4079cd510f9 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -41,7 +41,6 @@ #include "network-coding.h" #include "fragmentation.h" - /* List manipulations on hardif_list have to be rtnl_lock()'ed, * list traversals just rcu-locked */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index bea8198d0198..90e805aba379 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -797,7 +797,6 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, return ifinfo_purged; } - /** * batadv_purge_orig_neighbors - purges neighbors from originator * @bat_priv: the bat priv with all the soft interface information diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index db3a9ed734cb..aa4a43696295 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -70,7 +70,6 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, unsigned short vid); void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan); - /* hashfunction to choose an entry in a hash table of given size * hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 6648f321864d..139d2f65728e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -292,7 +292,6 @@ out: return ret; } - int batadv_recv_icmp_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5467955eb27c..5ec31d7de24f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -36,7 +36,6 @@ #include "bridge_loop_avoidance.h" #include "network-coding.h" - static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index f40cb0436eba..a75dc12f96f8 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -151,7 +151,6 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ batadv_store_##_name) - #define BATADV_ATTR_SIF_STORE_UINT(_name, _min, _max, _post_func) \ ssize_t batadv_store_##_name(struct kobject *kobj, \ struct attribute *attr, char *buff, \ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 38a804e1db0a..84e6f01b734f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1780,7 +1780,6 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, batadv_tt_global_del_roaming(bat_priv, tt_global_entry, orig_node, message); - out: if (tt_global_entry) batadv_tt_global_entry_free_ref(tt_global_entry); -- cgit v1.2.3 From 32db6aaafef317be799265a5be1840b28bee8c78 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 1 Sep 2014 14:37:29 +0200 Subject: batman-adv: checkpatch - remove unnecessary parentheses Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 4dd7c407b4cc..00e00e09b000 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -878,7 +878,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) 
hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = &(orig_node->bat_iv.bcast_own[word_index]); + word = &orig_node->bat_iv.bcast_own[word_index]; batadv_bit_get_packet(bat_priv, word, 1, 0); if_num = hard_iface->if_num; @@ -1663,7 +1663,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, offset = if_num * BATADV_NUM_WORDS; spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); - word = &(orig_neigh_node->bat_iv.bcast_own[offset]); + word = &orig_neigh_node->bat_iv.bcast_own[offset]; bit_pos = if_incoming_seqno - 2; bit_pos -= ntohl(ogm_packet->seqno); batadv_set_bit(word, bit_pos); -- cgit v1.2.3 From e0d9677ea387e386e9ce983f287d99b278ea4930 Mon Sep 17 00:00:00 2001 From: Martin Hundebøll Date: Wed, 17 Sep 2014 08:56:19 +0200 Subject: batman-adv: clear control block of received socket buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since other network components (and some drivers) use the control block provided in skbs, the network coding feature might wrongly assume that an SKB has been decoded, and thus not try to code it with another packet again. This happens for instance when batman-adv is running on a bridge device. Fix this by clearing the control block for every received SKB. Introduced by 3c12de9a5c756b23fe7c9ab332474ece1568914c ("batman-adv: network coding - code and transmit packets if possible") Signed-off-by: Martin Hundebøll Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d4079cd510f9..3bcd847a8f9f 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -402,6 +402,9 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, goto err_free; } + /* reset control block to avoid left overs from previous users */ + memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); + /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. */ -- cgit v1.2.3 From 3f68785e614bf327d63395e03a3ebfd1d847331a Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sun, 2 Nov 2014 11:29:56 +0100 Subject: batman-adv: fix misspelled words Reported-by: checkpatch Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bridge_loop_avoidance.c | 4 ++-- net/batman-adv/main.c | 2 +- net/batman-adv/packet.h | 4 ++-- net/batman-adv/routing.c | 2 +- net/batman-adv/translation-table.c | 2 +- net/batman-adv/types.h | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 4fc6cab7ed46..ac4b96eccade 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -244,7 +244,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) spin_unlock_bh(list_lock); } - /* all claims gone, intialize CRC */ + /* all claims gone, initialize CRC */ backbone_gw->crc = BATADV_BLA_CRC_INIT; } @@ -1328,7 +1328,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, goto out; } /* not found, add a new entry (overwrite the oldest entry) - * and allow it, its the first occurence. + * and allow it, its the first occurrence.
*/ curr = (bat_priv->bla.bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1); curr %= BATADV_DUPLIST_SIZE; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 3bcd847a8f9f..12fc77bef23f 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -798,7 +798,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, } /** - * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accomodate + * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate * requested packet size * @packet_buff: packet buffer * @packet_buff_len: packet buffer size diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index facd1feddd4e..b81fbbf21a63 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -377,7 +377,7 @@ struct batadv_frag_packet { uint8_t reserved:4; uint8_t no:4; #else -#error "unknown bitfield endianess" +#error "unknown bitfield endianness" #endif uint8_t dest[ETH_ALEN]; uint8_t orig[ETH_ALEN]; @@ -453,7 +453,7 @@ struct batadv_coded_packet { * @src: address of the source * @dst: address of the destination * @tvlv_len: length of tvlv data following the unicast tvlv header - * @align: 2 bytes to align the header to a 4 byte boundry + * @align: 2 bytes to align the header to a 4 byte boundary */ struct batadv_unicast_tvlv_packet { uint8_t packet_type; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 139d2f65728e..da83982bf974 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -456,7 +456,7 @@ batadv_find_router(struct batadv_priv *bat_priv, * the last chosen bonding candidate (next_candidate). If no such * router is found, use the first candidate found (the previously * chosen bonding candidate might have been the last one in the list). - * If this can't be found either, return the previously choosen + * If this can't be found either, return the previously chosen * router - obviously there are no other candidates. */ rcu_read_lock(); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 84e6f01b734f..07b263a437d1 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2852,7 +2852,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv, /** * batadv_is_my_client - check if a client is served by the local node * @bat_priv: the bat priv with all the soft interface information - * @addr: the mac adress of the client to check + * @addr: the mac address of the client to check * @vid: VLAN identifier * * Returns true if the client is served by this node, false otherwise. 
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 462a70c0db8f..9398c3fb4174 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -968,7 +968,7 @@ struct batadv_tt_orig_list_entry { }; /** - * struct batadv_tt_change_node - structure for tt changes occured + * struct batadv_tt_change_node - structure for tt changes occurred * @list: list node for batadv_priv_tt::changes_list * @change: holds the actual translation table diff data */ -- cgit v1.2.3 From dcba6c9b45a8c91ebb1b16a3595011404a19cf98 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 30 Dec 2014 02:20:14 +0100 Subject: batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 18286f8b4d26..4d2318829a34 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2014.4.0" +#define BATADV_SOURCE_VERSION "2015.0" #endif /* B.A.T.M.A.N. parameters */ -- cgit v1.2.3 From 9535395640edb23ba5cec2abb026c4ee51899722 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Sat, 29 Nov 2014 19:07:46 +0100 Subject: batman-adv: Kconfig, Add missing DEBUG_FS dependency BATMAN_ADV_DEBUG uses debugfs files for the debugging log, so it depends on DEBUG_FS, which is missing as a dependency in the Kconfig file. Signed-off-by: Markus Pargmann Signed-off-by: Antonio Quartulli --- net/batman-adv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 11660a3aab5a..c6fc8f756c9a 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -62,6 +62,7 @@ config BATMAN_ADV_MCAST config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV + depends on DEBUG_FS help This is an option for use by developers; most people should say N here. This enables compilation of support for -- cgit v1.2.3 From ea81ac2e7050798109356150ea16e71622a5c329 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 4 Jan 2015 17:10:53 +0100 Subject: ieee802154: create 6lowpan sub-directory This patch creates a 6lowpan sub-directory inside ieee802154. Additionally, we move all ieee802154 6lowpan-related files into this sub-directory instead of placing them directly inside ieee802154.
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/6lowpan/6lowpan_rtnl.c | 729 ++++++++++++++++++++++++++++++++++ net/ieee802154/6lowpan/Kconfig | 5 + net/ieee802154/6lowpan/Makefile | 3 + net/ieee802154/6lowpan/reassembly.c | 585 +++++++++++++++++++++++++++ net/ieee802154/6lowpan/reassembly.h | 41 ++ net/ieee802154/6lowpan_rtnl.c | 729 ---------------------------------- net/ieee802154/Kconfig | 6 +- net/ieee802154/Makefile | 3 +- net/ieee802154/reassembly.c | 585 --------------------------- net/ieee802154/reassembly.h | 41 -- 10 files changed, 1365 insertions(+), 1362 deletions(-) create mode 100644 net/ieee802154/6lowpan/6lowpan_rtnl.c create mode 100644 net/ieee802154/6lowpan/Kconfig create mode 100644 net/ieee802154/6lowpan/Makefile create mode 100644 net/ieee802154/6lowpan/reassembly.c create mode 100644 net/ieee802154/6lowpan/reassembly.h delete mode 100644 net/ieee802154/6lowpan_rtnl.c delete mode 100644 net/ieee802154/reassembly.c delete mode 100644 net/ieee802154/reassembly.h (limited to 'net') diff --git a/net/ieee802154/6lowpan/6lowpan_rtnl.c b/net/ieee802154/6lowpan/6lowpan_rtnl.c new file mode 100644 index 000000000000..27eaa65e88e1 --- /dev/null +++ b/net/ieee802154/6lowpan/6lowpan_rtnl.c @@ -0,0 +1,729 @@ +/* Copyright 2011, Siemens AG + * written by Alexander Smirnov + */ + +/* Based on patches from Jon Smirl + * Copyright (c) 2011 Jon Smirl + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "reassembly.h" + +static LIST_HEAD(lowpan_devices); +static int lowpan_open_count; + +/* private device info */ +struct lowpan_dev_info { + struct net_device *real_dev; /* real WPAN device ptr */ + struct mutex dev_list_mtx; /* mutex for list ops */ + u16 fragment_tag; +}; + +struct lowpan_dev_record { + struct net_device *ldev; + struct list_head list; +}; + +/* don't save pan id, it's intra pan */ +struct lowpan_addr { + u8 mode; + union { + /* IPv6 needs big endian here */ + __be64 extended_addr; + __be16 short_addr; + } u; +}; + +struct lowpan_addr_info { + struct lowpan_addr daddr; + struct lowpan_addr saddr; +}; + +static inline struct +lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) +{ + return netdev_priv(dev); +} + +static inline struct +lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) +{ + WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); + return (struct lowpan_addr_info *)(skb->data - + sizeof(struct lowpan_addr_info)); +} + +static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) +{ + const u8 *saddr = _saddr; + const u8 *daddr = _daddr; + struct lowpan_addr_info *info; + + /* TODO: + * if this package isn't ipv6 one, where should it be routed? + */ + if (type != ETH_P_IPV6) + return 0; + + if (!saddr) + saddr = dev->dev_addr; + + raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); + + info = lowpan_skb_priv(skb); + + /* TODO: Currently we only support extended_addr */ + info->daddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->daddr.u.extended_addr, daddr, + sizeof(info->daddr.u.extended_addr)); + info->saddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->saddr.u.extended_addr, saddr, + sizeof(info->daddr.u.extended_addr)); + + return 0; +} + +static int lowpan_give_skb_to_devices(struct sk_buff *skb, + struct net_device *dev) +{ + struct lowpan_dev_record *entry; + struct sk_buff *skb_cp; + int stat = NET_RX_SUCCESS; + + skb->protocol = htons(ETH_P_IPV6); + skb->pkt_type = PACKET_HOST; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) { + kfree_skb(skb); + rcu_read_unlock(); + return NET_RX_DROP; + } + + skb_cp->dev = entry->ldev; + stat = netif_rx(skb_cp); + if (stat == NET_RX_DROP) + break; + } + rcu_read_unlock(); + + consume_skb(skb); + + return stat; +} + +static int +iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) +{ + u8 iphc0, iphc1; + struct ieee802154_addr_sa sa, da; + void *sap, *dap; + + raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc0)) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc1)) + return -EINVAL; + + ieee802154_addr_to_sa(&sa, &hdr->source); + ieee802154_addr_to_sa(&da, &hdr->dest); + + if (sa.addr_type == IEEE802154_ADDR_SHORT) + sap = &sa.short_addr; + else + sap = &sa.hwaddr; + + if (da.addr_type == IEEE802154_ADDR_SHORT) + dap = &da.short_addr; + else + dap = &da.hwaddr; + + return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, + IEEE802154_ADDR_LEN, dap, da.addr_type, + 
IEEE802154_ADDR_LEN, iphc0, iphc1); +} + +static struct sk_buff* +lowpan_alloc_frag(struct sk_buff *skb, int size, + const struct ieee802154_hdr *master_hdr) +{ + struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; + struct sk_buff *frag; + int rc; + + frag = alloc_skb(real_dev->hard_header_len + + real_dev->needed_tailroom + size, + GFP_ATOMIC); + + if (likely(frag)) { + frag->dev = real_dev; + frag->priority = skb->priority; + skb_reserve(frag, real_dev->hard_header_len); + skb_reset_network_header(frag); + *mac_cb(frag) = *mac_cb(skb); + + rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, + &master_hdr->source, size); + if (rc < 0) { + kfree_skb(frag); + return ERR_PTR(rc); + } + } else { + frag = ERR_PTR(-ENOMEM); + } + + return frag; +} + +static int +lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, + u8 *frag_hdr, int frag_hdrlen, + int offset, int len) +{ + struct sk_buff *frag; + + raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); + + frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); + if (IS_ERR(frag)) + return -PTR_ERR(frag); + + memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); + memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); + + raw_dump_table(__func__, " fragment dump", frag->data, frag->len); + + return dev_queue_xmit(frag); +} + +static int +lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, + const struct ieee802154_hdr *wpan_hdr) +{ + u16 dgram_size, dgram_offset; + __be16 frag_tag; + u8 frag_hdr[5]; + int frag_cap, frag_len, payload_cap, rc; + int skb_unprocessed, skb_offset; + + dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - + skb->mac_len; + frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); + lowpan_dev_info(dev)->fragment_tag++; + + frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); + frag_hdr[1] = dgram_size & 0xff; + memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); + + payload_cap = ieee802154_max_payload(wpan_hdr); + + frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - + skb_network_header_len(skb), 8); + + skb_offset = skb_network_header_len(skb); + skb_unprocessed = skb->len - skb->mac_len - skb_offset; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAG1_HEAD_SIZE, 0, + frag_len + skb_network_header_len(skb)); + if (rc) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, ntohs(frag_tag)); + goto err; + } + + frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; + frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; + frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); + + do { + dgram_offset += frag_len; + skb_offset += frag_len; + skb_unprocessed -= frag_len; + frag_len = min(frag_cap, skb_unprocessed); + + frag_hdr[4] = dgram_offset >> 3; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAGN_HEAD_SIZE, skb_offset, + frag_len); + if (rc) { + pr_debug("%s unable to send a FRAGN packet. 
(tag: %d, offset: %d)\n", + __func__, ntohs(frag_tag), skb_offset); + goto err; + } + } while (skb_unprocessed > frag_cap); + + consume_skb(skb); + return NET_XMIT_SUCCESS; + +err: + kfree_skb(skb); + return rc; +} + +static int lowpan_header(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_addr sa, da; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); + struct lowpan_addr_info info; + void *daddr, *saddr; + + memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); + + /* TODO: Currently we only support extended_addr */ + daddr = &info.daddr.u.extended_addr; + saddr = &info.saddr.u.extended_addr; + + lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); + + cb->type = IEEE802154_FC_TYPE_DATA; + + /* prepare wpan address data */ + sa.mode = IEEE802154_ADDR_LONG; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + sa.extended_addr = ieee802154_devaddr_from_raw(saddr); + + /* intra-PAN communications */ + da.pan_id = sa.pan_id; + + /* if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast((const u8 *)daddr)) { + da.mode = IEEE802154_ADDR_SHORT; + da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + cb->ackreq = false; + } else { + da.mode = IEEE802154_ADDR_LONG; + da.extended_addr = ieee802154_devaddr_from_raw(daddr); + cb->ackreq = true; + } + + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + ETH_P_IPV6, (void *)&da, (void *)&sa, 0); +} + +static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_hdr wpan_hdr; + int max_single, ret; + + pr_debug("package xmit\n"); + + /* We must take a copy of the skb before we modify/replace the ipv6 + * header as the header could be used elsewhere + */ + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + + ret = lowpan_header(skb, dev); + if (ret < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + max_single = ieee802154_max_payload(&wpan_hdr); + + if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { + skb->dev = lowpan_dev_info(dev)->real_dev; + return dev_queue_xmit(skb); + } else { + netdev_tx_t rc; + + pr_debug("frame is too big, fragmentation is needed\n"); + rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); + + return rc < 0 ? 
NET_XMIT_DROP : rc; + } +} + +static __le16 lowpan_get_pan_id(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); +} + +static __le16 lowpan_get_short_addr(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); +} + +static u8 lowpan_get_dsn(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); +} + +static struct header_ops lowpan_header_ops = { + .create = lowpan_header_create, +}; + +static struct lock_class_key lowpan_tx_busylock; +static struct lock_class_key lowpan_netdev_xmit_lock_key; + +static void lowpan_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &lowpan_netdev_xmit_lock_key); +} + +static int lowpan_dev_init(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); + dev->qdisc_tx_busylock = &lowpan_tx_busylock; + return 0; +} + +static const struct net_device_ops lowpan_netdev_ops = { + .ndo_init = lowpan_dev_init, + .ndo_start_xmit = lowpan_xmit, +}; + +static struct ieee802154_mlme_ops lowpan_mlme = { + .get_pan_id = lowpan_get_pan_id, + .get_short_addr = lowpan_get_short_addr, + .get_dsn = lowpan_get_dsn, +}; + +static void lowpan_setup(struct net_device *dev) +{ + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + dev->type = ARPHRD_IEEE802154; + /* Frame Control + Sequence Number + Address fields + Security Header */ + dev->hard_header_len = 2 + 1 + 20 + 14; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = IPV6_MIN_MTU; + dev->tx_queue_len = 0; + dev->flags = IFF_BROADCAST | IFF_MULTICAST; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &lowpan_netdev_ops; + dev->header_ops = &lowpan_header_ops; + dev->ml_priv = &lowpan_mlme; + dev->destructor = free_netdev; +} + +static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) + return -EINVAL; + } + return 0; +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct ieee802154_hdr hdr; + int ret; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto drop; + + if (!netif_running(dev)) + goto drop_skb; + + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop_skb; + + if (dev->type != ARPHRD_IEEE802154) + goto drop_skb; + + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) + goto drop_skb; + + /* check that it's our buffer */ + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Pull off the 1-byte of 6lowpan header. 
*/ + skb_pull(skb, 1); + return lowpan_give_skb_to_devices(skb, NULL); + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + default: + break; + } + } + +drop_skb: + kfree_skb(skb); +drop: + return NET_RX_DROP; +} + +static struct packet_type lowpan_packet_type = { + .type = htons(ETH_P_IEEE802154), + .func = lowpan_rcv, +}; + +static int lowpan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *real_dev; + struct lowpan_dev_record *entry; + int ret; + + ASSERT_RTNL(); + + pr_debug("adding new link\n"); + + if (!tb[IFLA_LINK]) + return -EINVAL; + /* find and hold real wpan device */ + real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + if (real_dev->type != ARPHRD_IEEE802154) { + dev_put(real_dev); + return -EINVAL; + } + + lowpan_dev_info(dev)->real_dev = real_dev; + mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + dev_put(real_dev); + lowpan_dev_info(dev)->real_dev = NULL; + return -ENOMEM; + } + + entry->ldev = dev; + + /* Set the lowpan hardware address to the wpan hardware address. 
*/ + memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, &lowpan_devices); + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + ret = register_netdevice(dev); + if (ret >= 0) { + if (!lowpan_open_count) + dev_add_pack(&lowpan_packet_type); + lowpan_open_count++; + } + + return ret; +} + +static void lowpan_dellink(struct net_device *dev, struct list_head *head) +{ + struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); + struct net_device *real_dev = lowpan_dev->real_dev; + struct lowpan_dev_record *entry, *tmp; + + ASSERT_RTNL(); + + lowpan_open_count--; + if (!lowpan_open_count) + dev_remove_pack(&lowpan_packet_type); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (entry->ldev == dev) { + list_del(&entry->list); + kfree(entry); + } + } + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); + + unregister_netdevice_queue(dev, head); + + dev_put(real_dev); +} + +static struct rtnl_link_ops lowpan_link_ops __read_mostly = { + .kind = "lowpan", + .priv_size = sizeof(struct lowpan_dev_info), + .setup = lowpan_setup, + .newlink = lowpan_newlink, + .dellink = lowpan_dellink, + .validate = lowpan_validate, +}; + +static inline int __init lowpan_netlink_init(void) +{ + return rtnl_link_register(&lowpan_link_ops); +} + +static inline void lowpan_netlink_fini(void) +{ + rtnl_link_unregister(&lowpan_link_ops); +} + +static int lowpan_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + LIST_HEAD(del_list); + struct lowpan_dev_record *entry, *tmp; + + if (dev->type != ARPHRD_IEEE802154) + goto out; + + if (event == NETDEV_UNREGISTER) { + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (lowpan_dev_info(entry->ldev)->real_dev == dev) + lowpan_dellink(entry->ldev, &del_list); + } + + unregister_netdevice_many(&del_list); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block lowpan_dev_notifier = { + .notifier_call = lowpan_device_event, +}; + +static int __init lowpan_init_module(void) +{ + int err = 0; + + err = lowpan_net_frag_init(); + if (err < 0) + goto out; + + err = lowpan_netlink_init(); + if (err < 0) + goto out_frag; + + err = register_netdevice_notifier(&lowpan_dev_notifier); + if (err < 0) + goto out_pack; + + return 0; + +out_pack: + lowpan_netlink_fini(); +out_frag: + lowpan_net_frag_exit(); +out: + return err; +} + +static void __exit lowpan_cleanup_module(void) +{ + lowpan_netlink_fini(); + + lowpan_net_frag_exit(); + + unregister_netdevice_notifier(&lowpan_dev_notifier); +} + +module_init(lowpan_init_module); +module_exit(lowpan_cleanup_module); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/6lowpan/Kconfig b/net/ieee802154/6lowpan/Kconfig new file mode 100644 index 000000000000..d24f985b0bfd --- /dev/null +++ b/net/ieee802154/6lowpan/Kconfig @@ -0,0 +1,5 @@ +config IEEE802154_6LOWPAN + tristate "6lowpan support over IEEE 802.15.4" + depends on 6LOWPAN + ---help--- + IPv6 compression over IEEE 802.15.4. 
diff --git a/net/ieee802154/6lowpan/Makefile b/net/ieee802154/6lowpan/Makefile new file mode 100644 index 000000000000..936959bd4f99 --- /dev/null +++ b/net/ieee802154/6lowpan/Makefile @@ -0,0 +1,3 @@ +obj-y += ieee802154_6lowpan.o + +ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c new file mode 100644 index 000000000000..9d980ed3ffe2 --- /dev/null +++ b/net/ieee802154/6lowpan/reassembly.c @@ -0,0 +1,585 @@ +/* 6LoWPAN fragment reassembly + * + * + * Authors: + * Alexander Aring + * + * Based on: net/ipv6/reassembly.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "6LoWPAN: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "reassembly.h" + +static const char lowpan_frags_cache_name[] = "lowpan-frags"; + +struct lowpan_frag_info { + u16 d_tag; + u16 d_size; + u8 d_offset; +}; + +static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) +{ + return (struct lowpan_frag_info *)skb->cb; +} + +static struct inet_frags lowpan_frags; + +static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, + struct sk_buff *prev, struct net_device *dev); + +static unsigned int lowpan_hash_frag(u16 tag, u16 d_size, + const struct ieee802154_addr *saddr, + const struct ieee802154_addr *daddr) +{ + net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd)); + return jhash_3words(ieee802154_addr_hash(saddr), + ieee802154_addr_hash(daddr), + (__force u32)(tag + (d_size << 16)), + lowpan_frags.rnd); +} + +static unsigned int lowpan_hashfn(const struct inet_frag_queue *q) +{ + const struct lowpan_frag_queue *fq; + + fq = container_of(q, struct lowpan_frag_queue, q); + return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr); +} + +static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a) +{ + const struct lowpan_frag_queue *fq; + const struct lowpan_create_arg *arg = a; + + fq = container_of(q, struct lowpan_frag_queue, q); + return fq->tag == arg->tag && fq->d_size == arg->d_size && + ieee802154_addr_equal(&fq->saddr, arg->src) && + ieee802154_addr_equal(&fq->daddr, arg->dst); +} + +static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) +{ + const struct lowpan_create_arg *arg = a; + struct lowpan_frag_queue *fq; + + fq = container_of(q, struct lowpan_frag_queue, q); + + fq->tag = arg->tag; + fq->d_size = arg->d_size; + fq->saddr = *arg->src; + fq->daddr = *arg->dst; +} + +static void lowpan_frag_expire(unsigned long data) +{ + struct frag_queue *fq; + struct net *net; + + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags); + + spin_lock(&fq->q.lock); + + if (fq->q.flags & INET_FRAG_COMPLETE) + goto out; + + inet_frag_kill(&fq->q, &lowpan_frags); +out: + spin_unlock(&fq->q.lock); + inet_frag_put(&fq->q, &lowpan_frags); +} + +static inline struct lowpan_frag_queue * +fq_find(struct net *net, const struct lowpan_frag_info *frag_info, + const struct ieee802154_addr *src, + const struct ieee802154_addr *dst) +{ + struct inet_frag_queue *q; + struct lowpan_create_arg arg; + unsigned int hash; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + 
net_ieee802154_lowpan(net); + + arg.tag = frag_info->d_tag; + arg.d_size = frag_info->d_size; + arg.src = src; + arg.dst = dst; + + hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); + + q = inet_frag_find(&ieee802154_lowpan->frags, + &lowpan_frags, &arg, hash); + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } + return container_of(q, struct lowpan_frag_queue, q); +} + +static int lowpan_frag_queue(struct lowpan_frag_queue *fq, + struct sk_buff *skb, const u8 frag_type) +{ + struct sk_buff *prev, *next; + struct net_device *dev; + int end, offset; + + if (fq->q.flags & INET_FRAG_COMPLETE) + goto err; + + offset = lowpan_cb(skb)->d_offset << 3; + end = lowpan_cb(skb)->d_size; + + /* Is this the final fragment? */ + if (offset + skb->len == end) { + /* If we already have some bits beyond end + * or have different end, the segment is corrupted. + */ + if (end < fq->q.len || + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) + goto err; + fq->q.flags |= INET_FRAG_LAST_IN; + fq->q.len = end; + } else { + if (end > fq->q.len) { + /* Some bits beyond end -> corruption. */ + if (fq->q.flags & INET_FRAG_LAST_IN) + goto err; + fq->q.len = end; + } + } + + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + prev = fq->q.fragments_tail; + if (!prev || lowpan_cb(prev)->d_offset < lowpan_cb(skb)->d_offset) { + next = NULL; + goto found; + } + prev = NULL; + for (next = fq->q.fragments; next != NULL; next = next->next) { + if (lowpan_cb(next)->d_offset >= lowpan_cb(skb)->d_offset) + break; /* bingo! */ + prev = next; + } + +found: + /* Insert this fragment in the chain of fragments. */ + skb->next = next; + if (!next) + fq->q.fragments_tail = skb; + if (prev) + prev->next = skb; + else + fq->q.fragments = skb; + + dev = skb->dev; + if (dev) + skb->dev = NULL; + + fq->q.stamp = skb->tstamp; + if (frag_type == LOWPAN_DISPATCH_FRAG1) { + /* Calculate uncomp. 6lowpan header to estimate full size */ + fq->q.meat += lowpan_uncompress_size(skb, NULL); + fq->q.flags |= INET_FRAG_FIRST_IN; + } else { + fq->q.meat += skb->len; + } + add_frag_mem_limit(&fq->q, skb->truesize); + + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + res = lowpan_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; + } + + return -1; +err: + kfree_skb(skb); + return -1; +} + +/* Check if this packet is complete. + * Returns NULL on failure by any reason, and pointer + * to current nexthdr field in reassembled frame. + * + * It is called with locked fq, and caller must check that + * queue is eligible for reassembly i.e. it is not COMPLETE, + * the last and the first frames arrived and all the bits are here. + */ +static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, + struct net_device *dev) +{ + struct sk_buff *fp, *head = fq->q.fragments; + int sum_truesize; + + inet_frag_kill(&fq->q, &lowpan_frags); + + /* Make the one we just received the head. 
*/ + if (prev) { + head = prev->next; + fp = skb_clone(head, GFP_ATOMIC); + + if (!fp) + goto out_oom; + + fp->next = head->next; + if (!fp->next) + fq->q.fragments_tail = fp; + prev->next = fp; + + skb_morph(head, fq->q.fragments); + head->next = fq->q.fragments->next; + + consume_skb(fq->q.fragments); + fq->q.fragments = head; + } + + /* Head of list must not be cloned. */ + if (skb_unclone(head, GFP_ATOMIC)) + goto out_oom; + + /* If the first fragment is fragmented itself, we split + * it to two chunks: the first with data and paged part + * and the second, holding only fragments. + */ + if (skb_has_frag_list(head)) { + struct sk_buff *clone; + int i, plen = 0; + + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) + goto out_oom; + clone->next = head->next; + head->next = clone; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); + clone->len = head->data_len - plen; + clone->data_len = clone->len; + head->data_len -= clone->len; + head->len -= clone->len; + add_frag_mem_limit(&fq->q, clone->truesize); + } + + WARN_ON(head == NULL); + + sum_truesize = head->truesize; + for (fp = head->next; fp;) { + bool headstolen; + int delta; + struct sk_buff *next = fp->next; + + sum_truesize += fp->truesize; + if (skb_try_coalesce(head, fp, &headstolen, &delta)) { + kfree_skb_partial(fp, headstolen); + } else { + if (!skb_shinfo(head)->frag_list) + skb_shinfo(head)->frag_list = fp; + head->data_len += fp->len; + head->len += fp->len; + head->truesize += fp->truesize; + } + fp = next; + } + sub_frag_mem_limit(&fq->q, sum_truesize); + + head->next = NULL; + head->dev = dev; + head->tstamp = fq->q.stamp; + + fq->q.fragments = NULL; + fq->q.fragments_tail = NULL; + + return 1; +out_oom: + net_dbg_ratelimited("lowpan_frag_reasm: no memory for reassembly\n"); + return -1; +} + +static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type, + struct lowpan_frag_info *frag_info) +{ + bool fail; + u8 pattern = 0, low = 0; + __be16 d_tag = 0; + + fail = lowpan_fetch_skb(skb, &pattern, 1); + fail |= lowpan_fetch_skb(skb, &low, 1); + frag_info->d_size = (pattern & 7) << 8 | low; + fail |= lowpan_fetch_skb(skb, &d_tag, 2); + frag_info->d_tag = ntohs(d_tag); + + if (frag_type == LOWPAN_DISPATCH_FRAGN) { + fail |= lowpan_fetch_skb(skb, &frag_info->d_offset, 1); + } else { + skb_reset_network_header(skb); + frag_info->d_offset = 0; + } + + if (unlikely(fail)) + return -EIO; + + return 0; +} + +int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) +{ + struct lowpan_frag_queue *fq; + struct net *net = dev_net(skb->dev); + struct lowpan_frag_info *frag_info = lowpan_cb(skb); + struct ieee802154_addr source, dest; + int err; + + source = mac_cb(skb)->source; + dest = mac_cb(skb)->dest; + + err = lowpan_get_frag_info(skb, frag_type, frag_info); + if (err < 0) + goto err; + + if (frag_info->d_size > IPV6_MIN_MTU) { + net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n"); + goto err; + } + + fq = fq_find(net, frag_info, &source, &dest); + if (fq != NULL) { + int ret; + + spin_lock(&fq->q.lock); + ret = lowpan_frag_queue(fq, skb, frag_type); + spin_unlock(&fq->q.lock); + + inet_frag_put(&fq->q, &lowpan_frags); + return ret; + } + +err: + kfree_skb(skb); + return -1; +} +EXPORT_SYMBOL(lowpan_frag_rcv); + +#ifdef CONFIG_SYSCTL +static int zero; + +static struct ctl_table lowpan_frags_ns_ctl_table[] = { + { + .procname = "6lowpanfrag_high_thresh", + 
.data = &init_net.ieee802154_lowpan.frags.high_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh + }, + { + .procname = "6lowpanfrag_low_thresh", + .data = &init_net.ieee802154_lowpan.frags.low_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh + }, + { + .procname = "6lowpanfrag_time", + .data = &init_net.ieee802154_lowpan.frags.timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; + +/* secret interval has been deprecated */ +static int lowpan_frags_secret_interval_unused; +static struct ctl_table lowpan_frags_ctl_table[] = { + { + .procname = "6lowpanfrag_secret_interval", + .data = &lowpan_frags_secret_interval_unused, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; + +static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); + + table = lowpan_frags_ns_ctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(lowpan_frags_ns_ctl_table), + GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].data = &ieee802154_lowpan->frags.high_thresh; + table[0].extra1 = &ieee802154_lowpan->frags.low_thresh; + table[0].extra2 = &init_net.ieee802154_lowpan.frags.high_thresh; + table[1].data = &ieee802154_lowpan->frags.low_thresh; + table[1].extra2 = &ieee802154_lowpan->frags.high_thresh; + table[2].data = &ieee802154_lowpan->frags.timeout; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; + } + + hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table); + if (hdr == NULL) + goto err_reg; + + ieee802154_lowpan->sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); + + table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct ctl_table_header *lowpan_ctl_header; + +static int __init lowpan_frags_sysctl_register(void) +{ + lowpan_ctl_header = register_net_sysctl(&init_net, + "net/ieee802154/6lowpan", + lowpan_frags_ctl_table); + return lowpan_ctl_header == NULL ? 
-ENOMEM : 0; +} + +static void lowpan_frags_sysctl_unregister(void) +{ + unregister_net_sysctl_table(lowpan_ctl_header); +} +#else +static inline int lowpan_frags_ns_sysctl_register(struct net *net) +{ + return 0; +} + +static inline void lowpan_frags_ns_sysctl_unregister(struct net *net) +{ +} + +static inline int __init lowpan_frags_sysctl_register(void) +{ + return 0; +} + +static inline void lowpan_frags_sysctl_unregister(void) +{ +} +#endif + +static int __net_init lowpan_frags_init_net(struct net *net) +{ + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); + + ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; + ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; + + inet_frags_init_net(&ieee802154_lowpan->frags); + + return lowpan_frags_ns_sysctl_register(net); +} + +static void __net_exit lowpan_frags_exit_net(struct net *net) +{ + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); + + lowpan_frags_ns_sysctl_unregister(net); + inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); +} + +static struct pernet_operations lowpan_frags_ops = { + .init = lowpan_frags_init_net, + .exit = lowpan_frags_exit_net, +}; + +int __init lowpan_net_frag_init(void) +{ + int ret; + + ret = lowpan_frags_sysctl_register(); + if (ret) + return ret; + + ret = register_pernet_subsys(&lowpan_frags_ops); + if (ret) + goto err_pernet; + + lowpan_frags.hashfn = lowpan_hashfn; + lowpan_frags.constructor = lowpan_frag_init; + lowpan_frags.destructor = NULL; + lowpan_frags.skb_free = NULL; + lowpan_frags.qsize = sizeof(struct frag_queue); + lowpan_frags.match = lowpan_frag_match; + lowpan_frags.frag_expire = lowpan_frag_expire; + lowpan_frags.frags_cache_name = lowpan_frags_cache_name; + ret = inet_frags_init(&lowpan_frags); + if (ret) + goto err_pernet; + + return ret; +err_pernet: + lowpan_frags_sysctl_unregister(); + return ret; +} + +void lowpan_net_frag_exit(void) +{ + inet_frags_fini(&lowpan_frags); + lowpan_frags_sysctl_unregister(); + unregister_pernet_subsys(&lowpan_frags_ops); +} diff --git a/net/ieee802154/6lowpan/reassembly.h b/net/ieee802154/6lowpan/reassembly.h new file mode 100644 index 000000000000..836b16fa001f --- /dev/null +++ b/net/ieee802154/6lowpan/reassembly.h @@ -0,0 +1,41 @@ +#ifndef __IEEE802154_6LOWPAN_REASSEMBLY_H__ +#define __IEEE802154_6LOWPAN_REASSEMBLY_H__ + +#include + +struct lowpan_create_arg { + u16 tag; + u16 d_size; + const struct ieee802154_addr *src; + const struct ieee802154_addr *dst; +}; + +/* Equivalent of ipv4 struct ip + */ +struct lowpan_frag_queue { + struct inet_frag_queue q; + + u16 tag; + u16 d_size; + struct ieee802154_addr saddr; + struct ieee802154_addr daddr; +}; + +static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) +{ + switch (a->mode) { + case IEEE802154_ADDR_LONG: + return (((__force u64)a->extended_addr) >> 32) ^ + (((__force u64)a->extended_addr) & 0xffffffff); + case IEEE802154_ADDR_SHORT: + return (__force u32)(a->short_addr); + default: + return 0; + } +} + +int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); +void lowpan_net_frag_exit(void); +int lowpan_net_frag_init(void); + +#endif /* __IEEE802154_6LOWPAN_REASSEMBLY_H__ */ diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c deleted file mode 100644 index 27eaa65e88e1..000000000000 --- a/net/ieee802154/6lowpan_rtnl.c +++ /dev/null @@ -1,729 +0,0 @@ -/* Copyright 2011, Siemens AG - * written by 
Alexander Smirnov - */ - -/* Based on patches from Jon Smirl - * Copyright (c) 2011 Jon Smirl - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -/* Jon's code is based on 6lowpan implementation for Contiki which is: - * Copyright (c) 2008, Swedish Institute of Computer Science. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Institute nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "reassembly.h" - -static LIST_HEAD(lowpan_devices); -static int lowpan_open_count; - -/* private device info */ -struct lowpan_dev_info { - struct net_device *real_dev; /* real WPAN device ptr */ - struct mutex dev_list_mtx; /* mutex for list ops */ - u16 fragment_tag; -}; - -struct lowpan_dev_record { - struct net_device *ldev; - struct list_head list; -}; - -/* don't save pan id, it's intra pan */ -struct lowpan_addr { - u8 mode; - union { - /* IPv6 needs big endian here */ - __be64 extended_addr; - __be16 short_addr; - } u; -}; - -struct lowpan_addr_info { - struct lowpan_addr daddr; - struct lowpan_addr saddr; -}; - -static inline struct -lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) -{ - return netdev_priv(dev); -} - -static inline struct -lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) -{ - WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); - return (struct lowpan_addr_info *)(skb->data - - sizeof(struct lowpan_addr_info)); -} - -static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) -{ - const u8 *saddr = _saddr; - const u8 *daddr = _daddr; - struct lowpan_addr_info *info; - - /* TODO: - * if this package isn't ipv6 one, where should it be routed? - */ - if (type != ETH_P_IPV6) - return 0; - - if (!saddr) - saddr = dev->dev_addr; - - raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); - raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); - - info = lowpan_skb_priv(skb); - - /* TODO: Currently we only support extended_addr */ - info->daddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->daddr.u.extended_addr, daddr, - sizeof(info->daddr.u.extended_addr)); - info->saddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->saddr.u.extended_addr, saddr, - sizeof(info->daddr.u.extended_addr)); - - return 0; -} - -static int lowpan_give_skb_to_devices(struct sk_buff *skb, - struct net_device *dev) -{ - struct lowpan_dev_record *entry; - struct sk_buff *skb_cp; - int stat = NET_RX_SUCCESS; - - skb->protocol = htons(ETH_P_IPV6); - skb->pkt_type = PACKET_HOST; - - rcu_read_lock(); - list_for_each_entry_rcu(entry, &lowpan_devices, list) - if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { - skb_cp = skb_copy(skb, GFP_ATOMIC); - if (!skb_cp) { - kfree_skb(skb); - rcu_read_unlock(); - return NET_RX_DROP; - } - - skb_cp->dev = entry->ldev; - stat = netif_rx(skb_cp); - if (stat == NET_RX_DROP) - break; - } - rcu_read_unlock(); - - consume_skb(skb); - - return stat; -} - -static int -iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) -{ - u8 iphc0, iphc1; - struct ieee802154_addr_sa sa, da; - void *sap, *dap; - - raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); - /* at least two bytes will be used for the encoding */ - if (skb->len < 2) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc0)) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc1)) - return -EINVAL; - - ieee802154_addr_to_sa(&sa, &hdr->source); - ieee802154_addr_to_sa(&da, &hdr->dest); - - if (sa.addr_type == IEEE802154_ADDR_SHORT) - sap = &sa.short_addr; - else - sap = &sa.hwaddr; - - if (da.addr_type == IEEE802154_ADDR_SHORT) - dap = &da.short_addr; - else - dap = &da.hwaddr; - - return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, - IEEE802154_ADDR_LEN, dap, da.addr_type, - 
IEEE802154_ADDR_LEN, iphc0, iphc1); -} - -static struct sk_buff* -lowpan_alloc_frag(struct sk_buff *skb, int size, - const struct ieee802154_hdr *master_hdr) -{ - struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; - struct sk_buff *frag; - int rc; - - frag = alloc_skb(real_dev->hard_header_len + - real_dev->needed_tailroom + size, - GFP_ATOMIC); - - if (likely(frag)) { - frag->dev = real_dev; - frag->priority = skb->priority; - skb_reserve(frag, real_dev->hard_header_len); - skb_reset_network_header(frag); - *mac_cb(frag) = *mac_cb(skb); - - rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, - &master_hdr->source, size); - if (rc < 0) { - kfree_skb(frag); - return ERR_PTR(rc); - } - } else { - frag = ERR_PTR(-ENOMEM); - } - - return frag; -} - -static int -lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, - u8 *frag_hdr, int frag_hdrlen, - int offset, int len) -{ - struct sk_buff *frag; - - raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); - - frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); - if (IS_ERR(frag)) - return -PTR_ERR(frag); - - memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); - memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); - - raw_dump_table(__func__, " fragment dump", frag->data, frag->len); - - return dev_queue_xmit(frag); -} - -static int -lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, - const struct ieee802154_hdr *wpan_hdr) -{ - u16 dgram_size, dgram_offset; - __be16 frag_tag; - u8 frag_hdr[5]; - int frag_cap, frag_len, payload_cap, rc; - int skb_unprocessed, skb_offset; - - dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - - skb->mac_len; - frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); - lowpan_dev_info(dev)->fragment_tag++; - - frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); - frag_hdr[1] = dgram_size & 0xff; - memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); - - payload_cap = ieee802154_max_payload(wpan_hdr); - - frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - - skb_network_header_len(skb), 8); - - skb_offset = skb_network_header_len(skb); - skb_unprocessed = skb->len - skb->mac_len - skb_offset; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAG1_HEAD_SIZE, 0, - frag_len + skb_network_header_len(skb)); - if (rc) { - pr_debug("%s unable to send FRAG1 packet (tag: %d)", - __func__, ntohs(frag_tag)); - goto err; - } - - frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; - frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; - frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); - - do { - dgram_offset += frag_len; - skb_offset += frag_len; - skb_unprocessed -= frag_len; - frag_len = min(frag_cap, skb_unprocessed); - - frag_hdr[4] = dgram_offset >> 3; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAGN_HEAD_SIZE, skb_offset, - frag_len); - if (rc) { - pr_debug("%s unable to send a FRAGN packet. 
(tag: %d, offset: %d)\n", - __func__, ntohs(frag_tag), skb_offset); - goto err; - } - } while (skb_unprocessed > frag_cap); - - consume_skb(skb); - return NET_XMIT_SUCCESS; - -err: - kfree_skb(skb); - return rc; -} - -static int lowpan_header(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_addr sa, da; - struct ieee802154_mac_cb *cb = mac_cb_init(skb); - struct lowpan_addr_info info; - void *daddr, *saddr; - - memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); - - /* TODO: Currently we only support extended_addr */ - daddr = &info.daddr.u.extended_addr; - saddr = &info.saddr.u.extended_addr; - - lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); - - cb->type = IEEE802154_FC_TYPE_DATA; - - /* prepare wpan address data */ - sa.mode = IEEE802154_ADDR_LONG; - sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - sa.extended_addr = ieee802154_devaddr_from_raw(saddr); - - /* intra-PAN communications */ - da.pan_id = sa.pan_id; - - /* if the destination address is the broadcast address, use the - * corresponding short address - */ - if (lowpan_is_addr_broadcast((const u8 *)daddr)) { - da.mode = IEEE802154_ADDR_SHORT; - da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); - cb->ackreq = false; - } else { - da.mode = IEEE802154_ADDR_LONG; - da.extended_addr = ieee802154_devaddr_from_raw(daddr); - cb->ackreq = true; - } - - return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - ETH_P_IPV6, (void *)&da, (void *)&sa, 0); -} - -static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_hdr wpan_hdr; - int max_single, ret; - - pr_debug("package xmit\n"); - - /* We must take a copy of the skb before we modify/replace the ipv6 - * header as the header could be used elsewhere - */ - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_XMIT_DROP; - - ret = lowpan_header(skb, dev); - if (ret < 0) { - kfree_skb(skb); - return NET_XMIT_DROP; - } - - if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { - kfree_skb(skb); - return NET_XMIT_DROP; - } - - max_single = ieee802154_max_payload(&wpan_hdr); - - if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { - skb->dev = lowpan_dev_info(dev)->real_dev; - return dev_queue_xmit(skb); - } else { - netdev_tx_t rc; - - pr_debug("frame is too big, fragmentation is needed\n"); - rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); - - return rc < 0 ? 
NET_XMIT_DROP : rc; - } -} - -static __le16 lowpan_get_pan_id(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); -} - -static __le16 lowpan_get_short_addr(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); -} - -static u8 lowpan_get_dsn(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); -} - -static struct header_ops lowpan_header_ops = { - .create = lowpan_header_create, -}; - -static struct lock_class_key lowpan_tx_busylock; -static struct lock_class_key lowpan_netdev_xmit_lock_key; - -static void lowpan_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, - &lowpan_netdev_xmit_lock_key); -} - -static int lowpan_dev_init(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &lowpan_tx_busylock; - return 0; -} - -static const struct net_device_ops lowpan_netdev_ops = { - .ndo_init = lowpan_dev_init, - .ndo_start_xmit = lowpan_xmit, -}; - -static struct ieee802154_mlme_ops lowpan_mlme = { - .get_pan_id = lowpan_get_pan_id, - .get_short_addr = lowpan_get_short_addr, - .get_dsn = lowpan_get_dsn, -}; - -static void lowpan_setup(struct net_device *dev) -{ - dev->addr_len = IEEE802154_ADDR_LEN; - memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); - dev->type = ARPHRD_IEEE802154; - /* Frame Control + Sequence Number + Address fields + Security Header */ - dev->hard_header_len = 2 + 1 + 20 + 14; - dev->needed_tailroom = 2; /* FCS */ - dev->mtu = IPV6_MIN_MTU; - dev->tx_queue_len = 0; - dev->flags = IFF_BROADCAST | IFF_MULTICAST; - dev->watchdog_timeo = 0; - - dev->netdev_ops = &lowpan_netdev_ops; - dev->header_ops = &lowpan_header_ops; - dev->ml_priv = &lowpan_mlme; - dev->destructor = free_netdev; -} - -static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) -{ - if (tb[IFLA_ADDRESS]) { - if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) - return -EINVAL; - } - return 0; -} - -static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct ieee802154_hdr hdr; - int ret; - - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - goto drop; - - if (!netif_running(dev)) - goto drop_skb; - - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop_skb; - - if (dev->type != ARPHRD_IEEE802154) - goto drop_skb; - - if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) - goto drop_skb; - - /* check that it's our buffer */ - if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { - /* Pull off the 1-byte of 6lowpan header. 
*/ - skb_pull(skb, 1); - return lowpan_give_skb_to_devices(skb, NULL); - } else { - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - default: - break; - } - } - -drop_skb: - kfree_skb(skb); -drop: - return NET_RX_DROP; -} - -static struct packet_type lowpan_packet_type = { - .type = htons(ETH_P_IEEE802154), - .func = lowpan_rcv, -}; - -static int lowpan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_device *real_dev; - struct lowpan_dev_record *entry; - int ret; - - ASSERT_RTNL(); - - pr_debug("adding new link\n"); - - if (!tb[IFLA_LINK]) - return -EINVAL; - /* find and hold real wpan device */ - real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) - return -ENODEV; - if (real_dev->type != ARPHRD_IEEE802154) { - dev_put(real_dev); - return -EINVAL; - } - - lowpan_dev_info(dev)->real_dev = real_dev; - mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { - dev_put(real_dev); - lowpan_dev_info(dev)->real_dev = NULL; - return -ENOMEM; - } - - entry->ldev = dev; - - /* Set the lowpan hardware address to the wpan hardware address. 
*/ - memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - INIT_LIST_HEAD(&entry->list); - list_add_tail(&entry->list, &lowpan_devices); - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - ret = register_netdevice(dev); - if (ret >= 0) { - if (!lowpan_open_count) - dev_add_pack(&lowpan_packet_type); - lowpan_open_count++; - } - - return ret; -} - -static void lowpan_dellink(struct net_device *dev, struct list_head *head) -{ - struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); - struct net_device *real_dev = lowpan_dev->real_dev; - struct lowpan_dev_record *entry, *tmp; - - ASSERT_RTNL(); - - lowpan_open_count--; - if (!lowpan_open_count) - dev_remove_pack(&lowpan_packet_type); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (entry->ldev == dev) { - list_del(&entry->list); - kfree(entry); - } - } - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); - - unregister_netdevice_queue(dev, head); - - dev_put(real_dev); -} - -static struct rtnl_link_ops lowpan_link_ops __read_mostly = { - .kind = "lowpan", - .priv_size = sizeof(struct lowpan_dev_info), - .setup = lowpan_setup, - .newlink = lowpan_newlink, - .dellink = lowpan_dellink, - .validate = lowpan_validate, -}; - -static inline int __init lowpan_netlink_init(void) -{ - return rtnl_link_register(&lowpan_link_ops); -} - -static inline void lowpan_netlink_fini(void) -{ - rtnl_link_unregister(&lowpan_link_ops); -} - -static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - LIST_HEAD(del_list); - struct lowpan_dev_record *entry, *tmp; - - if (dev->type != ARPHRD_IEEE802154) - goto out; - - if (event == NETDEV_UNREGISTER) { - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (lowpan_dev_info(entry->ldev)->real_dev == dev) - lowpan_dellink(entry->ldev, &del_list); - } - - unregister_netdevice_many(&del_list); - } - -out: - return NOTIFY_DONE; -} - -static struct notifier_block lowpan_dev_notifier = { - .notifier_call = lowpan_device_event, -}; - -static int __init lowpan_init_module(void) -{ - int err = 0; - - err = lowpan_net_frag_init(); - if (err < 0) - goto out; - - err = lowpan_netlink_init(); - if (err < 0) - goto out_frag; - - err = register_netdevice_notifier(&lowpan_dev_notifier); - if (err < 0) - goto out_pack; - - return 0; - -out_pack: - lowpan_netlink_fini(); -out_frag: - lowpan_net_frag_exit(); -out: - return err; -} - -static void __exit lowpan_cleanup_module(void) -{ - lowpan_netlink_fini(); - - lowpan_net_frag_exit(); - - unregister_netdevice_notifier(&lowpan_dev_notifier); -} - -module_init(lowpan_init_module); -module_exit(lowpan_cleanup_module); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 9ea0af49a4ea..1370d5b0041b 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -20,10 +20,6 @@ config IEEE802154_SOCKET for 802.15.4 dataframes. Also RAW socket interface to build MAC header from userspace. -config IEEE802154_6LOWPAN - tristate "6lowpan support over IEEE 802.15.4" - depends on 6LOWPAN - ---help--- - IPv6 compression over IEEE 802.15.4. 
+source "net/ieee802154/6lowpan/Kconfig" endif diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 88bde78754c5..36533978b5ef 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,8 +1,7 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o -obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o +obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan/ -ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ header_ops.o sysfs.o nl802154.o ieee802154_socket-y := socket.o diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c deleted file mode 100644 index 9d980ed3ffe2..000000000000 --- a/net/ieee802154/reassembly.c +++ /dev/null @@ -1,585 +0,0 @@ -/* 6LoWPAN fragment reassembly - * - * - * Authors: - * Alexander Aring - * - * Based on: net/ipv6/reassembly.c - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define pr_fmt(fmt) "6LoWPAN: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "reassembly.h" - -static const char lowpan_frags_cache_name[] = "lowpan-frags"; - -struct lowpan_frag_info { - u16 d_tag; - u16 d_size; - u8 d_offset; -}; - -static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) -{ - return (struct lowpan_frag_info *)skb->cb; -} - -static struct inet_frags lowpan_frags; - -static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, - struct sk_buff *prev, struct net_device *dev); - -static unsigned int lowpan_hash_frag(u16 tag, u16 d_size, - const struct ieee802154_addr *saddr, - const struct ieee802154_addr *daddr) -{ - net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd)); - return jhash_3words(ieee802154_addr_hash(saddr), - ieee802154_addr_hash(daddr), - (__force u32)(tag + (d_size << 16)), - lowpan_frags.rnd); -} - -static unsigned int lowpan_hashfn(const struct inet_frag_queue *q) -{ - const struct lowpan_frag_queue *fq; - - fq = container_of(q, struct lowpan_frag_queue, q); - return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr); -} - -static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a) -{ - const struct lowpan_frag_queue *fq; - const struct lowpan_create_arg *arg = a; - - fq = container_of(q, struct lowpan_frag_queue, q); - return fq->tag == arg->tag && fq->d_size == arg->d_size && - ieee802154_addr_equal(&fq->saddr, arg->src) && - ieee802154_addr_equal(&fq->daddr, arg->dst); -} - -static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) -{ - const struct lowpan_create_arg *arg = a; - struct lowpan_frag_queue *fq; - - fq = container_of(q, struct lowpan_frag_queue, q); - - fq->tag = arg->tag; - fq->d_size = arg->d_size; - fq->saddr = *arg->src; - fq->daddr = *arg->dst; -} - -static void lowpan_frag_expire(unsigned long data) -{ - struct frag_queue *fq; - struct net *net; - - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); - net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags); - - spin_lock(&fq->q.lock); - - if (fq->q.flags & INET_FRAG_COMPLETE) - goto out; - - inet_frag_kill(&fq->q, &lowpan_frags); -out: - spin_unlock(&fq->q.lock); - inet_frag_put(&fq->q, &lowpan_frags); -} - -static inline struct 
lowpan_frag_queue * -fq_find(struct net *net, const struct lowpan_frag_info *frag_info, - const struct ieee802154_addr *src, - const struct ieee802154_addr *dst) -{ - struct inet_frag_queue *q; - struct lowpan_create_arg arg; - unsigned int hash; - struct netns_ieee802154_lowpan *ieee802154_lowpan = - net_ieee802154_lowpan(net); - - arg.tag = frag_info->d_tag; - arg.d_size = frag_info->d_size; - arg.src = src; - arg.dst = dst; - - hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); - - q = inet_frag_find(&ieee802154_lowpan->frags, - &lowpan_frags, &arg, hash); - if (IS_ERR_OR_NULL(q)) { - inet_frag_maybe_warn_overflow(q, pr_fmt()); - return NULL; - } - return container_of(q, struct lowpan_frag_queue, q); -} - -static int lowpan_frag_queue(struct lowpan_frag_queue *fq, - struct sk_buff *skb, const u8 frag_type) -{ - struct sk_buff *prev, *next; - struct net_device *dev; - int end, offset; - - if (fq->q.flags & INET_FRAG_COMPLETE) - goto err; - - offset = lowpan_cb(skb)->d_offset << 3; - end = lowpan_cb(skb)->d_size; - - /* Is this the final fragment? */ - if (offset + skb->len == end) { - /* If we already have some bits beyond end - * or have different end, the segment is corrupted. - */ - if (end < fq->q.len || - ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) - goto err; - fq->q.flags |= INET_FRAG_LAST_IN; - fq->q.len = end; - } else { - if (end > fq->q.len) { - /* Some bits beyond end -> corruption. */ - if (fq->q.flags & INET_FRAG_LAST_IN) - goto err; - fq->q.len = end; - } - } - - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || lowpan_cb(prev)->d_offset < lowpan_cb(skb)->d_offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (lowpan_cb(next)->d_offset >= lowpan_cb(skb)->d_offset) - break; /* bingo! */ - prev = next; - } - -found: - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; - - dev = skb->dev; - if (dev) - skb->dev = NULL; - - fq->q.stamp = skb->tstamp; - if (frag_type == LOWPAN_DISPATCH_FRAG1) { - /* Calculate uncomp. 6lowpan header to estimate full size */ - fq->q.meat += lowpan_uncompress_size(skb, NULL); - fq->q.flags |= INET_FRAG_FIRST_IN; - } else { - fq->q.meat += skb->len; - } - add_frag_mem_limit(&fq->q, skb->truesize); - - if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) { - int res; - unsigned long orefdst = skb->_skb_refdst; - - skb->_skb_refdst = 0UL; - res = lowpan_frag_reasm(fq, prev, dev); - skb->_skb_refdst = orefdst; - return res; - } - - return -1; -err: - kfree_skb(skb); - return -1; -} - -/* Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. - * - * It is called with locked fq, and caller must check that - * queue is eligible for reassembly i.e. it is not COMPLETE, - * the last and the first frames arrived and all the bits are here. - */ -static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, - struct net_device *dev) -{ - struct sk_buff *fp, *head = fq->q.fragments; - int sum_truesize; - - inet_frag_kill(&fq->q, &lowpan_frags); - - /* Make the one we just received the head. 
*/ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); - - if (!fp) - goto out_oom; - - fp->next = head->next; - if (!fp->next) - fq->q.fragments_tail = fp; - prev->next = fp; - - skb_morph(head, fq->q.fragments); - head->next = fq->q.fragments->next; - - consume_skb(fq->q.fragments); - fq->q.fragments = head; - } - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - goto out_oom; - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. - */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (!clone) - goto out_oom; - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = head->data_len - plen; - clone->data_len = clone->len; - head->data_len -= clone->len; - head->len -= clone->len; - add_frag_mem_limit(&fq->q, clone->truesize); - } - - WARN_ON(head == NULL); - - sum_truesize = head->truesize; - for (fp = head->next; fp;) { - bool headstolen; - int delta; - struct sk_buff *next = fp->next; - - sum_truesize += fp->truesize; - if (skb_try_coalesce(head, fp, &headstolen, &delta)) { - kfree_skb_partial(fp, headstolen); - } else { - if (!skb_shinfo(head)->frag_list) - skb_shinfo(head)->frag_list = fp; - head->data_len += fp->len; - head->len += fp->len; - head->truesize += fp->truesize; - } - fp = next; - } - sub_frag_mem_limit(&fq->q, sum_truesize); - - head->next = NULL; - head->dev = dev; - head->tstamp = fq->q.stamp; - - fq->q.fragments = NULL; - fq->q.fragments_tail = NULL; - - return 1; -out_oom: - net_dbg_ratelimited("lowpan_frag_reasm: no memory for reassembly\n"); - return -1; -} - -static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type, - struct lowpan_frag_info *frag_info) -{ - bool fail; - u8 pattern = 0, low = 0; - __be16 d_tag = 0; - - fail = lowpan_fetch_skb(skb, &pattern, 1); - fail |= lowpan_fetch_skb(skb, &low, 1); - frag_info->d_size = (pattern & 7) << 8 | low; - fail |= lowpan_fetch_skb(skb, &d_tag, 2); - frag_info->d_tag = ntohs(d_tag); - - if (frag_type == LOWPAN_DISPATCH_FRAGN) { - fail |= lowpan_fetch_skb(skb, &frag_info->d_offset, 1); - } else { - skb_reset_network_header(skb); - frag_info->d_offset = 0; - } - - if (unlikely(fail)) - return -EIO; - - return 0; -} - -int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) -{ - struct lowpan_frag_queue *fq; - struct net *net = dev_net(skb->dev); - struct lowpan_frag_info *frag_info = lowpan_cb(skb); - struct ieee802154_addr source, dest; - int err; - - source = mac_cb(skb)->source; - dest = mac_cb(skb)->dest; - - err = lowpan_get_frag_info(skb, frag_type, frag_info); - if (err < 0) - goto err; - - if (frag_info->d_size > IPV6_MIN_MTU) { - net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n"); - goto err; - } - - fq = fq_find(net, frag_info, &source, &dest); - if (fq != NULL) { - int ret; - - spin_lock(&fq->q.lock); - ret = lowpan_frag_queue(fq, skb, frag_type); - spin_unlock(&fq->q.lock); - - inet_frag_put(&fq->q, &lowpan_frags); - return ret; - } - -err: - kfree_skb(skb); - return -1; -} -EXPORT_SYMBOL(lowpan_frag_rcv); - -#ifdef CONFIG_SYSCTL -static int zero; - -static struct ctl_table lowpan_frags_ns_ctl_table[] = { - { - .procname = "6lowpanfrag_high_thresh", - 
.data = &init_net.ieee802154_lowpan.frags.high_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh - }, - { - .procname = "6lowpanfrag_low_thresh", - .data = &init_net.ieee802154_lowpan.frags.low_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh - }, - { - .procname = "6lowpanfrag_time", - .data = &init_net.ieee802154_lowpan.frags.timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; - -/* secret interval has been deprecated */ -static int lowpan_frags_secret_interval_unused; -static struct ctl_table lowpan_frags_ctl_table[] = { - { - .procname = "6lowpanfrag_secret_interval", - .data = &lowpan_frags_secret_interval_unused, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; - -static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) -{ - struct ctl_table *table; - struct ctl_table_header *hdr; - struct netns_ieee802154_lowpan *ieee802154_lowpan = - net_ieee802154_lowpan(net); - - table = lowpan_frags_ns_ctl_table; - if (!net_eq(net, &init_net)) { - table = kmemdup(table, sizeof(lowpan_frags_ns_ctl_table), - GFP_KERNEL); - if (table == NULL) - goto err_alloc; - - table[0].data = &ieee802154_lowpan->frags.high_thresh; - table[0].extra1 = &ieee802154_lowpan->frags.low_thresh; - table[0].extra2 = &init_net.ieee802154_lowpan.frags.high_thresh; - table[1].data = &ieee802154_lowpan->frags.low_thresh; - table[1].extra2 = &ieee802154_lowpan->frags.high_thresh; - table[2].data = &ieee802154_lowpan->frags.timeout; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - } - - hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table); - if (hdr == NULL) - goto err_reg; - - ieee802154_lowpan->sysctl.frags_hdr = hdr; - return 0; - -err_reg: - if (!net_eq(net, &init_net)) - kfree(table); -err_alloc: - return -ENOMEM; -} - -static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net) -{ - struct ctl_table *table; - struct netns_ieee802154_lowpan *ieee802154_lowpan = - net_ieee802154_lowpan(net); - - table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr); - if (!net_eq(net, &init_net)) - kfree(table); -} - -static struct ctl_table_header *lowpan_ctl_header; - -static int __init lowpan_frags_sysctl_register(void) -{ - lowpan_ctl_header = register_net_sysctl(&init_net, - "net/ieee802154/6lowpan", - lowpan_frags_ctl_table); - return lowpan_ctl_header == NULL ? 
-ENOMEM : 0; -} - -static void lowpan_frags_sysctl_unregister(void) -{ - unregister_net_sysctl_table(lowpan_ctl_header); -} -#else -static inline int lowpan_frags_ns_sysctl_register(struct net *net) -{ - return 0; -} - -static inline void lowpan_frags_ns_sysctl_unregister(struct net *net) -{ -} - -static inline int __init lowpan_frags_sysctl_register(void) -{ - return 0; -} - -static inline void lowpan_frags_sysctl_unregister(void) -{ -} -#endif - -static int __net_init lowpan_frags_init_net(struct net *net) -{ - struct netns_ieee802154_lowpan *ieee802154_lowpan = - net_ieee802154_lowpan(net); - - ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; - ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - - inet_frags_init_net(&ieee802154_lowpan->frags); - - return lowpan_frags_ns_sysctl_register(net); -} - -static void __net_exit lowpan_frags_exit_net(struct net *net) -{ - struct netns_ieee802154_lowpan *ieee802154_lowpan = - net_ieee802154_lowpan(net); - - lowpan_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); -} - -static struct pernet_operations lowpan_frags_ops = { - .init = lowpan_frags_init_net, - .exit = lowpan_frags_exit_net, -}; - -int __init lowpan_net_frag_init(void) -{ - int ret; - - ret = lowpan_frags_sysctl_register(); - if (ret) - return ret; - - ret = register_pernet_subsys(&lowpan_frags_ops); - if (ret) - goto err_pernet; - - lowpan_frags.hashfn = lowpan_hashfn; - lowpan_frags.constructor = lowpan_frag_init; - lowpan_frags.destructor = NULL; - lowpan_frags.skb_free = NULL; - lowpan_frags.qsize = sizeof(struct frag_queue); - lowpan_frags.match = lowpan_frag_match; - lowpan_frags.frag_expire = lowpan_frag_expire; - lowpan_frags.frags_cache_name = lowpan_frags_cache_name; - ret = inet_frags_init(&lowpan_frags); - if (ret) - goto err_pernet; - - return ret; -err_pernet: - lowpan_frags_sysctl_unregister(); - return ret; -} - -void lowpan_net_frag_exit(void) -{ - inet_frags_fini(&lowpan_frags); - lowpan_frags_sysctl_unregister(); - unregister_pernet_subsys(&lowpan_frags_ops); -} diff --git a/net/ieee802154/reassembly.h b/net/ieee802154/reassembly.h deleted file mode 100644 index 836b16fa001f..000000000000 --- a/net/ieee802154/reassembly.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __IEEE802154_6LOWPAN_REASSEMBLY_H__ -#define __IEEE802154_6LOWPAN_REASSEMBLY_H__ - -#include - -struct lowpan_create_arg { - u16 tag; - u16 d_size; - const struct ieee802154_addr *src; - const struct ieee802154_addr *dst; -}; - -/* Equivalent of ipv4 struct ip - */ -struct lowpan_frag_queue { - struct inet_frag_queue q; - - u16 tag; - u16 d_size; - struct ieee802154_addr saddr; - struct ieee802154_addr daddr; -}; - -static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) -{ - switch (a->mode) { - case IEEE802154_ADDR_LONG: - return (((__force u64)a->extended_addr) >> 32) ^ - (((__force u64)a->extended_addr) & 0xffffffff); - case IEEE802154_ADDR_SHORT: - return (__force u32)(a->short_addr); - default: - return 0; - } -} - -int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); -void lowpan_net_frag_exit(void); -int lowpan_net_frag_init(void); - -#endif /* __IEEE802154_6LOWPAN_REASSEMBLY_H__ */ -- cgit v1.2.3 From 8691ee592c9299847b28350692eda1d5d6990581 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 4 Jan 2015 17:10:54 +0100 Subject: ieee802154: 6lowpan: rename internal header This patch renames the internal header for af802154. 
This naming convention is like ieee802154_i.h in mac802154 and avoids naming confusing with the global af802154 header. Furthermore this header contains more ieee802154 specific definitions. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/6lowpan/6lowpan_i.h | 41 +++++++++++++++++++++++++++++++++++ net/ieee802154/6lowpan/6lowpan_rtnl.c | 2 +- net/ieee802154/6lowpan/reassembly.c | 2 +- net/ieee802154/6lowpan/reassembly.h | 41 ----------------------------------- 4 files changed, 43 insertions(+), 43 deletions(-) create mode 100644 net/ieee802154/6lowpan/6lowpan_i.h delete mode 100644 net/ieee802154/6lowpan/reassembly.h (limited to 'net') diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h new file mode 100644 index 000000000000..edbe80314b7d --- /dev/null +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -0,0 +1,41 @@ +#ifndef __IEEE802154_6LOWPAN_I_H__ +#define __IEEE802154_6LOWPAN_I_H__ + +#include + +struct lowpan_create_arg { + u16 tag; + u16 d_size; + const struct ieee802154_addr *src; + const struct ieee802154_addr *dst; +}; + +/* Equivalent of ipv4 struct ip + */ +struct lowpan_frag_queue { + struct inet_frag_queue q; + + u16 tag; + u16 d_size; + struct ieee802154_addr saddr; + struct ieee802154_addr daddr; +}; + +static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) +{ + switch (a->mode) { + case IEEE802154_ADDR_LONG: + return (((__force u64)a->extended_addr) >> 32) ^ + (((__force u64)a->extended_addr) & 0xffffffff); + case IEEE802154_ADDR_SHORT: + return (__force u32)(a->short_addr); + default: + return 0; + } +} + +int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); +void lowpan_net_frag_exit(void); +int lowpan_net_frag_init(void); + +#endif /* __IEEE802154_6LOWPAN_I_H__ */ diff --git a/net/ieee802154/6lowpan/6lowpan_rtnl.c b/net/ieee802154/6lowpan/6lowpan_rtnl.c index 27eaa65e88e1..1918c43499ce 100644 --- a/net/ieee802154/6lowpan/6lowpan_rtnl.c +++ b/net/ieee802154/6lowpan/6lowpan_rtnl.c @@ -55,7 +55,7 @@ #include #include -#include "reassembly.h" +#include "6lowpan_i.h" static LIST_HEAD(lowpan_devices); static int lowpan_open_count; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 9d980ed3ffe2..f46e4d1306f2 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -28,7 +28,7 @@ #include #include -#include "reassembly.h" +#include "6lowpan_i.h" static const char lowpan_frags_cache_name[] = "lowpan-frags"; diff --git a/net/ieee802154/6lowpan/reassembly.h b/net/ieee802154/6lowpan/reassembly.h deleted file mode 100644 index 836b16fa001f..000000000000 --- a/net/ieee802154/6lowpan/reassembly.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __IEEE802154_6LOWPAN_REASSEMBLY_H__ -#define __IEEE802154_6LOWPAN_REASSEMBLY_H__ - -#include - -struct lowpan_create_arg { - u16 tag; - u16 d_size; - const struct ieee802154_addr *src; - const struct ieee802154_addr *dst; -}; - -/* Equivalent of ipv4 struct ip - */ -struct lowpan_frag_queue { - struct inet_frag_queue q; - - u16 tag; - u16 d_size; - struct ieee802154_addr saddr; - struct ieee802154_addr daddr; -}; - -static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) -{ - switch (a->mode) { - case IEEE802154_ADDR_LONG: - return (((__force u64)a->extended_addr) >> 32) ^ - (((__force u64)a->extended_addr) & 0xffffffff); - case IEEE802154_ADDR_SHORT: - return (__force u32)(a->short_addr); - default: - return 0; - } -} - -int lowpan_frag_rcv(struct sk_buff *skb, const u8 
frag_type); -void lowpan_net_frag_exit(void); -int lowpan_net_frag_init(void); - -#endif /* __IEEE802154_6LOWPAN_REASSEMBLY_H__ */ -- cgit v1.2.3 From 4662a0da544c8ed7ecf79ef0f6c4dc65be081352 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 4 Jan 2015 17:10:55 +0100 Subject: ieee802154: 6lowpan: move receive functionality This patch moves all relevant receive functionality into a separate rx.c file. We can simple separate this functionality like we did it in mac802154. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/6lowpan/6lowpan_i.h | 25 +++++ net/ieee802154/6lowpan/6lowpan_rtnl.c | 169 +-------------------------------- net/ieee802154/6lowpan/Makefile | 2 +- net/ieee802154/6lowpan/rx.c | 171 ++++++++++++++++++++++++++++++++++ 4 files changed, 200 insertions(+), 167 deletions(-) create mode 100644 net/ieee802154/6lowpan/rx.c (limited to 'net') diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index edbe80314b7d..29ec61b01752 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -1,6 +1,8 @@ #ifndef __IEEE802154_6LOWPAN_I_H__ #define __IEEE802154_6LOWPAN_I_H__ +#include + #include struct lowpan_create_arg { @@ -34,8 +36,31 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) } } +struct lowpan_dev_record { + struct net_device *ldev; + struct list_head list; +}; + +/* private device info */ +struct lowpan_dev_info { + struct net_device *real_dev; /* real WPAN device ptr */ + struct mutex dev_list_mtx; /* mutex for list ops */ + u16 fragment_tag; +}; + +static inline struct +lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) +{ + return netdev_priv(dev); +} + +extern struct list_head lowpan_devices; + int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type); void lowpan_net_frag_exit(void); int lowpan_net_frag_init(void); +void lowpan_rx_init(void); +void lowpan_rx_exit(void); + #endif /* __IEEE802154_6LOWPAN_I_H__ */ diff --git a/net/ieee802154/6lowpan/6lowpan_rtnl.c b/net/ieee802154/6lowpan/6lowpan_rtnl.c index 1918c43499ce..9dce20e7f6a0 100644 --- a/net/ieee802154/6lowpan/6lowpan_rtnl.c +++ b/net/ieee802154/6lowpan/6lowpan_rtnl.c @@ -57,21 +57,9 @@ #include "6lowpan_i.h" -static LIST_HEAD(lowpan_devices); +LIST_HEAD(lowpan_devices); static int lowpan_open_count; -/* private device info */ -struct lowpan_dev_info { - struct net_device *real_dev; /* real WPAN device ptr */ - struct mutex dev_list_mtx; /* mutex for list ops */ - u16 fragment_tag; -}; - -struct lowpan_dev_record { - struct net_device *ldev; - struct list_head list; -}; - /* don't save pan id, it's intra pan */ struct lowpan_addr { u8 mode; @@ -87,12 +75,6 @@ struct lowpan_addr_info { struct lowpan_addr saddr; }; -static inline struct -lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) -{ - return netdev_priv(dev); -} - static inline struct lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) { @@ -134,74 +116,6 @@ static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, return 0; } -static int lowpan_give_skb_to_devices(struct sk_buff *skb, - struct net_device *dev) -{ - struct lowpan_dev_record *entry; - struct sk_buff *skb_cp; - int stat = NET_RX_SUCCESS; - - skb->protocol = htons(ETH_P_IPV6); - skb->pkt_type = PACKET_HOST; - - rcu_read_lock(); - list_for_each_entry_rcu(entry, &lowpan_devices, list) - if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { - skb_cp = skb_copy(skb, GFP_ATOMIC); - if (!skb_cp) { - kfree_skb(skb); - 
rcu_read_unlock(); - return NET_RX_DROP; - } - - skb_cp->dev = entry->ldev; - stat = netif_rx(skb_cp); - if (stat == NET_RX_DROP) - break; - } - rcu_read_unlock(); - - consume_skb(skb); - - return stat; -} - -static int -iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) -{ - u8 iphc0, iphc1; - struct ieee802154_addr_sa sa, da; - void *sap, *dap; - - raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); - /* at least two bytes will be used for the encoding */ - if (skb->len < 2) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc0)) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc1)) - return -EINVAL; - - ieee802154_addr_to_sa(&sa, &hdr->source); - ieee802154_addr_to_sa(&da, &hdr->dest); - - if (sa.addr_type == IEEE802154_ADDR_SHORT) - sap = &sa.short_addr; - else - sap = &sa.hwaddr; - - if (da.addr_type == IEEE802154_ADDR_SHORT) - dap = &da.short_addr; - else - dap = &da.hwaddr; - - return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, - IEEE802154_ADDR_LEN, dap, da.addr_type, - IEEE802154_ADDR_LEN, iphc0, iphc1); -} - static struct sk_buff* lowpan_alloc_frag(struct sk_buff *skb, int size, const struct ieee802154_hdr *master_hdr) @@ -485,83 +399,6 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct ieee802154_hdr hdr; - int ret; - - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - goto drop; - - if (!netif_running(dev)) - goto drop_skb; - - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop_skb; - - if (dev->type != ARPHRD_IEEE802154) - goto drop_skb; - - if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) - goto drop_skb; - - /* check that it's our buffer */ - if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { - /* Pull off the 1-byte of 6lowpan header. 
*/ - skb_pull(skb, 1); - return lowpan_give_skb_to_devices(skb, NULL); - } else { - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_devices(skb, NULL); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - default: - break; - } - } - -drop_skb: - kfree_skb(skb); -drop: - return NET_RX_DROP; -} - -static struct packet_type lowpan_packet_type = { - .type = htons(ETH_P_IEEE802154), - .func = lowpan_rcv, -}; - static int lowpan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { @@ -607,7 +444,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, ret = register_netdevice(dev); if (ret >= 0) { if (!lowpan_open_count) - dev_add_pack(&lowpan_packet_type); + lowpan_rx_init(); lowpan_open_count++; } @@ -624,7 +461,7 @@ static void lowpan_dellink(struct net_device *dev, struct list_head *head) lowpan_open_count--; if (!lowpan_open_count) - dev_remove_pack(&lowpan_packet_type); + lowpan_rx_exit(); mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { diff --git a/net/ieee802154/6lowpan/Makefile b/net/ieee802154/6lowpan/Makefile index 936959bd4f99..8e8397185751 100644 --- a/net/ieee802154/6lowpan/Makefile +++ b/net/ieee802154/6lowpan/Makefile @@ -1,3 +1,3 @@ obj-y += ieee802154_6lowpan.o -ieee802154_6lowpan-y := 6lowpan_rtnl.o reassembly.o +ieee802154_6lowpan-y := 6lowpan_rtnl.o rx.o reassembly.o diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c new file mode 100644 index 000000000000..4be1d289ab2d --- /dev/null +++ b/net/ieee802154/6lowpan/rx.c @@ -0,0 +1,171 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include + +#include +#include + +#include "6lowpan_i.h" + +static int lowpan_give_skb_to_devices(struct sk_buff *skb, + struct net_device *dev) +{ + struct lowpan_dev_record *entry; + struct sk_buff *skb_cp; + int stat = NET_RX_SUCCESS; + + skb->protocol = htons(ETH_P_IPV6); + skb->pkt_type = PACKET_HOST; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) { + skb_cp = skb_copy(skb, GFP_ATOMIC); + if (!skb_cp) { + kfree_skb(skb); + rcu_read_unlock(); + return NET_RX_DROP; + } + + skb_cp->dev = entry->ldev; + stat = netif_rx(skb_cp); + if (stat == NET_RX_DROP) + break; + } + rcu_read_unlock(); + + consume_skb(skb); + + return stat; +} + +static int +iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) +{ + u8 iphc0, iphc1; + struct ieee802154_addr_sa sa, da; + void *sap, *dap; + + raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc0)) + return -EINVAL; + + if (lowpan_fetch_skb_u8(skb, &iphc1)) + return -EINVAL; + + ieee802154_addr_to_sa(&sa, &hdr->source); + ieee802154_addr_to_sa(&da, &hdr->dest); + + if (sa.addr_type == IEEE802154_ADDR_SHORT) + sap = &sa.short_addr; + else + sap = &sa.hwaddr; + + if (da.addr_type == IEEE802154_ADDR_SHORT) + dap = &da.short_addr; + else + dap = &da.hwaddr; + + return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, + IEEE802154_ADDR_LEN, dap, da.addr_type, + IEEE802154_ADDR_LEN, iphc0, iphc1); +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct ieee802154_hdr hdr; + int ret; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto drop; + + if (!netif_running(dev)) + goto drop_skb; + + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop_skb; + + if (dev->type != ARPHRD_IEEE802154) + goto drop_skb; + + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) + goto drop_skb; + + /* check that it's our buffer */ + if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + /* Pull off the 1-byte of 6lowpan header. 
*/ + skb_pull(skb, 1); + return lowpan_give_skb_to_devices(skb, NULL); + } else { + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ + ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); + if (ret == 1) { + ret = iphc_decompress(skb, &hdr); + if (ret < 0) + goto drop_skb; + + return lowpan_give_skb_to_devices(skb, NULL); + } else if (ret == -1) { + return NET_RX_DROP; + } else { + return NET_RX_SUCCESS; + } + default: + break; + } + } + +drop_skb: + kfree_skb(skb); +drop: + return NET_RX_DROP; +} + +static struct packet_type lowpan_packet_type = { + .type = htons(ETH_P_IEEE802154), + .func = lowpan_rcv, +}; + +void lowpan_rx_init(void) +{ + dev_add_pack(&lowpan_packet_type); +} + +void lowpan_rx_exit(void) +{ + dev_remove_pack(&lowpan_packet_type); +} -- cgit v1.2.3 From 4dc315e267fe4a3c863fec21f26aa0a418c3f07a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 4 Jan 2015 17:10:56 +0100 Subject: ieee802154: 6lowpan: move transmit functionality This patch moves all relevant transmit functionality into a separate tx.c file. We can simple separate this functionality like we did it in mac802154. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/6lowpan/6lowpan_i.h | 6 + net/ieee802154/6lowpan/6lowpan_rtnl.c | 264 +-------------------------------- net/ieee802154/6lowpan/Makefile | 2 +- net/ieee802154/6lowpan/tx.c | 271 ++++++++++++++++++++++++++++++++++ 4 files changed, 279 insertions(+), 264 deletions(-) create mode 100644 net/ieee802154/6lowpan/tx.c (limited to 'net') diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index 29ec61b01752..e50f69da78eb 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -3,6 +3,7 @@ #include +#include #include struct lowpan_create_arg { @@ -63,4 +64,9 @@ int lowpan_net_frag_init(void); void lowpan_rx_init(void); void lowpan_rx_exit(void); +int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len); +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev); + #endif /* __IEEE802154_6LOWPAN_I_H__ */ diff --git a/net/ieee802154/6lowpan/6lowpan_rtnl.c b/net/ieee802154/6lowpan/6lowpan_rtnl.c index 9dce20e7f6a0..055fbb71ba6f 100644 --- a/net/ieee802154/6lowpan/6lowpan_rtnl.c +++ b/net/ieee802154/6lowpan/6lowpan_rtnl.c @@ -44,15 +44,10 @@ * SUCH DAMAGE. 
*/ -#include -#include #include -#include #include #include -#include -#include -#include + #include #include "6lowpan_i.h" @@ -60,263 +55,6 @@ LIST_HEAD(lowpan_devices); static int lowpan_open_count; -/* don't save pan id, it's intra pan */ -struct lowpan_addr { - u8 mode; - union { - /* IPv6 needs big endian here */ - __be64 extended_addr; - __be16 short_addr; - } u; -}; - -struct lowpan_addr_info { - struct lowpan_addr daddr; - struct lowpan_addr saddr; -}; - -static inline struct -lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) -{ - WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); - return (struct lowpan_addr_info *)(skb->data - - sizeof(struct lowpan_addr_info)); -} - -static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) -{ - const u8 *saddr = _saddr; - const u8 *daddr = _daddr; - struct lowpan_addr_info *info; - - /* TODO: - * if this package isn't ipv6 one, where should it be routed? - */ - if (type != ETH_P_IPV6) - return 0; - - if (!saddr) - saddr = dev->dev_addr; - - raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); - raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); - - info = lowpan_skb_priv(skb); - - /* TODO: Currently we only support extended_addr */ - info->daddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->daddr.u.extended_addr, daddr, - sizeof(info->daddr.u.extended_addr)); - info->saddr.mode = IEEE802154_ADDR_LONG; - memcpy(&info->saddr.u.extended_addr, saddr, - sizeof(info->daddr.u.extended_addr)); - - return 0; -} - -static struct sk_buff* -lowpan_alloc_frag(struct sk_buff *skb, int size, - const struct ieee802154_hdr *master_hdr) -{ - struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; - struct sk_buff *frag; - int rc; - - frag = alloc_skb(real_dev->hard_header_len + - real_dev->needed_tailroom + size, - GFP_ATOMIC); - - if (likely(frag)) { - frag->dev = real_dev; - frag->priority = skb->priority; - skb_reserve(frag, real_dev->hard_header_len); - skb_reset_network_header(frag); - *mac_cb(frag) = *mac_cb(skb); - - rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, - &master_hdr->source, size); - if (rc < 0) { - kfree_skb(frag); - return ERR_PTR(rc); - } - } else { - frag = ERR_PTR(-ENOMEM); - } - - return frag; -} - -static int -lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, - u8 *frag_hdr, int frag_hdrlen, - int offset, int len) -{ - struct sk_buff *frag; - - raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); - - frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); - if (IS_ERR(frag)) - return -PTR_ERR(frag); - - memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); - memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); - - raw_dump_table(__func__, " fragment dump", frag->data, frag->len); - - return dev_queue_xmit(frag); -} - -static int -lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, - const struct ieee802154_hdr *wpan_hdr) -{ - u16 dgram_size, dgram_offset; - __be16 frag_tag; - u8 frag_hdr[5]; - int frag_cap, frag_len, payload_cap, rc; - int skb_unprocessed, skb_offset; - - dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - - skb->mac_len; - frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); - lowpan_dev_info(dev)->fragment_tag++; - - frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); - frag_hdr[1] = dgram_size & 0xff; - memcpy(frag_hdr + 2, &frag_tag, 
sizeof(frag_tag)); - - payload_cap = ieee802154_max_payload(wpan_hdr); - - frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - - skb_network_header_len(skb), 8); - - skb_offset = skb_network_header_len(skb); - skb_unprocessed = skb->len - skb->mac_len - skb_offset; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAG1_HEAD_SIZE, 0, - frag_len + skb_network_header_len(skb)); - if (rc) { - pr_debug("%s unable to send FRAG1 packet (tag: %d)", - __func__, ntohs(frag_tag)); - goto err; - } - - frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; - frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; - frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); - - do { - dgram_offset += frag_len; - skb_offset += frag_len; - skb_unprocessed -= frag_len; - frag_len = min(frag_cap, skb_unprocessed); - - frag_hdr[4] = dgram_offset >> 3; - - rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, - LOWPAN_FRAGN_HEAD_SIZE, skb_offset, - frag_len); - if (rc) { - pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n", - __func__, ntohs(frag_tag), skb_offset); - goto err; - } - } while (skb_unprocessed > frag_cap); - - consume_skb(skb); - return NET_XMIT_SUCCESS; - -err: - kfree_skb(skb); - return rc; -} - -static int lowpan_header(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_addr sa, da; - struct ieee802154_mac_cb *cb = mac_cb_init(skb); - struct lowpan_addr_info info; - void *daddr, *saddr; - - memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); - - /* TODO: Currently we only support extended_addr */ - daddr = &info.daddr.u.extended_addr; - saddr = &info.saddr.u.extended_addr; - - lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); - - cb->type = IEEE802154_FC_TYPE_DATA; - - /* prepare wpan address data */ - sa.mode = IEEE802154_ADDR_LONG; - sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - sa.extended_addr = ieee802154_devaddr_from_raw(saddr); - - /* intra-PAN communications */ - da.pan_id = sa.pan_id; - - /* if the destination address is the broadcast address, use the - * corresponding short address - */ - if (lowpan_is_addr_broadcast((const u8 *)daddr)) { - da.mode = IEEE802154_ADDR_SHORT; - da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); - cb->ackreq = false; - } else { - da.mode = IEEE802154_ADDR_LONG; - da.extended_addr = ieee802154_devaddr_from_raw(daddr); - cb->ackreq = true; - } - - return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - ETH_P_IPV6, (void *)&da, (void *)&sa, 0); -} - -static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee802154_hdr wpan_hdr; - int max_single, ret; - - pr_debug("package xmit\n"); - - /* We must take a copy of the skb before we modify/replace the ipv6 - * header as the header could be used elsewhere - */ - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_XMIT_DROP; - - ret = lowpan_header(skb, dev); - if (ret < 0) { - kfree_skb(skb); - return NET_XMIT_DROP; - } - - if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { - kfree_skb(skb); - return NET_XMIT_DROP; - } - - max_single = ieee802154_max_payload(&wpan_hdr); - - if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { - skb->dev = lowpan_dev_info(dev)->real_dev; - return dev_queue_xmit(skb); - } else { - netdev_tx_t rc; - - pr_debug("frame is too big, fragmentation is needed\n"); - rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); - - return rc < 0 ? 
NET_XMIT_DROP : rc; - } -} - static __le16 lowpan_get_pan_id(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; diff --git a/net/ieee802154/6lowpan/Makefile b/net/ieee802154/6lowpan/Makefile index 8e8397185751..7f11c136bcfe 100644 --- a/net/ieee802154/6lowpan/Makefile +++ b/net/ieee802154/6lowpan/Makefile @@ -1,3 +1,3 @@ obj-y += ieee802154_6lowpan.o -ieee802154_6lowpan-y := 6lowpan_rtnl.o rx.o reassembly.o +ieee802154_6lowpan-y := 6lowpan_rtnl.o rx.o reassembly.o tx.o diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c new file mode 100644 index 000000000000..2349070bd534 --- /dev/null +++ b/net/ieee802154/6lowpan/tx.c @@ -0,0 +1,271 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +#include "6lowpan_i.h" + +/* don't save pan id, it's intra pan */ +struct lowpan_addr { + u8 mode; + union { + /* IPv6 needs big endian here */ + __be64 extended_addr; + __be16 short_addr; + } u; +}; + +struct lowpan_addr_info { + struct lowpan_addr daddr; + struct lowpan_addr saddr; +}; + +static inline struct +lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) +{ + WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct lowpan_addr_info)); + return (struct lowpan_addr_info *)(skb->data - + sizeof(struct lowpan_addr_info)); +} + +int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) +{ + const u8 *saddr = _saddr; + const u8 *daddr = _daddr; + struct lowpan_addr_info *info; + + /* TODO: + * if this package isn't ipv6 one, where should it be routed? 
+ */ + if (type != ETH_P_IPV6) + return 0; + + if (!saddr) + saddr = dev->dev_addr; + + raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); + + info = lowpan_skb_priv(skb); + + /* TODO: Currently we only support extended_addr */ + info->daddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->daddr.u.extended_addr, daddr, + sizeof(info->daddr.u.extended_addr)); + info->saddr.mode = IEEE802154_ADDR_LONG; + memcpy(&info->saddr.u.extended_addr, saddr, + sizeof(info->daddr.u.extended_addr)); + + return 0; +} + +static struct sk_buff* +lowpan_alloc_frag(struct sk_buff *skb, int size, + const struct ieee802154_hdr *master_hdr) +{ + struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; + struct sk_buff *frag; + int rc; + + frag = alloc_skb(real_dev->hard_header_len + + real_dev->needed_tailroom + size, + GFP_ATOMIC); + + if (likely(frag)) { + frag->dev = real_dev; + frag->priority = skb->priority; + skb_reserve(frag, real_dev->hard_header_len); + skb_reset_network_header(frag); + *mac_cb(frag) = *mac_cb(skb); + + rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, + &master_hdr->source, size); + if (rc < 0) { + kfree_skb(frag); + return ERR_PTR(rc); + } + } else { + frag = ERR_PTR(-ENOMEM); + } + + return frag; +} + +static int +lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, + u8 *frag_hdr, int frag_hdrlen, + int offset, int len) +{ + struct sk_buff *frag; + + raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); + + frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); + if (IS_ERR(frag)) + return -PTR_ERR(frag); + + memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen); + memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len); + + raw_dump_table(__func__, " fragment dump", frag->data, frag->len); + + return dev_queue_xmit(frag); +} + +static int +lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, + const struct ieee802154_hdr *wpan_hdr) +{ + u16 dgram_size, dgram_offset; + __be16 frag_tag; + u8 frag_hdr[5]; + int frag_cap, frag_len, payload_cap, rc; + int skb_unprocessed, skb_offset; + + dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - + skb->mac_len; + frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); + lowpan_dev_info(dev)->fragment_tag++; + + frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); + frag_hdr[1] = dgram_size & 0xff; + memcpy(frag_hdr + 2, &frag_tag, sizeof(frag_tag)); + + payload_cap = ieee802154_max_payload(wpan_hdr); + + frag_len = round_down(payload_cap - LOWPAN_FRAG1_HEAD_SIZE - + skb_network_header_len(skb), 8); + + skb_offset = skb_network_header_len(skb); + skb_unprocessed = skb->len - skb->mac_len - skb_offset; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAG1_HEAD_SIZE, 0, + frag_len + skb_network_header_len(skb)); + if (rc) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, ntohs(frag_tag)); + goto err; + } + + frag_hdr[0] &= ~LOWPAN_DISPATCH_FRAG1; + frag_hdr[0] |= LOWPAN_DISPATCH_FRAGN; + frag_cap = round_down(payload_cap - LOWPAN_FRAGN_HEAD_SIZE, 8); + + do { + dgram_offset += frag_len; + skb_offset += frag_len; + skb_unprocessed -= frag_len; + frag_len = min(frag_cap, skb_unprocessed); + + frag_hdr[4] = dgram_offset >> 3; + + rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, + LOWPAN_FRAGN_HEAD_SIZE, skb_offset, + frag_len); + if (rc) { + pr_debug("%s unable to send a FRAGN packet. 
(tag: %d, offset: %d)\n", + __func__, ntohs(frag_tag), skb_offset); + goto err; + } + } while (skb_unprocessed > frag_cap); + + consume_skb(skb); + return NET_XMIT_SUCCESS; + +err: + kfree_skb(skb); + return rc; +} + +static int lowpan_header(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_addr sa, da; + struct ieee802154_mac_cb *cb = mac_cb_init(skb); + struct lowpan_addr_info info; + void *daddr, *saddr; + + memcpy(&info, lowpan_skb_priv(skb), sizeof(info)); + + /* TODO: Currently we only support extended_addr */ + daddr = &info.daddr.u.extended_addr; + saddr = &info.saddr.u.extended_addr; + + lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); + + cb->type = IEEE802154_FC_TYPE_DATA; + + /* prepare wpan address data */ + sa.mode = IEEE802154_ADDR_LONG; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + sa.extended_addr = ieee802154_devaddr_from_raw(saddr); + + /* intra-PAN communications */ + da.pan_id = sa.pan_id; + + /* if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast((const u8 *)daddr)) { + da.mode = IEEE802154_ADDR_SHORT; + da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + cb->ackreq = false; + } else { + da.mode = IEEE802154_ADDR_LONG; + da.extended_addr = ieee802154_devaddr_from_raw(daddr); + cb->ackreq = true; + } + + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + ETH_P_IPV6, (void *)&da, (void *)&sa, 0); +} + +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee802154_hdr wpan_hdr; + int max_single, ret; + + pr_debug("package xmit\n"); + + /* We must take a copy of the skb before we modify/replace the ipv6 + * header as the header could be used elsewhere + */ + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + + ret = lowpan_header(skb, dev); + if (ret < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + max_single = ieee802154_max_payload(&wpan_hdr); + + if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { + skb->dev = lowpan_dev_info(dev)->real_dev; + return dev_queue_xmit(skb); + } else { + netdev_tx_t rc; + + pr_debug("frame is too big, fragmentation is needed\n"); + rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); + + return rc < 0 ? NET_XMIT_DROP : rc; + } +} -- cgit v1.2.3 From 52d84ffc540cc14010c7b0d9df20063445f89187 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 4 Jan 2015 17:10:57 +0100 Subject: ieee802154: 6lowpan: rename to core This patch renames the 6lowpan_rtnl.c file to core.c. 6lowpan_rtnl.c contains functionality to put all 802.15.4 6LoWPAN functionality together. 
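For completeness, the rtnl glue kept in the renamed file still registers the "lowpan" link type (lowpan_link_ops and MODULE_ALIAS_RTNL_LINK("lowpan") in the diff below), so the virtual 6LoWPAN interface is created from userspace the same way as before. A usage sketch, assuming the underlying 802.15.4 device is named wpan0:

    # ip link add link wpan0 name lowpan0 type lowpan
    # ip link set lowpan0 up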
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/6lowpan/6lowpan_rtnl.c | 304 ---------------------------------- net/ieee802154/6lowpan/Makefile | 2 +- net/ieee802154/6lowpan/core.c | 304 ++++++++++++++++++++++++++++++++++ 3 files changed, 305 insertions(+), 305 deletions(-) delete mode 100644 net/ieee802154/6lowpan/6lowpan_rtnl.c create mode 100644 net/ieee802154/6lowpan/core.c (limited to 'net') diff --git a/net/ieee802154/6lowpan/6lowpan_rtnl.c b/net/ieee802154/6lowpan/6lowpan_rtnl.c deleted file mode 100644 index 055fbb71ba6f..000000000000 --- a/net/ieee802154/6lowpan/6lowpan_rtnl.c +++ /dev/null @@ -1,304 +0,0 @@ -/* Copyright 2011, Siemens AG - * written by Alexander Smirnov - */ - -/* Based on patches from Jon Smirl - * Copyright (c) 2011 Jon Smirl - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -/* Jon's code is based on 6lowpan implementation for Contiki which is: - * Copyright (c) 2008, Swedish Institute of Computer Science. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Institute nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -#include -#include - -#include - -#include "6lowpan_i.h" - -LIST_HEAD(lowpan_devices); -static int lowpan_open_count; - -static __le16 lowpan_get_pan_id(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); -} - -static __le16 lowpan_get_short_addr(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); -} - -static u8 lowpan_get_dsn(const struct net_device *dev) -{ - struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; - - return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); -} - -static struct header_ops lowpan_header_ops = { - .create = lowpan_header_create, -}; - -static struct lock_class_key lowpan_tx_busylock; -static struct lock_class_key lowpan_netdev_xmit_lock_key; - -static void lowpan_set_lockdep_class_one(struct net_device *dev, - struct netdev_queue *txq, - void *_unused) -{ - lockdep_set_class(&txq->_xmit_lock, - &lowpan_netdev_xmit_lock_key); -} - -static int lowpan_dev_init(struct net_device *dev) -{ - netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &lowpan_tx_busylock; - return 0; -} - -static const struct net_device_ops lowpan_netdev_ops = { - .ndo_init = lowpan_dev_init, - .ndo_start_xmit = lowpan_xmit, -}; - -static struct ieee802154_mlme_ops lowpan_mlme = { - .get_pan_id = lowpan_get_pan_id, - .get_short_addr = lowpan_get_short_addr, - .get_dsn = lowpan_get_dsn, -}; - -static void lowpan_setup(struct net_device *dev) -{ - dev->addr_len = IEEE802154_ADDR_LEN; - memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); - dev->type = ARPHRD_IEEE802154; - /* Frame Control + Sequence Number + Address fields + Security Header */ - dev->hard_header_len = 2 + 1 + 20 + 14; - dev->needed_tailroom = 2; /* FCS */ - dev->mtu = IPV6_MIN_MTU; - dev->tx_queue_len = 0; - dev->flags = IFF_BROADCAST | IFF_MULTICAST; - dev->watchdog_timeo = 0; - - dev->netdev_ops = &lowpan_netdev_ops; - dev->header_ops = &lowpan_header_ops; - dev->ml_priv = &lowpan_mlme; - dev->destructor = free_netdev; -} - -static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) -{ - if (tb[IFLA_ADDRESS]) { - if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) - return -EINVAL; - } - return 0; -} - -static int lowpan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_device *real_dev; - struct lowpan_dev_record *entry; - int ret; - - ASSERT_RTNL(); - - pr_debug("adding new link\n"); - - if (!tb[IFLA_LINK]) - return -EINVAL; - /* find and hold real wpan device */ - real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) - return -ENODEV; - if (real_dev->type != ARPHRD_IEEE802154) { - dev_put(real_dev); - return -EINVAL; - } - - lowpan_dev_info(dev)->real_dev = real_dev; - mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { - dev_put(real_dev); - lowpan_dev_info(dev)->real_dev = NULL; - return -ENOMEM; - } - - entry->ldev = dev; - - /* Set the lowpan hardware address to the wpan hardware address. 
*/ - memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - INIT_LIST_HEAD(&entry->list); - list_add_tail(&entry->list, &lowpan_devices); - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - ret = register_netdevice(dev); - if (ret >= 0) { - if (!lowpan_open_count) - lowpan_rx_init(); - lowpan_open_count++; - } - - return ret; -} - -static void lowpan_dellink(struct net_device *dev, struct list_head *head) -{ - struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); - struct net_device *real_dev = lowpan_dev->real_dev; - struct lowpan_dev_record *entry, *tmp; - - ASSERT_RTNL(); - - lowpan_open_count--; - if (!lowpan_open_count) - lowpan_rx_exit(); - - mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (entry->ldev == dev) { - list_del(&entry->list); - kfree(entry); - } - } - mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); - - mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); - - unregister_netdevice_queue(dev, head); - - dev_put(real_dev); -} - -static struct rtnl_link_ops lowpan_link_ops __read_mostly = { - .kind = "lowpan", - .priv_size = sizeof(struct lowpan_dev_info), - .setup = lowpan_setup, - .newlink = lowpan_newlink, - .dellink = lowpan_dellink, - .validate = lowpan_validate, -}; - -static inline int __init lowpan_netlink_init(void) -{ - return rtnl_link_register(&lowpan_link_ops); -} - -static inline void lowpan_netlink_fini(void) -{ - rtnl_link_unregister(&lowpan_link_ops); -} - -static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - LIST_HEAD(del_list); - struct lowpan_dev_record *entry, *tmp; - - if (dev->type != ARPHRD_IEEE802154) - goto out; - - if (event == NETDEV_UNREGISTER) { - list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { - if (lowpan_dev_info(entry->ldev)->real_dev == dev) - lowpan_dellink(entry->ldev, &del_list); - } - - unregister_netdevice_many(&del_list); - } - -out: - return NOTIFY_DONE; -} - -static struct notifier_block lowpan_dev_notifier = { - .notifier_call = lowpan_device_event, -}; - -static int __init lowpan_init_module(void) -{ - int err = 0; - - err = lowpan_net_frag_init(); - if (err < 0) - goto out; - - err = lowpan_netlink_init(); - if (err < 0) - goto out_frag; - - err = register_netdevice_notifier(&lowpan_dev_notifier); - if (err < 0) - goto out_pack; - - return 0; - -out_pack: - lowpan_netlink_fini(); -out_frag: - lowpan_net_frag_exit(); -out: - return err; -} - -static void __exit lowpan_cleanup_module(void) -{ - lowpan_netlink_fini(); - - lowpan_net_frag_exit(); - - unregister_netdevice_notifier(&lowpan_dev_notifier); -} - -module_init(lowpan_init_module); -module_exit(lowpan_cleanup_module); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/6lowpan/Makefile b/net/ieee802154/6lowpan/Makefile index 7f11c136bcfe..2f1b75a49f4d 100644 --- a/net/ieee802154/6lowpan/Makefile +++ b/net/ieee802154/6lowpan/Makefile @@ -1,3 +1,3 @@ obj-y += ieee802154_6lowpan.o -ieee802154_6lowpan-y := 6lowpan_rtnl.o rx.o reassembly.o tx.o +ieee802154_6lowpan-y := core.o rx.o reassembly.o tx.o diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c new file mode 100644 index 000000000000..055fbb71ba6f --- /dev/null +++ b/net/ieee802154/6lowpan/core.c @@ -0,0 +1,304 @@ +/* Copyright 2011, Siemens AG + * written by Alexander Smirnov + */ + +/* Based on patches 
from Jon Smirl + * Copyright (c) 2011 Jon Smirl + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include +#include + +#include + +#include "6lowpan_i.h" + +LIST_HEAD(lowpan_devices); +static int lowpan_open_count; + +static __le16 lowpan_get_pan_id(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); +} + +static __le16 lowpan_get_short_addr(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); +} + +static u8 lowpan_get_dsn(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); +} + +static struct header_ops lowpan_header_ops = { + .create = lowpan_header_create, +}; + +static struct lock_class_key lowpan_tx_busylock; +static struct lock_class_key lowpan_netdev_xmit_lock_key; + +static void lowpan_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &lowpan_netdev_xmit_lock_key); +} + +static int lowpan_dev_init(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); + dev->qdisc_tx_busylock = &lowpan_tx_busylock; + return 0; +} + +static const struct net_device_ops lowpan_netdev_ops = { + .ndo_init = lowpan_dev_init, + .ndo_start_xmit = lowpan_xmit, +}; + +static struct ieee802154_mlme_ops lowpan_mlme = { + .get_pan_id = lowpan_get_pan_id, + .get_short_addr = lowpan_get_short_addr, + .get_dsn = lowpan_get_dsn, +}; + +static void lowpan_setup(struct net_device *dev) +{ + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + dev->type = ARPHRD_IEEE802154; + /* Frame Control + Sequence Number + Address fields + Security Header */ + dev->hard_header_len = 2 + 1 + 20 + 14; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = IPV6_MIN_MTU; + dev->tx_queue_len = 0; + dev->flags = IFF_BROADCAST | IFF_MULTICAST; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &lowpan_netdev_ops; + dev->header_ops = &lowpan_header_ops; + dev->ml_priv = &lowpan_mlme; + dev->destructor = free_netdev; +} + +static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) + return -EINVAL; + } + return 0; +} + +static int lowpan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *real_dev; + struct lowpan_dev_record *entry; + int ret; + + ASSERT_RTNL(); + + pr_debug("adding new link\n"); + + if (!tb[IFLA_LINK]) + return -EINVAL; + /* find and hold real wpan device */ + real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + if (real_dev->type != ARPHRD_IEEE802154) { + dev_put(real_dev); + return -EINVAL; + } + + lowpan_dev_info(dev)->real_dev = real_dev; + mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + dev_put(real_dev); + lowpan_dev_info(dev)->real_dev = NULL; + return -ENOMEM; + } + + entry->ldev = dev; + + /* Set the lowpan hardware address to the wpan hardware address. 
*/ + memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, &lowpan_devices); + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + ret = register_netdevice(dev); + if (ret >= 0) { + if (!lowpan_open_count) + lowpan_rx_init(); + lowpan_open_count++; + } + + return ret; +} + +static void lowpan_dellink(struct net_device *dev, struct list_head *head) +{ + struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); + struct net_device *real_dev = lowpan_dev->real_dev; + struct lowpan_dev_record *entry, *tmp; + + ASSERT_RTNL(); + + lowpan_open_count--; + if (!lowpan_open_count) + lowpan_rx_exit(); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (entry->ldev == dev) { + list_del(&entry->list); + kfree(entry); + } + } + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); + + unregister_netdevice_queue(dev, head); + + dev_put(real_dev); +} + +static struct rtnl_link_ops lowpan_link_ops __read_mostly = { + .kind = "lowpan", + .priv_size = sizeof(struct lowpan_dev_info), + .setup = lowpan_setup, + .newlink = lowpan_newlink, + .dellink = lowpan_dellink, + .validate = lowpan_validate, +}; + +static inline int __init lowpan_netlink_init(void) +{ + return rtnl_link_register(&lowpan_link_ops); +} + +static inline void lowpan_netlink_fini(void) +{ + rtnl_link_unregister(&lowpan_link_ops); +} + +static int lowpan_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + LIST_HEAD(del_list); + struct lowpan_dev_record *entry, *tmp; + + if (dev->type != ARPHRD_IEEE802154) + goto out; + + if (event == NETDEV_UNREGISTER) { + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (lowpan_dev_info(entry->ldev)->real_dev == dev) + lowpan_dellink(entry->ldev, &del_list); + } + + unregister_netdevice_many(&del_list); + } + +out: + return NOTIFY_DONE; +} + +static struct notifier_block lowpan_dev_notifier = { + .notifier_call = lowpan_device_event, +}; + +static int __init lowpan_init_module(void) +{ + int err = 0; + + err = lowpan_net_frag_init(); + if (err < 0) + goto out; + + err = lowpan_netlink_init(); + if (err < 0) + goto out_frag; + + err = register_netdevice_notifier(&lowpan_dev_notifier); + if (err < 0) + goto out_pack; + + return 0; + +out_pack: + lowpan_netlink_fini(); +out_frag: + lowpan_net_frag_exit(); +out: + return err; +} + +static void __exit lowpan_cleanup_module(void) +{ + lowpan_netlink_fini(); + + lowpan_net_frag_exit(); + + unregister_netdevice_notifier(&lowpan_dev_notifier); +} + +module_init(lowpan_init_module); +module_exit(lowpan_cleanup_module); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("lowpan"); -- cgit v1.2.3 From 866476f323465a8afef10b14b48d5136bf5c51fe Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Wed, 24 Dec 2014 09:57:10 +0100 Subject: netfilter: conntrack: Flush connections with a given mark This patch adds support for selective flushing of conntrack mappings. By adding CTA_MARK and CTA_MARK_MASK to a delete-message, the mark (and mask) is checked before a connection is deleted while flushing. Configuring the flush is moved out of ctnetlink_del_conntrack(), and instead of calling nf_conntrack_flush_report(), we always call nf_ct_iterate_cleanup(). 
This enables us to only make one call from the new ctnetlink_flush_conntrack() and makes it easy to add more filter parameters. Filtering is done in the ctnetlink_filter_match()-function, which is also called from ctnetlink_dump_table(). ctnetlink_dump_filter has been renamed ctnetlink_filter, to indicated that it is no longer only used when dumping conntrack entries. Moreover, reject mark filters with -EOPNOTSUPP if no ct mark support is available. Signed-off-by: Kristian Evensen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 89 ++++++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1bd9ed9e62f6..d1c23940a86a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -749,13 +749,47 @@ static int ctnetlink_done(struct netlink_callback *cb) return 0; } -struct ctnetlink_dump_filter { +struct ctnetlink_filter { struct { u_int32_t val; u_int32_t mask; } mark; }; +static struct ctnetlink_filter * +ctnetlink_alloc_filter(const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_MARK + struct ctnetlink_filter *filter; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (filter == NULL) + return ERR_PTR(-ENOMEM); + + filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); + filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + return filter; +#else + return ERR_PTR(-EOPNOTSUPP); +#endif +} + +static int ctnetlink_filter_match(struct nf_conn *ct, void *data) +{ + struct ctnetlink_filter *filter = data; + + if (filter == NULL) + return 1; + +#ifdef CONFIG_NF_CONNTRACK_MARK + if ((ct->mark & filter->mark.mask) == filter->mark.val) + return 1; +#endif + + return 0; +} + static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { @@ -768,10 +802,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) int res; spinlock_t *lockp; -#ifdef CONFIG_NF_CONNTRACK_MARK - const struct ctnetlink_dump_filter *filter = cb->data; -#endif - last = (struct nf_conn *)cb->args[1]; local_bh_disable(); @@ -798,12 +828,9 @@ restart: continue; cb->args[1] = 0; } -#ifdef CONFIG_NF_CONNTRACK_MARK - if (filter && !((ct->mark & filter->mark.mask) == - filter->mark.val)) { + if (!ctnetlink_filter_match(ct, cb->data)) continue; - } -#endif + rcu_read_lock(); res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, @@ -1001,6 +1028,25 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { .len = NF_CT_LABELS_MAX_SIZE }, }; +static int ctnetlink_flush_conntrack(struct net *net, + const struct nlattr * const cda[], + u32 portid, int report) +{ + struct ctnetlink_filter *filter = NULL; + + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + return PTR_ERR(filter); + } + + nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter, + portid, report); + kfree(filter); + + return 0; +} + static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1024,11 +1070,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, else if (cda[CTA_TUPLE_REPLY]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { - /* Flush the whole table */ - nf_conntrack_flush_report(net, - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - return 0; + return ctnetlink_flush_conntrack(net, cda, + NETLINK_CB(skb).portid, + nlmsg_report(nlh)); } if (err < 0) 
@@ -1076,21 +1120,16 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, .dump = ctnetlink_dump_table, .done = ctnetlink_done, }; -#ifdef CONFIG_NF_CONNTRACK_MARK + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { - struct ctnetlink_dump_filter *filter; + struct ctnetlink_filter *filter; - filter = kzalloc(sizeof(struct ctnetlink_dump_filter), - GFP_ATOMIC); - if (filter == NULL) - return -ENOMEM; + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + return PTR_ERR(filter); - filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - filter->mark.mask = - ntohl(nla_get_be32(cda[CTA_MARK_MASK])); c.data = filter; } -#endif return netlink_dump_start(ctnl, skb, nlh, &c); } -- cgit v1.2.3 From ae406bd0572be97a46d72e8a5e97c33c3168388c Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Wed, 24 Dec 2014 09:57:11 +0100 Subject: netfilter: conntrack: Remove nf_ct_conntrack_flush_report The only user of nf_ct_conntrack_flush_report() was ctnetlink_del_conntrack(). After adding support for flushing connections with a given mark, this function is no longer called. Signed-off-by: Kristian Evensen Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 -- net/netfilter/nf_conntrack_core.c | 6 ------ 2 files changed, 8 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f0daed2b54d1..74f271a172dd 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -191,8 +191,6 @@ __nf_conntrack_find(struct net *net, u16 zone, int nf_conntrack_hash_check_insert(struct nf_conn *ct); bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); -void nf_conntrack_flush_report(struct net *net, u32 portid, int report); - bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct nf_conntrack_tuple *tuple); bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index da58cd4f2cb7..5a6990bc5a5b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1426,12 +1426,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 portid, int report) -{ - nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report); -} -EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); - static int untrack_refs(void) { int cnt = 0, cpu; -- cgit v1.2.3 From 4ed20bebf51578229a1986efcf46344075ec8447 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Nov 2014 16:35:34 +0100 Subject: cfg80211: remove "channel" from survey names All of the survey data is (currently) per channel anyway, so having the word "channel" in the name does nothing. In the next patch I'll introduce global data to the survey, where the word "channel" is actually confusing. 
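For driver authors the conversion is mechanical, since only the member and flag names change. A minimal sketch of a converted get_survey() callback using the renamed fields; my_current_channel(), my_read_noise_dbm(), my_read_active_ms() and my_read_busy_ms() are hypothetical helpers standing in for real device accessors:

#include <net/mac80211.h>

/* Sketch only: report one channel's survey data using the renamed
 * struct survey_info members and SURVEY_INFO_* flags.
 */
static int my_get_survey(struct ieee80211_hw *hw, int idx,
                         struct survey_info *survey)
{
        if (idx != 0)
                return -ENOENT; /* this sketch tracks a single channel */

        survey->channel = my_current_channel(hw);
        survey->noise = my_read_noise_dbm(hw);
        survey->time = my_read_active_ms(hw);    /* was channel_time */
        survey->time_busy = my_read_busy_ms(hw); /* was channel_time_busy */
        survey->filled = SURVEY_INFO_IN_USE |
                         SURVEY_INFO_NOISE_DBM |
                         SURVEY_INFO_TIME |
                         SURVEY_INFO_TIME_BUSY;

        return 0;
}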
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/wmi.c | 8 ++--- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 16 +++++----- drivers/net/wireless/ath/ath9k/link.c | 16 +++++----- drivers/net/wireless/ath/carl9170/cmd.c | 12 ++++---- drivers/net/wireless/ath/carl9170/main.c | 6 ++-- drivers/net/wireless/mwifiex/cfg80211.c | 7 +++-- drivers/net/wireless/mwl8k.c | 12 ++++---- drivers/net/wireless/p54/eeprom.c | 6 ++-- drivers/net/wireless/p54/main.c | 8 ++--- drivers/net/wireless/p54/txrx.c | 12 ++++---- drivers/net/wireless/rt2x00/rt2800lib.c | 12 ++++---- include/net/cfg80211.h | 44 +++++++++++++-------------- include/uapi/linux/nl80211.h | 27 ++++++++++------ net/mac80211/ethtool.c | 20 ++++++------ net/wireless/nl80211.c | 30 +++++++++--------- net/wireless/trace.h | 26 ++++++++-------- 16 files changed, 135 insertions(+), 127 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index c0f3e4d09263..721631c3dd3e 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1344,11 +1344,11 @@ static void ath10k_wmi_event_chan_info(struct ath10k *ar, struct sk_buff *skb) rx_clear_count -= ar->survey_last_rx_clear_count; survey = &ar->survey[idx]; - survey->channel_time = WMI_CHAN_INFO_MSEC(cycle_count); - survey->channel_time_rx = WMI_CHAN_INFO_MSEC(rx_clear_count); + survey->time = WMI_CHAN_INFO_MSEC(cycle_count); + survey->time_rx = WMI_CHAN_INFO_MSEC(rx_clear_count); survey->noise = noise_floor; - survey->filled = SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_RX | + survey->filled = SURVEY_INFO_TIME | + SURVEY_INFO_TIME_RX | SURVEY_INFO_NOISE_DBM; } diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 19eab2a69ad5..3b4a6463d87a 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -672,10 +672,10 @@ ath5k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey) spin_lock_bh(&common->cc_lock); ath_hw_cycle_counters_update(common); if (cc->cycles > 0) { - ah->survey.channel_time += cc->cycles / div; - ah->survey.channel_time_busy += cc->rx_busy / div; - ah->survey.channel_time_rx += cc->rx_frame / div; - ah->survey.channel_time_tx += cc->tx_frame / div; + ah->survey.time += cc->cycles / div; + ah->survey.time_busy += cc->rx_busy / div; + ah->survey.time_rx += cc->rx_frame / div; + ah->survey.time_tx += cc->tx_frame / div; } memset(cc, 0, sizeof(*cc)); spin_unlock_bh(&common->cc_lock); @@ -686,10 +686,10 @@ ath5k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey) survey->noise = ah->ah_noise_floor; survey->filled = SURVEY_INFO_NOISE_DBM | SURVEY_INFO_IN_USE | - SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_RX | - SURVEY_INFO_CHANNEL_TIME_TX; + SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_RX | + SURVEY_INFO_TIME_TX; return 0; } diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index b829263e3d0a..90631d768a60 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -516,14 +516,14 @@ int ath_update_survey_stats(struct ath_softc *sc) ath_hw_cycle_counters_update(common); if (cc->cycles > 0) { - survey->filled |= SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_RX | - SURVEY_INFO_CHANNEL_TIME_TX; - survey->channel_time += 
cc->cycles / div; - survey->channel_time_busy += cc->rx_busy / div; - survey->channel_time_rx += cc->rx_frame / div; - survey->channel_time_tx += cc->tx_frame / div; + survey->filled |= SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_RX | + SURVEY_INFO_TIME_TX; + survey->time += cc->cycles / div; + survey->time_busy += cc->rx_busy / div; + survey->time_rx += cc->rx_frame / div; + survey->time_tx += cc->tx_frame / div; } if (cc->cycles < div) diff --git a/drivers/net/wireless/ath/carl9170/cmd.c b/drivers/net/wireless/ath/carl9170/cmd.c index 39a63874b275..f2b4f537e4c1 100644 --- a/drivers/net/wireless/ath/carl9170/cmd.c +++ b/drivers/net/wireless/ath/carl9170/cmd.c @@ -188,12 +188,12 @@ int carl9170_collect_tally(struct ar9170 *ar) if (ar->channel) { info = &ar->survey[ar->channel->hw_value]; - info->channel_time = ar->tally.active; - info->channel_time_busy = ar->tally.cca; - info->channel_time_tx = ar->tally.tx_time; - do_div(info->channel_time, 1000); - do_div(info->channel_time_busy, 1000); - do_div(info->channel_time_tx, 1000); + info->time = ar->tally.active; + info->time_busy = ar->tally.cca; + info->time_tx = ar->tally.tx_time; + do_div(info->time, 1000); + do_div(info->time_busy, 1000); + do_div(info->time_tx, 1000); } } return 0; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index ef5b6dc7b7f1..f1455a04cb62 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1690,9 +1690,9 @@ found: survey->filled |= SURVEY_INFO_IN_USE; if (ar->fw.hw_counters) { - survey->filled |= SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_TX; + survey->filled |= SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_TX; } return 0; diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 4a66a6555366..8bd446b69658 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -1037,10 +1037,11 @@ mwifiex_cfg80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, survey->channel = ieee80211_get_channel(wiphy, ieee80211_channel_to_frequency(pchan_stats[idx].chan_num, band)); survey->filled = SURVEY_INFO_NOISE_DBM | - SURVEY_INFO_CHANNEL_TIME | SURVEY_INFO_CHANNEL_TIME_BUSY; + SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY; survey->noise = pchan_stats[idx].noise; - survey->channel_time = pchan_stats[idx].cca_scan_dur; - survey->channel_time_busy = pchan_stats[idx].cca_busy_dur; + survey->time = pchan_stats[idx].cca_scan_dur; + survey->time_busy = pchan_stats[idx].cca_busy_dur; return 0; } diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index b8d1e04aa9b9..f9b1218c761a 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -3098,14 +3098,14 @@ static void mwl8k_update_survey(struct mwl8k_priv *priv, cca_cnt = ioread32(priv->regs + NOK_CCA_CNT_REG); cca_cnt /= 1000; /* uSecs to mSecs */ - survey->channel_time_busy = (u64) cca_cnt; + survey->time_busy = (u64) cca_cnt; rx_rdy = ioread32(priv->regs + BBU_RXRDY_CNT_REG); rx_rdy /= 1000; /* uSecs to mSecs */ - survey->channel_time_rx = (u64) rx_rdy; + survey->time_rx = (u64) rx_rdy; priv->channel_time = jiffies - priv->channel_time; - survey->channel_time = jiffies_to_msecs(priv->channel_time); + survey->time = jiffies_to_msecs(priv->channel_time); survey->channel = channel; @@ -3115,9 +3115,9 @@ static void mwl8k_update_survey(struct mwl8k_priv *priv, survey->noise = 
nf * -1; survey->filled = SURVEY_INFO_NOISE_DBM | - SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_RX; + SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_RX; } /* diff --git a/drivers/net/wireless/p54/eeprom.c b/drivers/net/wireless/p54/eeprom.c index 0fe67d2da208..2fe713eda7ad 100644 --- a/drivers/net/wireless/p54/eeprom.c +++ b/drivers/net/wireless/p54/eeprom.c @@ -196,9 +196,9 @@ static int p54_generate_band(struct ieee80211_hw *dev, dest->max_power = chan->max_power; priv->survey[*chan_num].channel = &tmp->channels[j]; priv->survey[*chan_num].filled = SURVEY_INFO_NOISE_DBM | - SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_TX; + SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_TX; dest->hw_value = (*chan_num); j++; (*chan_num)++; diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 13a30c4a27f2..b9250d75d253 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -305,9 +305,9 @@ static void p54_reset_stats(struct p54_common *priv) struct survey_info *info = &priv->survey[chan->hw_value]; /* only reset channel statistics, don't touch .filled, etc. */ - info->channel_time = 0; - info->channel_time_busy = 0; - info->channel_time_tx = 0; + info->time = 0; + info->time_busy = 0; + info->time_tx = 0; } priv->update_stats = true; @@ -636,7 +636,7 @@ static int p54_get_survey(struct ieee80211_hw *dev, int idx, if (in_use) { /* test if the reported statistics are valid. */ - if (survey->channel_time != 0) { + if (survey->time != 0) { survey->filled |= SURVEY_INFO_IN_USE; } else { /* diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 153c61539ec8..24e5ff9a9272 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -587,13 +587,13 @@ static void p54_rx_stats(struct p54_common *priv, struct sk_buff *skb) if (chan) { struct survey_info *survey = &priv->survey[chan->hw_value]; survey->noise = clamp(priv->noise, -128, 127); - survey->channel_time = priv->survey_raw.active; - survey->channel_time_tx = priv->survey_raw.tx; - survey->channel_time_busy = priv->survey_raw.tx + + survey->time = priv->survey_raw.active; + survey->time_tx = priv->survey_raw.tx; + survey->time_busy = priv->survey_raw.tx + priv->survey_raw.cca; - do_div(survey->channel_time, 1024); - do_div(survey->channel_time_tx, 1024); - do_div(survey->channel_time_busy, 1024); + do_div(survey->time, 1024); + do_div(survey->time_tx, 1024); + do_div(survey->time_busy, 1024); } tmp = p54_find_and_unlink_skb(priv, hdr->req_id); diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 81ee481487cf..be2d54f257b1 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -8020,13 +8020,13 @@ int rt2800_get_survey(struct ieee80211_hw *hw, int idx, rt2800_register_read(rt2x00dev, CH_BUSY_STA_SEC, &busy_ext); if (idle || busy) { - survey->filled = SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_EXT_BUSY; + survey->filled = SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_EXT_BUSY; - survey->channel_time = (idle + busy) / 1000; - survey->channel_time_busy = busy / 1000; - survey->channel_time_ext_busy = busy_ext / 1000; + survey->time = (idle + busy) / 1000; + survey->time_busy = busy / 1000; + survey->time_ext_busy = busy_ext / 1000; } if (!(hw->conf.flags & 
IEEE80211_CONF_OFFCHANNEL)) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f38645fb83b9..3b489f8fc4cd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -520,23 +520,23 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef) * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in * @SURVEY_INFO_IN_USE: channel is currently being used - * @SURVEY_INFO_CHANNEL_TIME: channel active time (in ms) was filled in - * @SURVEY_INFO_CHANNEL_TIME_BUSY: channel busy time was filled in - * @SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: extension channel busy time was filled in - * @SURVEY_INFO_CHANNEL_TIME_RX: channel receive time was filled in - * @SURVEY_INFO_CHANNEL_TIME_TX: channel transmit time was filled in + * @SURVEY_INFO_TIME: active time (in ms) was filled in + * @SURVEY_INFO_TIME_BUSY: busy time was filled in + * @SURVEY_INFO_TIME_EXT_BUSY: extension channel busy time was filled in + * @SURVEY_INFO_TIME_RX: receive time was filled in + * @SURVEY_INFO_TIME_TX: transmit time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). */ enum survey_info_flags { - SURVEY_INFO_NOISE_DBM = 1<<0, - SURVEY_INFO_IN_USE = 1<<1, - SURVEY_INFO_CHANNEL_TIME = 1<<2, - SURVEY_INFO_CHANNEL_TIME_BUSY = 1<<3, - SURVEY_INFO_CHANNEL_TIME_EXT_BUSY = 1<<4, - SURVEY_INFO_CHANNEL_TIME_RX = 1<<5, - SURVEY_INFO_CHANNEL_TIME_TX = 1<<6, + SURVEY_INFO_NOISE_DBM = BIT(0), + SURVEY_INFO_IN_USE = BIT(1), + SURVEY_INFO_TIME = BIT(2), + SURVEY_INFO_TIME_BUSY = BIT(3), + SURVEY_INFO_TIME_EXT_BUSY = BIT(4), + SURVEY_INFO_TIME_RX = BIT(5), + SURVEY_INFO_TIME_TX = BIT(6), }; /** @@ -546,11 +546,11 @@ enum survey_info_flags { * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional - * @channel_time: amount of time in ms the radio spent on the channel - * @channel_time_busy: amount of time the primary channel was sensed busy - * @channel_time_ext_busy: amount of time the extension channel was sensed busy - * @channel_time_rx: amount of time the radio spent receiving data - * @channel_time_tx: amount of time the radio spent transmitting data + * @time: amount of time in ms the radio was turn on (on the channel) + * @time_busy: amount of time the primary channel was sensed busy + * @time_ext_busy: amount of time the extension channel was sensed busy + * @time_rx: amount of time the radio spent receiving data + * @time_tx: amount of time the radio spent transmitting data * * Used by dump_survey() to report back per-channel survey information. 
* @@ -559,11 +559,11 @@ enum survey_info_flags { */ struct survey_info { struct ieee80211_channel *channel; - u64 channel_time; - u64 channel_time_busy; - u64 channel_time_ext_busy; - u64 channel_time_rx; - u64 channel_time_tx; + u64 time; + u64 time_busy; + u64 time_ext_busy; + u64 time_rx; + u64 time_tx; u32 filled; s8 noise; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7ba9404b290d..1a5acc80ab88 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2815,15 +2815,15 @@ enum nl80211_user_reg_hint_type { * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used - * @NL80211_SURVEY_INFO_CHANNEL_TIME: amount of time (in ms) that the radio + * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio * spent on this channel - * @NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY: amount of the time the primary + * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary * channel was sensed busy (either due to activity or energy detect) - * @NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: amount of time the extension + * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension * channel was sensed busy - * @NL80211_SURVEY_INFO_CHANNEL_TIME_RX: amount of time the radio spent + * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent * receiving data - * @NL80211_SURVEY_INFO_CHANNEL_TIME_TX: amount of time the radio spent + * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent * transmitting data * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined @@ -2834,17 +2834,24 @@ enum nl80211_survey_info { NL80211_SURVEY_INFO_FREQUENCY, NL80211_SURVEY_INFO_NOISE, NL80211_SURVEY_INFO_IN_USE, - NL80211_SURVEY_INFO_CHANNEL_TIME, - NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, - NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, - NL80211_SURVEY_INFO_CHANNEL_TIME_RX, - NL80211_SURVEY_INFO_CHANNEL_TIME_TX, + NL80211_SURVEY_INFO_TIME, + NL80211_SURVEY_INFO_TIME_BUSY, + NL80211_SURVEY_INFO_TIME_EXT_BUSY, + NL80211_SURVEY_INFO_TIME_RX, + NL80211_SURVEY_INFO_TIME_TX, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, NL80211_SURVEY_INFO_MAX = __NL80211_SURVEY_INFO_AFTER_LAST - 1 }; +/* keep old names for compatibility */ +#define NL80211_SURVEY_INFO_CHANNEL_TIME NL80211_SURVEY_INFO_TIME +#define NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY NL80211_SURVEY_INFO_TIME_BUSY +#define NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY NL80211_SURVEY_INFO_TIME_EXT_BUSY +#define NL80211_SURVEY_INFO_CHANNEL_TIME_RX NL80211_SURVEY_INFO_TIME_RX +#define NL80211_SURVEY_INFO_CHANNEL_TIME_TX NL80211_SURVEY_INFO_TIME_TX + /** * enum nl80211_mntr_flags - monitor configuration flags * diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index ebfc8091557b..eea742710c0a 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -175,24 +175,24 @@ do_survey: data[i++] = (u8)survey.noise; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME) - data[i++] = survey.channel_time; + if (survey.filled & SURVEY_INFO_TIME) + data[i++] = survey.time; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_BUSY) - data[i++] = survey.channel_time_busy; + if (survey.filled & SURVEY_INFO_TIME_BUSY) + data[i++] = survey.time_busy; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) - data[i++] = survey.channel_time_ext_busy; + if (survey.filled & 
SURVEY_INFO_TIME_EXT_BUSY) + data[i++] = survey.time_ext_busy; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_RX) - data[i++] = survey.channel_time_rx; + if (survey.filled & SURVEY_INFO_TIME_RX) + data[i++] = survey.time_rx; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_TX) - data[i++] = survey.channel_time_tx; + if (survey.filled & SURVEY_INFO_TIME_TX) + data[i++] = survey.time_tx; else data[i++] = -1LL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ad3e294acabe..94ab2014fefe 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6641,25 +6641,25 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if ((survey->filled & SURVEY_INFO_IN_USE) && nla_put_flag(msg, NL80211_SURVEY_INFO_IN_USE)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME, - survey->channel_time)) + if ((survey->filled & SURVEY_INFO_TIME) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME, + survey->time)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, - survey->channel_time_busy)) + if ((survey->filled & SURVEY_INFO_TIME_BUSY) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_BUSY, + survey->time_busy)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, - survey->channel_time_ext_busy)) + if ((survey->filled & SURVEY_INFO_TIME_EXT_BUSY) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY, + survey->time_ext_busy)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_RX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_RX, - survey->channel_time_rx)) + if ((survey->filled & SURVEY_INFO_TIME_RX) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_RX, + survey->time_rx)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_TX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_TX, - survey->channel_time_tx)) + if ((survey->filled & SURVEY_INFO_TIME_TX) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX, + survey->time_tx)) goto nla_put_failure; nla_nest_end(msg, infoattr); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ad38910f7036..bbb7afc264af 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1604,11 +1604,11 @@ TRACE_EVENT(rdev_return_int_survey_info, WIPHY_ENTRY CHAN_ENTRY __field(int, ret) - __field(u64, channel_time) - __field(u64, channel_time_busy) - __field(u64, channel_time_ext_busy) - __field(u64, channel_time_rx) - __field(u64, channel_time_tx) + __field(u64, time) + __field(u64, time_busy) + __field(u64, time_ext_busy) + __field(u64, time_rx) + __field(u64, time_tx) __field(u32, filled) __field(s8, noise) ), @@ -1616,11 +1616,11 @@ TRACE_EVENT(rdev_return_int_survey_info, WIPHY_ASSIGN; CHAN_ASSIGN(info->channel); __entry->ret = ret; - __entry->channel_time = info->channel_time; - __entry->channel_time_busy = info->channel_time_busy; - __entry->channel_time_ext_busy = info->channel_time_ext_busy; - __entry->channel_time_rx = info->channel_time_rx; - __entry->channel_time_tx = info->channel_time_tx; + __entry->time = info->time; + __entry->time_busy = info->time_busy; + __entry->time_ext_busy = info->time_ext_busy; + __entry->time_rx = info->time_rx; + __entry->time_tx = info->time_tx; __entry->filled = info->filled; __entry->noise = info->noise; ), @@ -1629,9 +1629,9 @@ 
TRACE_EVENT(rdev_return_int_survey_info, "channel time extension busy: %llu, channel time rx: %llu, " "channel time tx: %llu, filled: %u, noise: %d", WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG, - __entry->channel_time, __entry->channel_time_busy, - __entry->channel_time_ext_busy, __entry->channel_time_rx, - __entry->channel_time_tx, __entry->filled, __entry->noise) + __entry->time, __entry->time_busy, + __entry->time_ext_busy, __entry->time_rx, + __entry->time_tx, __entry->filled, __entry->noise) ); TRACE_EVENT(rdev_tdls_oper, -- cgit v1.2.3 From 11f78ac32b06648c1dde9371b70323168b51a83e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Nov 2014 16:43:50 +0100 Subject: cfg80211: allow survey data to return global data Not all devices are able to report survey data (particularly time spent for various operations) per channel. As all these statistics already exist in survey data, allow such devices to report them (if userspace requested it) Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- include/uapi/linux/nl80211.h | 16 +++++++++++++--- net/wireless/nl80211.c | 31 ++++++++++++++++++------------- 3 files changed, 33 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3b489f8fc4cd..5a861440c122 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -542,7 +542,8 @@ enum survey_info_flags { /** * struct survey_info - channel survey response * - * @channel: the channel this survey record reports, mandatory + * @channel: the channel this survey record reports, may be %NULL for a single + * record to report global statistics * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1a5acc80ab88..5e8b65f239a5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1727,6 +1727,14 @@ enum nl80211_commands { * is located at bit 0 of byte 0. bit index 25 would be located at bit 1 * of byte 3 (u8 array). * + * @NL80211_ATTR_SURVEY_RADIO_STATS: Request overall radio statistics to be + * returned along with other survey data. If set, @NL80211_CMD_GET_SURVEY + * may return a survey entry without a channel indicating global radio + * statistics (only some values are valid and make sense.) + * For devices that don't return such an entry even then, the information + * should be contained in the result as the sum of the respective counters + * over all channels. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2088,6 +2096,8 @@ enum nl80211_attrs { NL80211_ATTR_EXT_FEATURES, + NL80211_ATTR_SURVEY_RADIO_STATS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2816,15 +2826,15 @@ enum nl80211_user_reg_hint_type { * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio - * spent on this channel + * was turned on (on channel or globally) * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary * channel was sensed busy (either due to activity or energy detect) * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension * channel was sensed busy * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent - * receiving data + * receiving data (on channel or globally) * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent - * transmitting data + * transmitting data (on channel or globally) * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 94ab2014fefe..9555ef9fd99e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6613,12 +6613,17 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) } static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, - int flags, struct net_device *dev, - struct survey_info *survey) + int flags, struct net_device *dev, + bool allow_radio_stats, + struct survey_info *survey) { void *hdr; struct nlattr *infoattr; + /* skip radio stats if userspace didn't request them */ + if (!survey->channel && !allow_radio_stats) + return 0; + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_SURVEY_RESULTS); if (!hdr) @@ -6631,7 +6636,8 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if (!infoattr) goto nla_put_failure; - if (nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY, + if (survey->channel && + nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY, survey->channel->center_freq)) goto nla_put_failure; @@ -6671,19 +6677,22 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, return -EMSGSIZE; } -static int nl80211_dump_survey(struct sk_buff *skb, - struct netlink_callback *cb) +static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { struct survey_info survey; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; int survey_idx = cb->args[2]; int res; + bool radio_stats; res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) return res; + /* prepare_wdev_dump parsed the attributes */ + radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; + if (!wdev->netdev) { res = -EINVAL; goto out_err; @@ -6701,13 +6710,9 @@ static int nl80211_dump_survey(struct sk_buff *skb, if (res) goto out_err; - /* Survey without a channel doesn't make sense */ - if (!survey.channel) { - res = -EINVAL; - goto out; - } - - if (survey.channel->flags & IEEE80211_CHAN_DISABLED) { + /* don't send disabled channels, but do send non-channel data */ + if (survey.channel && + survey.channel->flags & IEEE80211_CHAN_DISABLED) { survey_idx++; continue; } @@ -6715,7 +6720,7 @@ static int nl80211_dump_survey(struct sk_buff 
*skb, if (nl80211_send_survey(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev, &survey) < 0) + wdev->netdev, radio_stats, &survey) < 0) goto out; survey_idx++; } -- cgit v1.2.3 From 052536abfa9144566599a7fbe8cc89e1086fa9a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Nov 2014 16:44:11 +0100 Subject: cfg80211: add scan time to survey data Add the time spent scanning to the survey data so it can be reported by drivers that collect such information. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/nl80211.c | 4 ++++ net/wireless/trace.h | 7 +++++-- 4 files changed, 16 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5a861440c122..f94f0d486d13 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -525,6 +525,7 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef) * @SURVEY_INFO_TIME_EXT_BUSY: extension channel busy time was filled in * @SURVEY_INFO_TIME_RX: receive time was filled in * @SURVEY_INFO_TIME_TX: transmit time was filled in + * @SURVEY_INFO_TIME_SCAN: scan time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). @@ -537,6 +538,7 @@ enum survey_info_flags { SURVEY_INFO_TIME_EXT_BUSY = BIT(4), SURVEY_INFO_TIME_RX = BIT(5), SURVEY_INFO_TIME_TX = BIT(6), + SURVEY_INFO_TIME_SCAN = BIT(7), }; /** @@ -552,6 +554,7 @@ enum survey_info_flags { * @time_ext_busy: amount of time the extension channel was sensed busy * @time_rx: amount of time the radio spent receiving data * @time_tx: amount of time the radio spent transmitting data + * @time_scan: amount of time the radio spent for scanning * * Used by dump_survey() to report back per-channel survey information. 
* @@ -565,6 +568,7 @@ struct survey_info { u64 time_ext_busy; u64 time_rx; u64 time_tx; + u64 time_scan; u32 filled; s8 noise; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5e8b65f239a5..2f549a253138 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2835,6 +2835,8 @@ enum nl80211_user_reg_hint_type { * receiving data (on channel or globally) * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent * transmitting data (on channel or globally) + * @NL80211_SURVEY_INFO_TIME_SCAN: time the radio spent for scan + * (on this channel or globally) * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use @@ -2849,6 +2851,7 @@ enum nl80211_survey_info { NL80211_SURVEY_INFO_TIME_EXT_BUSY, NL80211_SURVEY_INFO_TIME_RX, NL80211_SURVEY_INFO_TIME_TX, + NL80211_SURVEY_INFO_TIME_SCAN, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9555ef9fd99e..f56309bd21bd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6667,6 +6667,10 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX, survey->time_tx)) goto nla_put_failure; + if ((survey->filled & SURVEY_INFO_TIME_SCAN) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_SCAN, + survey->time_scan)) + goto nla_put_failure; nla_nest_end(msg, infoattr); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index bbb7afc264af..b17b3692f8c2 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1609,6 +1609,7 @@ TRACE_EVENT(rdev_return_int_survey_info, __field(u64, time_ext_busy) __field(u64, time_rx) __field(u64, time_tx) + __field(u64, time_scan) __field(u32, filled) __field(s8, noise) ), @@ -1621,17 +1622,19 @@ TRACE_EVENT(rdev_return_int_survey_info, __entry->time_ext_busy = info->time_ext_busy; __entry->time_rx = info->time_rx; __entry->time_tx = info->time_tx; + __entry->time_scan = info->time_scan; __entry->filled = info->filled; __entry->noise = info->noise; ), TP_printk(WIPHY_PR_FMT ", returned: %d, " CHAN_PR_FMT ", channel time: %llu, channel time busy: %llu, " "channel time extension busy: %llu, channel time rx: %llu, " - "channel time tx: %llu, filled: %u, noise: %d", + "channel time tx: %llu, scan time: %llu, filled: %u, noise: %d", WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG, __entry->time, __entry->time_busy, __entry->time_ext_busy, __entry->time_rx, - __entry->time_tx, __entry->filled, __entry->noise) + __entry->time_tx, __entry->time_scan, + __entry->filled, __entry->noise) ); TRACE_EVENT(rdev_tdls_oper, -- cgit v1.2.3 From cf5ead822d5db2d276616ccca91f00eb3b855db2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Nov 2014 17:14:00 +0100 Subject: cfg80211: allow including station info in delete event When a station is removed, its statistics may be interesting to userspace, for example for further aggregation of statistics of all stations that ever connected to an AP. Introduce a new cfg80211_del_sta_sinfo() function (and make the cfg80211_del_sta() a static inline calling it) to allow passing a struct station_info along with this, and send the data in the nl80211 event message. 
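For a driver that implements cfg80211 directly (i.e. not through mac80211), the intended call pattern is roughly the following. struct my_peer and its counters are made up for illustration, and the STATION_INFO_* flag names are assumed from the station_info API in this tree:

#include <net/cfg80211.h>

/* Sketch only: report final per-station counters when tearing a
 * station down; passing NULL instead (or calling cfg80211_del_sta())
 * keeps the old behaviour of an event without statistics.
 */
static void my_remove_peer(struct net_device *dev, struct my_peer *peer)
{
        struct station_info sinfo = {};

        sinfo.rx_bytes = peer->rx_bytes;
        sinfo.tx_bytes = peer->tx_bytes;
        sinfo.filled = STATION_INFO_RX_BYTES | STATION_INFO_TX_BYTES;

        /* emits NL80211_CMD_DEL_STATION including the statistics */
        cfg80211_del_sta_sinfo(dev, peer->addr, &sinfo, GFP_KERNEL);
}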
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 16 +++++++++++++++- net/wireless/nl80211.c | 38 ++++++++++++++++---------------------- 2 files changed, 31 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f94f0d486d13..42e3d74f1906 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4591,6 +4591,16 @@ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); +/** + * cfg80211_del_sta_sinfo - notify userspace about deletion of a station + * @dev: the netdev + * @mac_addr: the station's address + * @sinfo: the station information/statistics + * @gfp: allocation flags + */ +void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp); + /** * cfg80211_del_sta - notify userspace about deletion of a station * @@ -4598,7 +4608,11 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, * @mac_addr: the station's address * @gfp: allocation flags */ -void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp); +static inline void cfg80211_del_sta(struct net_device *dev, + const u8 *mac_addr, gfp_t gfp) +{ + cfg80211_del_sta_sinfo(dev, mac_addr, NULL, gfp); +} /** * cfg80211_conn_failed - connection request failed notification diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f56309bd21bd..a75dc91976d3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3650,8 +3650,8 @@ static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, return true; } -static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, - int flags, +static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, + u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo) @@ -3659,7 +3659,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, void *hdr; struct nlattr *sinfoattr, *bss_param; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -3854,7 +3854,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (err) goto out_err; - if (nl80211_send_station(skb, + if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev->netdev, mac_addr, @@ -3901,7 +3901,8 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0, + if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, + info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -11687,7 +11688,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, if (!msg) return; - if (nl80211_send_station(msg, 0, 0, 0, + if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0, rdev, dev, mac_addr, sinfo) < 0) { nlmsg_free(msg); return; @@ -11698,12 +11699,16 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) +void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) { struct wiphy *wiphy = 
dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; - void *hdr; + struct station_info empty_sinfo = {}; + + if (!sinfo) + sinfo = &empty_sinfo; trace_cfg80211_del_sta(dev, mac_addr); @@ -11711,27 +11716,16 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_STATION); - if (!hdr) { + if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0, + rdev, dev, mac_addr, sinfo)) { nlmsg_free(msg); return; } - if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) - goto nla_put_failure; - - genlmsg_end(msg, hdr); - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); - return; - - nla_put_failure: - genlmsg_cancel(msg, hdr); - nlmsg_free(msg); } -EXPORT_SYMBOL(cfg80211_del_sta); +EXPORT_SYMBOL(cfg80211_del_sta_sinfo); void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, enum nl80211_connect_failed_reason reason, -- cgit v1.2.3 From 6f7a8d26e2668e00de524d3da0122a4411047dd2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Nov 2014 20:32:57 +0100 Subject: mac80211: send statistics with delete station event Use the new cfg80211_del_sta_sinfo() function to send the statistics about the deleted station with the delete event. This lets userspace see how much traffic etc. the deleted station used. Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index db8b07ac6b10..388ff0b2ad2b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -874,6 +874,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + struct station_info sinfo = {}; int ret; /* @@ -908,7 +909,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta) sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - cfg80211_del_sta(sdata->dev, sta->sta.addr, GFP_KERNEL); + sta_set_sinfo(sta, &sinfo); + cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); -- cgit v1.2.3 From 2b9a7e1bac24df8ddb0713ad1e5807a7243bcab0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Nov 2014 11:35:23 +0100 Subject: mac80211: allow drivers to provide most station statistics In many cases, drivers can filter things like beacons that will skew statistics reported by mac80211. To get correct statistics in these cases, call drivers to obtain statistics and let them override all values, filling values from mac80211 if the driver didn't provide them. Not all of them make sense for the driver to fill, so some are still always done by mac80211. Note that this doesn't currently allow a driver to say "I know this value is wrong, don't report it at all", or to sum it up with a mac80211 value (as could be useful for "dropped misc"), that can be added if it turns out to be needed. This also gets rid of the get_rssi() method as is can now be implemented using sta_statistics(). 
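To make the contract concrete, a minimal sketch of the new callback is shown below (not taken from the patch; the firmware query is hypothetical). The driver only sets the filled bits for values it really provides, and mac80211 supplies everything else afterwards; the wlcore conversion in the diff below is a real example of the same pattern:

#include <net/mac80211.h>

/* hypothetical firmware query, provided elsewhere by the driver */
static int example_fw_read_rssi(void *fw_priv, struct ieee80211_sta *sta,
                                s8 *rssi);

static void example_sta_statistics(struct ieee80211_hw *hw,
                                   struct ieee80211_vif *vif,
                                   struct ieee80211_sta *sta,
                                   struct station_info *sinfo)
{
        s8 rssi;

        if (example_fw_read_rssi(hw->priv, sta, &rssi))
                return; /* report nothing, mac80211 keeps its own tracking */

        sinfo->signal = rssi;
        sinfo->filled |= STATION_INFO_SIGNAL;
}

The driver then hooks this up with .sta_statistics = example_sta_statistics in its ieee80211_ops.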
Signed-off-by: Johannes Berg --- drivers/net/wireless/ti/wlcore/main.c | 20 ++++---- include/net/mac80211.h | 17 ++++-- net/mac80211/driver-ops.h | 30 +++++------ net/mac80211/sta_info.c | 97 +++++++++++++++++++++++------------ net/mac80211/trace.h | 33 +++--------- 5 files changed, 110 insertions(+), 87 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 2a99456b6b8f..8d11b0ca412c 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5376,14 +5376,15 @@ static void wlcore_op_sta_rc_update(struct ieee80211_hw *hw, wlcore_hw_sta_rc_update(wl, wlvif, sta, changed); } -static int wlcore_op_get_rssi(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - s8 *rssi_dbm) +static void wlcore_op_sta_statistics(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct station_info *sinfo) { struct wl1271 *wl = hw->priv; struct wl12xx_vif *wlvif = wl12xx_vif_to_data(vif); - int ret = 0; + s8 rssi_dbm; + int ret; wl1271_debug(DEBUG_MAC80211, "mac80211 get_rssi"); @@ -5396,17 +5397,18 @@ static int wlcore_op_get_rssi(struct ieee80211_hw *hw, if (ret < 0) goto out_sleep; - ret = wlcore_acx_average_rssi(wl, wlvif, rssi_dbm); + ret = wlcore_acx_average_rssi(wl, wlvif, &rssi_dbm); if (ret < 0) goto out_sleep; + sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->signal = rssi_dbm; + out_sleep: wl1271_ps_elp_sleep(wl); out: mutex_unlock(&wl->mutex); - - return ret; } static bool wl1271_tx_frames_pending(struct ieee80211_hw *hw) @@ -5606,7 +5608,7 @@ static const struct ieee80211_ops wl1271_ops = { .assign_vif_chanctx = wlcore_op_assign_vif_chanctx, .unassign_vif_chanctx = wlcore_op_unassign_vif_chanctx, .sta_rc_update = wlcore_op_sta_rc_update, - .get_rssi = wlcore_op_get_rssi, + .sta_statistics = wlcore_op_sta_statistics, CFG80211_TESTMODE_CMD(wl1271_tm_cmd) }; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 555a845ad51e..123f2308958a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2708,6 +2708,14 @@ enum ieee80211_reconfig_type { * is only used if the configured rate control algorithm actually uses * the new rate table API, and is therefore optional. Must be atomic. * + * @sta_statistics: Get statistics for this station. For example with beacon + * filtering, the statistics kept by mac80211 might not be accurate, so + * let the driver pre-fill the statistics. The driver can fill most of + * the values (indicating which by setting the filled bitmap), but not + * all of them make sense - see the source for which ones are possible. + * Statistics that the driver doesn't fill will be filled by mac80211. + * The callback can sleep. + * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. * Returns a negative error code on failure. @@ -2868,9 +2876,6 @@ enum ieee80211_reconfig_type { * @get_et_strings: Ethtool API to get a set of strings to describe stats * and perhaps other supported types of ethtool data-sets. * - * @get_rssi: Get current signal strength in dBm, the function is optional - * and can sleep. - * * @mgd_prepare_tx: Prepare for transmitting a management frame for association * before associated. 
In multi-channel scenarios, a virtual interface is * bound to a channel before it is associated, but as it isn't associated @@ -3071,6 +3076,10 @@ struct ieee80211_ops { void (*sta_rate_tbl_update)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); + void (*sta_statistics)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct station_info *sinfo); int (*conf_tx)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 ac, const struct ieee80211_tx_queue_params *params); @@ -3138,8 +3147,6 @@ struct ieee80211_ops { void (*get_et_strings)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 sset, u8 *data); - int (*get_rssi)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, s8 *rssi_dbm); void (*mgd_prepare_tx)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 2ebc9ead9695..fdeda17b8dd2 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -639,6 +639,21 @@ static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_sta_statistics(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct station_info *sinfo) +{ + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_sta_statistics(local, sdata, sta); + if (local->ops->sta_statistics) + local->ops->sta_statistics(&local->hw, &sdata->vif, sta, sinfo); + trace_drv_return_void(local); +} + static inline int drv_conf_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 ac, const struct ieee80211_tx_queue_params *params) @@ -966,21 +981,6 @@ drv_allow_buffered_frames(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline int drv_get_rssi(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - s8 *rssi_dbm) -{ - int ret; - - might_sleep(); - - ret = local->ops->get_rssi(&local->hw, &sdata->vif, sta, rssi_dbm); - trace_drv_get_rssi(local, sta, *rssi_dbm, ret); - - return ret; -} - static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 388ff0b2ad2b..967b42eae5c2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1746,7 +1746,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; struct timespec uptime; - u64 packets = 0; u32 thr = 0; int i, ac; @@ -1755,47 +1754,74 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->generation = sdata->local->sta_generation; - sinfo->filled = STATION_INFO_INACTIVE_TIME | - STATION_INFO_RX_BYTES64 | - STATION_INFO_TX_BYTES64 | - STATION_INFO_RX_PACKETS | - STATION_INFO_TX_PACKETS | - STATION_INFO_TX_RETRIES | - STATION_INFO_TX_FAILED | - STATION_INFO_TX_BITRATE | - STATION_INFO_RX_BITRATE | - STATION_INFO_RX_DROP_MISC | - STATION_INFO_BSS_PARAM | - STATION_INFO_CONNECTED_TIME | - STATION_INFO_STA_FLAGS | - STATION_INFO_BEACON_LOSS_COUNT; + drv_sta_statistics(local, sdata, &sta->sta, sinfo); + + sinfo->filled |= STATION_INFO_INACTIVE_TIME | + STATION_INFO_STA_FLAGS | + STATION_INFO_BSS_PARAM | + STATION_INFO_CONNECTED_TIME | + STATION_INFO_RX_DROP_MISC | + STATION_INFO_BEACON_LOSS_COUNT; ktime_get_ts(&uptime); 
sinfo->connected_time = uptime.tv_sec - sta->last_connected; - sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); - sinfo->tx_bytes = 0; - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - sinfo->tx_bytes += sta->tx_bytes[ac]; - packets += sta->tx_packets[ac]; + + if (!(sinfo->filled & (STATION_INFO_TX_BYTES64 | + STATION_INFO_TX_BYTES))) { + sinfo->tx_bytes = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->tx_bytes += sta->tx_bytes[ac]; + sinfo->filled |= STATION_INFO_TX_BYTES64; + } + + if (!(sinfo->filled & STATION_INFO_TX_PACKETS)) { + sinfo->tx_packets = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->tx_packets += sta->tx_packets[ac]; + sinfo->filled |= STATION_INFO_TX_PACKETS; + } + + if (!(sinfo->filled & (STATION_INFO_RX_BYTES64 | + STATION_INFO_RX_BYTES))) { + sinfo->rx_bytes = sta->rx_bytes; + sinfo->filled |= STATION_INFO_RX_BYTES64; + } + + if (!(sinfo->filled & STATION_INFO_RX_PACKETS)) { + sinfo->rx_packets = sta->rx_packets; + sinfo->filled |= STATION_INFO_RX_PACKETS; + } + + if (!(sinfo->filled & STATION_INFO_TX_RETRIES)) { + sinfo->tx_retries = sta->tx_retry_count; + sinfo->filled |= STATION_INFO_TX_RETRIES; + } + + if (!(sinfo->filled & STATION_INFO_TX_FAILED)) { + sinfo->tx_failed = sta->tx_retry_failed; + sinfo->filled |= STATION_INFO_TX_FAILED; } - sinfo->tx_packets = packets; - sinfo->rx_bytes = sta->rx_bytes; - sinfo->rx_packets = sta->rx_packets; - sinfo->tx_retries = sta->tx_retry_count; - sinfo->tx_failed = sta->tx_retry_failed; + sinfo->rx_dropped_misc = sta->rx_dropped; sinfo->beacon_loss_count = sta->beacon_loss_count; if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; - if (!local->ops->get_rssi || - drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal)) + if (!(sinfo->filled & STATION_INFO_SIGNAL)) { sinfo->signal = (s8)sta->last_signal; - sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); + sinfo->filled |= STATION_INFO_SIGNAL; + } + + if (!(sinfo->filled & STATION_INFO_SIGNAL_AVG)) { + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); + sinfo->filled |= STATION_INFO_SIGNAL_AVG; + } } - if (sta->chains) { + + if (sta->chains && + !(sinfo->filled & (STATION_INFO_CHAIN_SIGNAL | + STATION_INFO_CHAIN_SIGNAL_AVG))) { sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | STATION_INFO_CHAIN_SIGNAL_AVG; @@ -1807,8 +1833,15 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } } - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); - sta_set_rate_info_rx(sta, &sinfo->rxrate); + if (!(sinfo->filled & STATION_INFO_TX_BITRATE)) { + sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); + sinfo->filled |= STATION_INFO_TX_BITRATE; + } + + if (!(sinfo->filled & STATION_INFO_RX_BITRATE)) { + sta_set_rate_info_rx(sta, &sinfo->rxrate); + sinfo->filled |= STATION_INFO_RX_BITRATE; + } if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 8e461a02c6a8..263a9561eb26 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -825,6 +825,13 @@ DECLARE_EVENT_CLASS(sta_event, ) ); +DEFINE_EVENT(sta_event, drv_sta_statistics, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); + DEFINE_EVENT(sta_event, drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1329,32 +1336,6 @@ 
DEFINE_EVENT(release_evt, drv_allow_buffered_frames, TP_ARGS(local, sta, tids, num_frames, reason, more_data) ); -TRACE_EVENT(drv_get_rssi, - TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, - s8 rssi, int ret), - - TP_ARGS(local, sta, rssi, ret), - - TP_STRUCT__entry( - LOCAL_ENTRY - STA_ENTRY - __field(s8, rssi) - __field(int, ret) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - STA_ASSIGN; - __entry->rssi = rssi; - __entry->ret = ret; - ), - - TP_printk( - LOCAL_PR_FMT STA_PR_FMT " rssi:%d ret:%d", - LOCAL_PR_ARG, STA_PR_ARG, __entry->rssi, __entry->ret - ) -); - DEFINE_EVENT(local_sdata_evt, drv_mgd_prepare_tx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), -- cgit v1.2.3 From 319090bf6c75e3ad42a8c74973be5e78ae4f948f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Nov 2014 14:08:11 +0100 Subject: cfg80211: remove enum station_info_flags This is really just duplicating the list of information that's already available in the nl80211 attribute, so remove the list. Two small changes are needed: * remove STATION_INFO_ASSOC_REQ_IES complete, but the length (assoc_req_ies_len) can be used instead * add NL80211_STA_INFO_RX_DROP_MISC which exists internally but not in nl80211 yet This gets rid of the duplicate maintenance of the two lists. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 14 +-- drivers/net/wireless/ath/ath6kl/main.c | 1 - drivers/net/wireless/ath/wil6210/cfg80211.c | 18 +-- drivers/net/wireless/ath/wil6210/wmi.c | 1 - drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c | 11 +- drivers/net/wireless/libertas/cfg.c | 12 +- drivers/net/wireless/mwifiex/cfg80211.c | 10 +- drivers/net/wireless/mwifiex/uap_event.c | 1 - drivers/net/wireless/rndis_wlan.c | 4 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c | 9 +- drivers/staging/wlan-ng/cfg80211.c | 4 +- include/net/cfg80211.h | 77 +----------- include/uapi/linux/nl80211.h | 3 + net/mac80211/ethtool.c | 6 +- net/mac80211/sta_info.c | 80 ++++++------- net/wireless/nl80211.c | 133 ++++++++------------- net/wireless/wext-compat.c | 10 +- 18 files changed, 142 insertions(+), 254 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 7a5337877a0c..44dd6ef923cd 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1799,20 +1799,20 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, if (vif->target_stats.rx_byte) { sinfo->rx_bytes = vif->target_stats.rx_byte; - sinfo->filled |= STATION_INFO_RX_BYTES64; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); sinfo->rx_packets = vif->target_stats.rx_pkt; - sinfo->filled |= STATION_INFO_RX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); } if (vif->target_stats.tx_byte) { sinfo->tx_bytes = vif->target_stats.tx_byte; - sinfo->filled |= STATION_INFO_TX_BYTES64; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); sinfo->tx_packets = vif->target_stats.tx_pkt; - sinfo->filled |= STATION_INFO_TX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); } sinfo->signal = vif->target_stats.cs_rssi; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); rate = vif->target_stats.tx_ucast_rate; @@ -1844,12 +1844,12 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, return 0; } - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= 
BIT(NL80211_STA_INFO_TX_BITRATE); if (test_bit(CONNECTED, &vif->flags) && test_bit(DTIM_PERIOD_AVAIL, &vif->flags) && vif->nw_type == INFRA_NETWORK) { - sinfo->filled |= STATION_INFO_BSS_PARAM; + sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM); sinfo->bss_param.flags = 0; sinfo->bss_param.dtim_period = vif->assoc_bss_dtim_period; sinfo->bss_param.beacon_interval = vif->assoc_bss_beacon_int; diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c index 933aef025698..b42ba46b5030 100644 --- a/drivers/net/wireless/ath/ath6kl/main.c +++ b/drivers/net/wireless/ath/ath6kl/main.c @@ -488,7 +488,6 @@ void ath6kl_connect_ap_mode_sta(struct ath6kl_vif *vif, u16 aid, u8 *mac_addr, sinfo.assoc_req_ies = ies; sinfo.assoc_req_ies_len = ies_len; - sinfo.filled |= STATION_INFO_ASSOC_REQ_IES; cfg80211_new_sta(vif->ndev, mac_addr, &sinfo, GFP_KERNEL); diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 38332a6dfb3a..e72a95d1ced6 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -142,14 +142,14 @@ int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid, sinfo->generation = wil->sinfo_gen; - sinfo->filled = STATION_INFO_RX_BYTES | - STATION_INFO_TX_BYTES | - STATION_INFO_RX_PACKETS | - STATION_INFO_TX_PACKETS | - STATION_INFO_RX_BITRATE | - STATION_INFO_TX_BITRATE | - STATION_INFO_RX_DROP_MISC | - STATION_INFO_TX_FAILED; + sinfo->filled = BIT(NL80211_STA_INFO_RX_BYTES) | + BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_RX_PACKETS) | + BIT(NL80211_STA_INFO_TX_PACKETS) | + BIT(NL80211_STA_INFO_RX_BITRATE) | + BIT(NL80211_STA_INFO_TX_BITRATE) | + BIT(NL80211_STA_INFO_RX_DROP_MISC) | + BIT(NL80211_STA_INFO_TX_FAILED); sinfo->txrate.flags = RATE_INFO_FLAGS_MCS | RATE_INFO_FLAGS_60G; sinfo->txrate.mcs = le16_to_cpu(reply.evt.bf_mcs); @@ -163,7 +163,7 @@ int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid, sinfo->tx_failed = stats->tx_errors; if (test_bit(wil_status_fwconnected, &wil->status)) { - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); sinfo->signal = reply.evt.sqi; } diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 63476c86cd0e..899754920955 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -457,7 +457,6 @@ static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len) if (assoc_req_ie) { sinfo.assoc_req_ies = assoc_req_ie; sinfo.assoc_req_ies_len = assoc_req_ielen; - sinfo.filled |= STATION_INFO_ASSOC_REQ_IES; } cfg80211_new_sta(ndev, evt->bssid, &sinfo, GFP_KERNEL); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c index 3aecc5f48719..4a88b2381a68 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c @@ -2333,10 +2333,10 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, brcmf_err("GET STA INFO failed, %d\n", err); goto done; } - sinfo->filled = STATION_INFO_INACTIVE_TIME; + sinfo->filled = BIT(NL80211_STA_INFO_INACTIVE_TIME); sinfo->inactive_time = le32_to_cpu(sta_info_le.idle) * 1000; if (le32_to_cpu(sta_info_le.flags) & BRCMF_STA_ASSOC) { - sinfo->filled |= STATION_INFO_CONNECTED_TIME; + sinfo->filled |= BIT(NL80211_STA_INFO_CONNECTED_TIME); sinfo->connected_time = le32_to_cpu(sta_info_le.in); } brcmf_dbg(TRACE, "STA idle 
time : %d ms, connected time :%d sec\n", @@ -2354,7 +2354,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, brcmf_err("Could not get rate (%d)\n", err); goto done; } else { - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); sinfo->txrate.legacy = rate * 5; brcmf_dbg(CONN, "Rate %d Mbps\n", rate / 2); } @@ -2369,7 +2369,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, goto done; } else { rssi = le32_to_cpu(scb_val.val); - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); sinfo->signal = rssi; brcmf_dbg(CONN, "RSSI %d dBm\n", rssi); } @@ -2396,7 +2396,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, brcmf_dbg(CONN, "DTIM peroid %d\n", dtim_period); } - sinfo->filled |= STATION_INFO_BSS_PARAM; + sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM); } } else err = -EPERM; @@ -4778,7 +4778,6 @@ brcmf_notify_connect_status_ap(struct brcmf_cfg80211_info *cfg, if (((event == BRCMF_E_ASSOC_IND) || (event == BRCMF_E_REASSOC_IND)) && (reason == BRCMF_E_STATUS_SUCCESS)) { memset(&sinfo, 0, sizeof(sinfo)); - sinfo.filled = STATION_INFO_ASSOC_REQ_IES; if (!data) { brcmf_err("No IEs present in ASSOC/REASSOC_IND"); return -EINVAL; diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index 34f09ef90bb3..a92985a6ea21 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -1616,10 +1616,10 @@ static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev, lbs_deb_enter(LBS_DEB_CFG80211); - sinfo->filled |= STATION_INFO_TX_BYTES | - STATION_INFO_TX_PACKETS | - STATION_INFO_RX_BYTES | - STATION_INFO_RX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_TX_PACKETS) | + BIT(NL80211_STA_INFO_RX_BYTES) | + BIT(NL80211_STA_INFO_RX_PACKETS); sinfo->tx_bytes = priv->dev->stats.tx_bytes; sinfo->tx_packets = priv->dev->stats.tx_packets; sinfo->rx_bytes = priv->dev->stats.rx_bytes; @@ -1629,14 +1629,14 @@ static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev, ret = lbs_get_rssi(priv, &signal, &noise); if (ret == 0) { sinfo->signal = signal; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } /* Convert priv->cur_rate from hw_value to NL80211 value */ for (i = 0; i < ARRAY_SIZE(lbs_rates); i++) { if (priv->cur_rate == lbs_rates[i].hw_value) { sinfo->txrate.legacy = lbs_rates[i].bitrate; - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); break; } } diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 8bd446b69658..71312ff52703 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -910,10 +910,10 @@ mwifiex_dump_station_info(struct mwifiex_private *priv, { u32 rate; - sinfo->filled = STATION_INFO_RX_BYTES | STATION_INFO_TX_BYTES | - STATION_INFO_RX_PACKETS | STATION_INFO_TX_PACKETS | - STATION_INFO_TX_BITRATE | - STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; + sinfo->filled = BIT(NL80211_STA_INFO_RX_BYTES) | BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_RX_PACKETS) | BIT(NL80211_STA_INFO_TX_PACKETS) | + BIT(NL80211_STA_INFO_TX_BITRATE) | + BIT(NL80211_STA_INFO_SIGNAL) | BIT(NL80211_STA_INFO_SIGNAL_AVG); /* Get signal information from the firmware */ if (mwifiex_send_cmd(priv, HostCmd_CMD_RSSI_INFO, @@ -944,7 +944,7 @@ 
mwifiex_dump_station_info(struct mwifiex_private *priv, sinfo->txrate.legacy = rate * 5; if (priv->bss_mode == NL80211_IFTYPE_STATION) { - sinfo->filled |= STATION_INFO_BSS_PARAM; + sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM); sinfo->bss_param.flags = 0; if (priv->curr_bss_params.bss_descriptor.cap_info_bitmap & WLAN_CAPABILITY_SHORT_PREAMBLE) diff --git a/drivers/net/wireless/mwifiex/uap_event.c b/drivers/net/wireless/mwifiex/uap_event.c index c54a537e31fb..3b3a970e2086 100644 --- a/drivers/net/wireless/mwifiex/uap_event.c +++ b/drivers/net/wireless/mwifiex/uap_event.c @@ -68,7 +68,6 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv) len = ETH_ALEN; if (len != -1) { - sinfo.filled = STATION_INFO_ASSOC_REQ_IES; sinfo.assoc_req_ies = &event->data[len]; len = (u8 *)sinfo.assoc_req_ies - (u8 *)&event->frame_control; diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 1a4facd1fbf3..60d44ce9c017 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -2478,7 +2478,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev, ret = rndis_query_oid(usbdev, RNDIS_OID_GEN_LINK_SPEED, &linkspeed, &len); if (ret == 0) { sinfo->txrate.legacy = le32_to_cpu(linkspeed) / 1000; - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); } len = sizeof(rssi); @@ -2486,7 +2486,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev, &rssi, &len); if (ret == 0) { sinfo->signal = level_to_qual(le32_to_cpu(rssi)); - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } } diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 8d11b0ca412c..a2133b1fd631 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5401,7 +5401,7 @@ static void wlcore_op_sta_statistics(struct ieee80211_hw *hw, if (ret < 0) goto out_sleep; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); sinfo->signal = rssi_dbm; out_sleep: diff --git a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c index 3d26955da724..c76874d72a22 100644 --- a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c @@ -1092,17 +1092,17 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy, goto exit; } - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); sinfo->signal = translate_percentage_to_dbm(padapter->recvpriv. 
signal_strength); - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); sinfo->txrate.legacy = rtw_get_cur_max_rate(padapter); - sinfo->filled |= STATION_INFO_RX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); sinfo->rx_packets = sta_rx_data_pkts(psta); - sinfo->filled |= STATION_INFO_TX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); sinfo->tx_packets = psta->sta_stats.tx_pkts; } @@ -2365,7 +2365,6 @@ void rtw_cfg80211_indicate_sta_assoc(struct rtw_adapter *padapter, u.reassoc_req.variable); sinfo.filled = 0; - sinfo.filled = STATION_INFO_ASSOC_REQ_IES; sinfo.assoc_req_ies = pmgmt_frame + ie_offset; sinfo.assoc_req_ies_len = frame_len - ie_offset; cfg80211_new_sta(ndev, hdr->addr2, &sinfo, GFP_ATOMIC); diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index 8942dcb44180..7c87aecf4744 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -325,9 +325,9 @@ static int prism2_get_station(struct wiphy *wiphy, struct net_device *dev, if (result == 0) { sinfo->txrate.legacy = quality.txrate.data; - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); sinfo->signal = quality.level.data; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } return result; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 42e3d74f1906..91c133626c32 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -865,75 +865,6 @@ int cfg80211_check_station_change(struct wiphy *wiphy, struct station_parameters *params, enum cfg80211_station_type statype); -/** - * enum station_info_flags - station information flags - * - * Used by the driver to indicate which info in &struct station_info - * it has filled in during get_station() or dump_station(). 
- * - * @STATION_INFO_INACTIVE_TIME: @inactive_time filled - * @STATION_INFO_RX_BYTES: @rx_bytes filled - * @STATION_INFO_TX_BYTES: @tx_bytes filled - * @STATION_INFO_RX_BYTES64: @rx_bytes filled with 64-bit value - * @STATION_INFO_TX_BYTES64: @tx_bytes filled with 64-bit value - * @STATION_INFO_LLID: @llid filled - * @STATION_INFO_PLID: @plid filled - * @STATION_INFO_PLINK_STATE: @plink_state filled - * @STATION_INFO_SIGNAL: @signal filled - * @STATION_INFO_TX_BITRATE: @txrate fields are filled - * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) - * @STATION_INFO_RX_PACKETS: @rx_packets filled with 32-bit value - * @STATION_INFO_TX_PACKETS: @tx_packets filled with 32-bit value - * @STATION_INFO_TX_RETRIES: @tx_retries filled - * @STATION_INFO_TX_FAILED: @tx_failed filled - * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled - * @STATION_INFO_SIGNAL_AVG: @signal_avg filled - * @STATION_INFO_RX_BITRATE: @rxrate fields are filled - * @STATION_INFO_BSS_PARAM: @bss_param filled - * @STATION_INFO_CONNECTED_TIME: @connected_time filled - * @STATION_INFO_ASSOC_REQ_IES: @assoc_req_ies filled - * @STATION_INFO_STA_FLAGS: @sta_flags filled - * @STATION_INFO_BEACON_LOSS_COUNT: @beacon_loss_count filled - * @STATION_INFO_T_OFFSET: @t_offset filled - * @STATION_INFO_LOCAL_PM: @local_pm filled - * @STATION_INFO_PEER_PM: @peer_pm filled - * @STATION_INFO_NONPEER_PM: @nonpeer_pm filled - * @STATION_INFO_CHAIN_SIGNAL: @chain_signal filled - * @STATION_INFO_CHAIN_SIGNAL_AVG: @chain_signal_avg filled - * @STATION_INFO_EXPECTED_THROUGHPUT: @expected_throughput filled - */ -enum station_info_flags { - STATION_INFO_INACTIVE_TIME = BIT(0), - STATION_INFO_RX_BYTES = BIT(1), - STATION_INFO_TX_BYTES = BIT(2), - STATION_INFO_LLID = BIT(3), - STATION_INFO_PLID = BIT(4), - STATION_INFO_PLINK_STATE = BIT(5), - STATION_INFO_SIGNAL = BIT(6), - STATION_INFO_TX_BITRATE = BIT(7), - STATION_INFO_RX_PACKETS = BIT(8), - STATION_INFO_TX_PACKETS = BIT(9), - STATION_INFO_TX_RETRIES = BIT(10), - STATION_INFO_TX_FAILED = BIT(11), - STATION_INFO_RX_DROP_MISC = BIT(12), - STATION_INFO_SIGNAL_AVG = BIT(13), - STATION_INFO_RX_BITRATE = BIT(14), - STATION_INFO_BSS_PARAM = BIT(15), - STATION_INFO_CONNECTED_TIME = BIT(16), - STATION_INFO_ASSOC_REQ_IES = BIT(17), - STATION_INFO_STA_FLAGS = BIT(18), - STATION_INFO_BEACON_LOSS_COUNT = BIT(19), - STATION_INFO_T_OFFSET = BIT(20), - STATION_INFO_LOCAL_PM = BIT(21), - STATION_INFO_PEER_PM = BIT(22), - STATION_INFO_NONPEER_PM = BIT(23), - STATION_INFO_RX_BYTES64 = BIT(24), - STATION_INFO_TX_BYTES64 = BIT(25), - STATION_INFO_CHAIN_SIGNAL = BIT(26), - STATION_INFO_CHAIN_SIGNAL_AVG = BIT(27), - STATION_INFO_EXPECTED_THROUGHPUT = BIT(28), -}; - /** * enum station_info_rate_flags - bitrate info flags * @@ -1015,7 +946,8 @@ struct sta_bss_parameters { * * Station information filled by driver for get_station() and dump_station. * - * @filled: bitflag of flags from &enum station_info_flags + * @filled: bitflag of flags using the bits of &enum nl80211_sta_info to + * indicate the relevant values in this struct for them * @connected_time: time(in secs) since a station is last connected * @inactive_time: time since last station activity (tx/rx) in milliseconds * @rx_bytes: bytes received from this station @@ -1094,11 +1026,6 @@ struct station_info { enum nl80211_mesh_power_mode nonpeer_pm; u32 expected_throughput; - - /* - * Note: Add a new enum station_info_flags value for each new field and - * use it to check which fields are initialized. 
- */ }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2f549a253138..e48ca0bbd07b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2373,6 +2373,8 @@ enum nl80211_sta_bss_param { * Same format as NL80211_STA_INFO_CHAIN_SIGNAL. * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the * 802.11 header (u32, kbps) + * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons + * (u64) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -2405,6 +2407,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_CHAIN_SIGNAL, NL80211_STA_INFO_CHAIN_SIGNAL_AVG, NL80211_STA_INFO_EXPECTED_THROUGHPUT, + NL80211_STA_INFO_RX_DROP_MISC, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index eea742710c0a..52bcea6ad9e8 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -117,16 +117,16 @@ static void ieee80211_get_stats(struct net_device *dev, data[i++] = sta->sta_state; - if (sinfo.filled & STATION_INFO_TX_BITRATE) + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); i++; - if (sinfo.filled & STATION_INFO_RX_BITRATE) + if (sinfo.filled & BIT(NL80211_STA_INFO_RX_BITRATE)) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.rxrate); i++; - if (sinfo.filled & STATION_INFO_SIGNAL_AVG) + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL_AVG)) data[i] = (u8)sinfo.signal_avg; i++; } else { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 967b42eae5c2..64b53b943d98 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1756,51 +1756,51 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) drv_sta_statistics(local, sdata, &sta->sta, sinfo); - sinfo->filled |= STATION_INFO_INACTIVE_TIME | - STATION_INFO_STA_FLAGS | - STATION_INFO_BSS_PARAM | - STATION_INFO_CONNECTED_TIME | - STATION_INFO_RX_DROP_MISC | - STATION_INFO_BEACON_LOSS_COUNT; + sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) | + BIT(NL80211_STA_INFO_STA_FLAGS) | + BIT(NL80211_STA_INFO_BSS_PARAM) | + BIT(NL80211_STA_INFO_CONNECTED_TIME) | + BIT(NL80211_STA_INFO_RX_DROP_MISC) | + BIT(NL80211_STA_INFO_BEACON_LOSS); ktime_get_ts(&uptime); sinfo->connected_time = uptime.tv_sec - sta->last_connected; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); - if (!(sinfo->filled & (STATION_INFO_TX_BYTES64 | - STATION_INFO_TX_BYTES))) { + if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | + BIT(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_bytes += sta->tx_bytes[ac]; - sinfo->filled |= STATION_INFO_TX_BYTES64; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); } - if (!(sinfo->filled & STATION_INFO_TX_PACKETS)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_packets += sta->tx_packets[ac]; - sinfo->filled |= STATION_INFO_TX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); } - if (!(sinfo->filled & (STATION_INFO_RX_BYTES64 | - STATION_INFO_RX_BYTES))) { + if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | + BIT(NL80211_STA_INFO_RX_BYTES)))) { sinfo->rx_bytes = sta->rx_bytes; - sinfo->filled |= STATION_INFO_RX_BYTES64; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); } - if (!(sinfo->filled & STATION_INFO_RX_PACKETS)) { + if 
(!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { sinfo->rx_packets = sta->rx_packets; - sinfo->filled |= STATION_INFO_RX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); } - if (!(sinfo->filled & STATION_INFO_TX_RETRIES)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) { sinfo->tx_retries = sta->tx_retry_count; - sinfo->filled |= STATION_INFO_TX_RETRIES; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES); } - if (!(sinfo->filled & STATION_INFO_TX_FAILED)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) { sinfo->tx_failed = sta->tx_retry_failed; - sinfo->filled |= STATION_INFO_TX_FAILED; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED); } sinfo->rx_dropped_misc = sta->rx_dropped; @@ -1808,22 +1808,22 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - if (!(sinfo->filled & STATION_INFO_SIGNAL)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { sinfo->signal = (s8)sta->last_signal; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } - if (!(sinfo->filled & STATION_INFO_SIGNAL_AVG)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); - sinfo->filled |= STATION_INFO_SIGNAL_AVG; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); } } if (sta->chains && - !(sinfo->filled & (STATION_INFO_CHAIN_SIGNAL | - STATION_INFO_CHAIN_SIGNAL_AVG))) { - sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | - STATION_INFO_CHAIN_SIGNAL_AVG; + !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { + sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = sta->chains; for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { @@ -1833,30 +1833,30 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } } - if (!(sinfo->filled & STATION_INFO_TX_BITRATE)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) { sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); } - if (!(sinfo->filled & STATION_INFO_RX_BITRATE)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) { sta_set_rate_info_rx(sta, &sinfo->rxrate); - sinfo->filled |= STATION_INFO_RX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE); } if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - sinfo->filled |= STATION_INFO_LLID | - STATION_INFO_PLID | - STATION_INFO_PLINK_STATE | - STATION_INFO_LOCAL_PM | - STATION_INFO_PEER_PM | - STATION_INFO_NONPEER_PM; + sinfo->filled |= BIT(NL80211_STA_INFO_LLID) | + BIT(NL80211_STA_INFO_PLID) | + BIT(NL80211_STA_INFO_PLINK_STATE) | + BIT(NL80211_STA_INFO_LOCAL_PM) | + BIT(NL80211_STA_INFO_PEER_PM) | + BIT(NL80211_STA_INFO_NONPEER_PM); sinfo->llid = sta->llid; sinfo->plid = sta->plid; sinfo->plink_state = sta->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - sinfo->filled |= STATION_INFO_T_OFFSET; + sinfo->filled |= BIT(NL80211_STA_INFO_T_OFFSET); sinfo->t_offset = sta->t_offset; } sinfo->local_pm = sta->local_pm; @@ -1905,7 +1905,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) thr = drv_get_expected_throughput(local, &sta->sta); if (thr != 0) { - sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT; + sinfo->filled |= 
BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); sinfo->expected_throughput = thr; } } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a75dc91976d3..68faf8a2aa43 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3671,115 +3671,77 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_CONNECTED_TIME) && - nla_put_u32(msg, NL80211_STA_INFO_CONNECTED_TIME, - sinfo->connected_time)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_INACTIVE_TIME) && - nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, - sinfo->inactive_time)) - goto nla_put_failure; - if ((sinfo->filled & (STATION_INFO_RX_BYTES | - STATION_INFO_RX_BYTES64)) && + +#define PUT_SINFO(attr, memb, type) do { \ + if (sinfo->filled & BIT(NL80211_STA_INFO_ ## attr) && \ + nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr, \ + sinfo->memb)) \ + goto nla_put_failure; \ + } while (0) + + PUT_SINFO(CONNECTED_TIME, connected_time, u32); + PUT_SINFO(INACTIVE_TIME, inactive_time, u32); + + if (sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES) | + BIT(NL80211_STA_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & (STATION_INFO_TX_BYTES | - STATION_INFO_TX_BYTES64)) && + + if (sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, (u32)sinfo->tx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES64) && - nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, - sinfo->rx_bytes)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES64) && - nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, - sinfo->tx_bytes)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_LLID) && - nla_put_u16(msg, NL80211_STA_INFO_LLID, sinfo->llid)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PLID) && - nla_put_u16(msg, NL80211_STA_INFO_PLID, sinfo->plid)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PLINK_STATE) && - nla_put_u8(msg, NL80211_STA_INFO_PLINK_STATE, - sinfo->plink_state)) - goto nla_put_failure; + + PUT_SINFO(RX_BYTES64, rx_bytes, u64); + PUT_SINFO(TX_BYTES64, tx_bytes, u64); + PUT_SINFO(LLID, llid, u16); + PUT_SINFO(PLID, plid, u16); + PUT_SINFO(PLINK_STATE, plink_state, u8); + switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - if ((sinfo->filled & STATION_INFO_SIGNAL) && - nla_put_u8(msg, NL80211_STA_INFO_SIGNAL, - sinfo->signal)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_SIGNAL_AVG) && - nla_put_u8(msg, NL80211_STA_INFO_SIGNAL_AVG, - sinfo->signal_avg)) - goto nla_put_failure; + PUT_SINFO(SIGNAL, signal, u8); + PUT_SINFO(SIGNAL_AVG, signal_avg, u8); break; default: break; } - if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) { + if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal, NL80211_STA_INFO_CHAIN_SIGNAL)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) { + if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal_avg, NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_TX_BITRATE) { + if (sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, 
NL80211_STA_INFO_TX_BITRATE)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_RX_BITRATE) { + if (sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->rxrate, NL80211_STA_INFO_RX_BITRATE)) goto nla_put_failure; } - if ((sinfo->filled & STATION_INFO_RX_PACKETS) && - nla_put_u32(msg, NL80211_STA_INFO_RX_PACKETS, - sinfo->rx_packets)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_PACKETS) && - nla_put_u32(msg, NL80211_STA_INFO_TX_PACKETS, - sinfo->tx_packets)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_RETRIES) && - nla_put_u32(msg, NL80211_STA_INFO_TX_RETRIES, - sinfo->tx_retries)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_FAILED) && - nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED, - sinfo->tx_failed)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) && - nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT, - sinfo->expected_throughput)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) && - nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, - sinfo->beacon_loss_count)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_LOCAL_PM) && - nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, - sinfo->local_pm)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PEER_PM) && - nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, - sinfo->peer_pm)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_NONPEER_PM) && - nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, - sinfo->nonpeer_pm)) - goto nla_put_failure; - if (sinfo->filled & STATION_INFO_BSS_PARAM) { + + PUT_SINFO(RX_PACKETS, rx_packets, u32); + PUT_SINFO(TX_PACKETS, tx_packets, u32); + PUT_SINFO(TX_RETRIES, tx_retries, u32); + PUT_SINFO(TX_FAILED, tx_failed, u32); + PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32); + PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32); + PUT_SINFO(LOCAL_PM, local_pm, u32); + PUT_SINFO(PEER_PM, peer_pm, u32); + PUT_SINFO(NONPEER_PM, nonpeer_pm, u32); + + if (sinfo->filled & BIT(NL80211_STA_INFO_BSS_PARAM)) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) goto nla_put_failure; @@ -3798,18 +3760,19 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, nla_nest_end(msg, bss_param); } - if ((sinfo->filled & STATION_INFO_STA_FLAGS) && + if ((sinfo->filled & BIT(NL80211_STA_INFO_STA_FLAGS)) && nla_put(msg, NL80211_STA_INFO_STA_FLAGS, sizeof(struct nl80211_sta_flag_update), &sinfo->sta_flags)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_T_OFFSET) && - nla_put_u64(msg, NL80211_STA_INFO_T_OFFSET, - sinfo->t_offset)) - goto nla_put_failure; + + PUT_SINFO(T_OFFSET, t_offset, u64); + PUT_SINFO(RX_DROP_MISC, rx_dropped_misc, u64); + +#undef PUT_SINFO nla_nest_end(msg, sinfoattr); - if ((sinfo->filled & STATION_INFO_ASSOC_REQ_IES) && + if (sinfo->assoc_req_ies_len && nla_put(msg, NL80211_ATTR_IE, sinfo->assoc_req_ies_len, sinfo->assoc_req_ies)) goto nla_put_failure; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 0f47948c572f..5b24d39d7903 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1300,7 +1300,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - if (!(sinfo.filled & STATION_INFO_TX_BITRATE)) + if (!(sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))) return -EOPNOTSUPP; rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); @@ -1340,7 +1340,7 @@ static struct iw_statistics 
*cfg80211_wireless_stats(struct net_device *dev) switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - if (sinfo.filled & STATION_INFO_SIGNAL) { + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { int sig = sinfo.signal; wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; @@ -1354,7 +1354,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) break; } case CFG80211_SIGNAL_TYPE_UNSPEC: - if (sinfo.filled & STATION_INFO_SIGNAL) { + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; wstats.qual.level = sinfo.signal; @@ -1367,9 +1367,9 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) } wstats.qual.updated |= IW_QUAL_NOISE_INVALID; - if (sinfo.filled & STATION_INFO_RX_DROP_MISC) + if (sinfo.filled & BIT(NL80211_STA_INFO_RX_DROP_MISC)) wstats.discard.misc = sinfo.rx_dropped_misc; - if (sinfo.filled & STATION_INFO_TX_FAILED) + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; return &wstats; -- cgit v1.2.3 From a76b1942a10293a94edf3c93c23a6231b63532f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Nov 2014 14:12:22 +0100 Subject: cfg80211: add nl80211 beacon-only statistics Add these two values: * BEACON_RX: number of beacons received from this peer * BEACON_SIGNAL_AVG: signal strength average for beacons only These can then be used for Android Lollipop's statistics request. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 5 +++++ net/wireless/nl80211.c | 2 ++ 3 files changed, 13 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 91c133626c32..ef26ce16b058 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -987,6 +987,9 @@ struct sta_bss_parameters { * @nonpeer_pm: non-peer mesh STA power save mode * @expected_throughput: expected throughput in kbps (including 802.11 headers) * towards this station. 
+ * @rx_beacon: number of beacons received from this peer + * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received + * from this peer */ struct station_info { u32 filled; @@ -1026,6 +1029,9 @@ struct station_info { enum nl80211_mesh_power_mode nonpeer_pm; u32 expected_throughput; + + u64 rx_beacon; + u8 rx_beacon_signal_avg; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e48ca0bbd07b..0c3d341c0aeb 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2375,6 +2375,9 @@ enum nl80211_sta_bss_param { * 802.11 header (u32, kbps) * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons * (u64) + * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64) + * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average + * for beacons only (u8, dBm) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -2408,6 +2411,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_CHAIN_SIGNAL_AVG, NL80211_STA_INFO_EXPECTED_THROUGHPUT, NL80211_STA_INFO_RX_DROP_MISC, + NL80211_STA_INFO_BEACON_RX, + NL80211_STA_INFO_BEACON_SIGNAL_AVG, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 68faf8a2aa43..42b968a1f994 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3768,6 +3768,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(T_OFFSET, t_offset, u64); PUT_SINFO(RX_DROP_MISC, rx_dropped_misc, u64); + PUT_SINFO(BEACON_RX, rx_beacon, u64); + PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); #undef PUT_SINFO nla_nest_end(msg, sinfoattr); -- cgit v1.2.3 From 6de39808cf1dd7b02bf42e7d8695d80f5eaf645d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 19 Dec 2014 12:34:00 +0100 Subject: nl80211: support per-TID station statistics The base for the current statistics is pretty mixed up, support exporting RX/TX statistics for MSDUs per TID. This (currently) covers received MSDUs, transmitted MSDUs and retries/failures thereof. Doing it per TID for MSDUs makes more sense than say only per AC because it's symmetric - we could export per-AC statistics for all frames (which AC we used for transmission can be determined also for management frames) but per TID is better and usually data frames are really the ones we care about. Also, on RX we can't determine the AC - but we do know the TID for any QoS MPDU we received. 
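As a rough driver-side sketch (not part of this patch), filling the new counters from get_station() would look something like the code below; the rx_msdu/tx_msdu source arrays are hypothetical driver state with IEEE80211_NUM_TIDS + 1 entries, the last one holding non-QoS MSDUs:

#include <linux/ieee80211.h>
#include <net/cfg80211.h>

static void example_fill_tid_stats(struct station_info *sinfo,
                                   const u64 *rx_msdu, const u64 *tx_msdu)
{
        int tid;

        sinfo->filled |= BIT(NL80211_STA_INFO_TID_STATS);

        for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
                struct cfg80211_tid_stats *tidstats = &sinfo->pertid[tid];

                tidstats->filled = BIT(NL80211_TID_STATS_RX_MSDU) |
                                   BIT(NL80211_TID_STATS_TX_MSDU);
                tidstats->rx_msdu = rx_msdu[tid];
                tidstats->tx_msdu = tx_msdu[tid];
        }
}

nl80211 then nests these values under NL80211_STA_INFO_TID_STATS, one inner attribute per TID numbered TID + 1, with 17 used for the non-QoS bucket.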
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 +++++++++++++++++++++ include/uapi/linux/nl80211.h | 31 +++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 95420fb61600..197735788f18 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -939,6 +939,24 @@ struct sta_bss_parameters { u16 beacon_interval; }; +/** + * struct cfg80211_tid_stats - per-TID statistics + * @filled: bitmap of flags using the bits of &enum nl80211_tid_stats to + * indicate the relevant values in this struct are filled + * @rx_msdu: number of received MSDUs + * @tx_msdu: number of (attempted) transmitted MSDUs + * @tx_msdu_retries: number of retries (not counting the first) for + * transmitted MSDUs + * @tx_msdu_failed: number of failed transmitted MSDUs + */ +struct cfg80211_tid_stats { + u32 filled; + u64 rx_msdu; + u64 tx_msdu; + u64 tx_msdu_retries; + u64 tx_msdu_failed; +}; + #define IEEE80211_MAX_CHAINS 4 /** @@ -990,6 +1008,8 @@ struct sta_bss_parameters { * @rx_beacon: number of beacons received from this peer * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received * from this peer + * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last + * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. */ struct station_info { u32 filled; @@ -1032,6 +1052,7 @@ struct station_info { u64 rx_beacon; u8 rx_beacon_signal_avg; + struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1]; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b0fb5d598250..a963d4824c51 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2384,6 +2384,11 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64) * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average * for beacons only (u8, dBm) + * @NL80211_STA_INFO_TID_STATS: per-TID statistics (see &enum nl80211_tid_stats) + * This is a nested attribute where each the inner attribute number is the + * TID+1 and the special TID 16 (i.e. value 17) is used for non-QoS frames; + * each one of those is again nested with &enum nl80211_tid_stats + * attributes carrying the actual values. 
* @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -2419,12 +2424,38 @@ enum nl80211_sta_info { NL80211_STA_INFO_RX_DROP_MISC, NL80211_STA_INFO_BEACON_RX, NL80211_STA_INFO_BEACON_SIGNAL_AVG, + NL80211_STA_INFO_TID_STATS, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, NL80211_STA_INFO_MAX = __NL80211_STA_INFO_AFTER_LAST - 1 }; +/** + * enum nl80211_tid_stats - per TID statistics attributes + * @__NL80211_TID_STATS_INVALID: attribute number 0 is reserved + * @NL80211_TID_STATS_RX_MSDU: number of MSDUs received (u64) + * @NL80211_TID_STATS_TX_MSDU: number of MSDUs transmitted (or + * attempted to transmit; u64) + * @NL80211_TID_STATS_TX_MSDU_RETRIES: number of retries for + * transmitted MSDUs (not counting the first attempt; u64) + * @NL80211_TID_STATS_TX_MSDU_FAILED: number of failed transmitted + * MSDUs (u64) + * @NUM_NL80211_TID_STATS: number of attributes here + * @NL80211_TID_STATS_MAX: highest numbered attribute here + */ +enum nl80211_tid_stats { + __NL80211_TID_STATS_INVALID, + NL80211_TID_STATS_RX_MSDU, + NL80211_TID_STATS_TX_MSDU, + NL80211_TID_STATS_TX_MSDU_RETRIES, + NL80211_TID_STATS_TX_MSDU_FAILED, + + /* keep last */ + NUM_NL80211_TID_STATS, + NL80211_TID_STATS_MAX = NUM_NL80211_TID_STATS - 1 +}; + /** * enum nl80211_mpath_flags - nl80211 mesh path flags * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 42b968a1f994..7c2ce26e22de 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3772,6 +3772,47 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); #undef PUT_SINFO + + if (sinfo->filled & BIT(NL80211_STA_INFO_TID_STATS)) { + struct nlattr *tidsattr; + int tid; + + tidsattr = nla_nest_start(msg, NL80211_STA_INFO_TID_STATS); + if (!tidsattr) + goto nla_put_failure; + + for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) { + struct cfg80211_tid_stats *tidstats; + struct nlattr *tidattr; + + tidstats = &sinfo->pertid[tid]; + + if (!tidstats->filled) + continue; + + tidattr = nla_nest_start(msg, tid + 1); + if (!tidattr) + goto nla_put_failure; + +#define PUT_TIDVAL(attr, memb, type) do { \ + if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) && \ + nla_put_ ## type(msg, NL80211_TID_STATS_ ## attr, \ + tidstats->memb)) \ + goto nla_put_failure; \ + } while (0) + + PUT_TIDVAL(RX_MSDU, rx_msdu, u64); + PUT_TIDVAL(TX_MSDU, tx_msdu, u64); + PUT_TIDVAL(TX_MSDU_RETRIES, tx_msdu_retries, u64); + PUT_TIDVAL(TX_MSDU_FAILED, tx_msdu_failed, u64); + +#undef PUT_TIDVAL + nla_nest_end(msg, tidattr); + } + + nla_nest_end(msg, tidsattr); + } + nla_nest_end(msg, sinfoattr); if (sinfo->assoc_req_ies_len && -- cgit v1.2.3 From 79c892b85027d5074dfa670dd451c14ee649fb88 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Nov 2014 14:26:31 +0100 Subject: mac80211: provide per-TID RX/TX MSDU counters Implement the new counters cfg80211 can now advertise to userspace. The TX code is in the sequence number handler, which is a bit odd, but that place already knows the TID and frame type, so it was easiest and least impact there. 
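The per-TID bookkeeping boils down to reading the QoS control field of each data frame. A minimal sketch of that lookup (not the exact code of this patch; the extra IEEE80211_NUM_TIDS slot is the non-QoS bucket mentioned above):

#include <linux/ieee80211.h>

/* pick the counter slot for a data frame: TID 0..15, or the extra
 * IEEE80211_NUM_TIDS slot for non-QoS data
 */
static u8 msdu_counter_index(struct ieee80211_hdr *hdr)
{
        if (ieee80211_is_data_qos(hdr->frame_control)) {
                u8 *qc = ieee80211_get_qos_ctl(hdr);

                return *qc & IEEE80211_QOS_CTL_TID_MASK;
        }

        return IEEE80211_NUM_TIDS;
}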
Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 9 +++++++++ net/mac80211/sta_info.c | 31 +++++++++++++++++++++++++++++++ net/mac80211/sta_info.h | 12 ++++++++++++ net/mac80211/status.c | 15 +++++++++++++-- net/mac80211/tx.c | 3 +++ 5 files changed, 68 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index fa5d870655a9..3a1a3ba40bd8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2314,6 +2314,15 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return RX_DROP_MONITOR; + if (rx->sta) { + /* The security index has the same property as needed + * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS + * for non-QoS-data frames. Here we know it's a data + * frame, so count MSDUs. + */ + rx->sta->rx_msdu[rx->security_idx]++; + } + /* * Send unexpected-4addr-frame event to hostapd. For older versions, * also drop the frame to cooked monitor interfaces. diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 64b53b943d98..dc352fcdd469 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1843,6 +1843,37 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE); } + sinfo->filled |= BIT(NL80211_STA_INFO_TID_STATS); + for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { + struct cfg80211_tid_stats *tidstats = &sinfo->pertid[i]; + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { + tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); + tidstats->rx_msdu = sta->rx_msdu[i]; + } + + if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { + tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); + tidstats->tx_msdu = sta->tx_msdu[i]; + } + + if (!(tidstats->filled & + BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && + local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + tidstats->filled |= + BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); + tidstats->tx_msdu_retries = sta->tx_msdu_retries[i]; + } + + if (!(tidstats->filled & + BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && + local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { + tidstats->filled |= + BIT(NL80211_TID_STATS_TX_MSDU_FAILED); + tidstats->tx_msdu_failed = sta->tx_msdu_failed[i]; + } + } + if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH sinfo->filled |= BIT(NL80211_STA_INFO_LLID) | diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 4f052bb2a5ad..925e68fe64c7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -346,6 +346,14 @@ struct ieee80211_tx_latency_stat { * @cipher_scheme: optional cipher scheme for this station * @last_tdls_pkt_time: holds the time in jiffies of last TDLS pkt ACKed * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) + * @tx_msdu: MSDUs transmitted to this station, using IEEE80211_NUM_TID + * entry for non-QoS frames + * @tx_msdu_retries: MSDU retries for transmissions to to this station, + * using IEEE80211_NUM_TID entry for non-QoS frames + * @tx_msdu_failed: MSDU failures for transmissions to to this station, + * using IEEE80211_NUM_TID entry for non-QoS frames + * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID + * entry for non-QoS frames */ struct sta_info { /* General information, mostly static */ @@ -416,6 +424,10 @@ struct sta_info { u32 last_rx_rate_vht_flag; u8 last_rx_rate_vht_nss; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; + u64 tx_msdu[IEEE80211_NUM_TIDS + 1]; + u64 
tx_msdu_retries[IEEE80211_NUM_TIDS + 1]; + u64 tx_msdu_failed[IEEE80211_NUM_TIDS + 1]; + u64 rx_msdu[IEEE80211_NUM_TIDS + 1]; /* * Aggregation information, locked with lock. diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 7d4e9307164c..788707f05516 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -730,6 +730,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_bar *bar; int rtap_len; int shift = 0; + int tid = IEEE80211_NUM_TIDS;; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); @@ -773,7 +774,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && (ieee80211_is_data_qos(fc))) { - u16 tid, ssn; + u16 ssn; u8 *qc; qc = ieee80211_get_qos_ctl(hdr); @@ -782,10 +783,14 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) & IEEE80211_SCTL_SEQ); ieee80211_send_bar(&sta->sdata->vif, hdr->addr1, tid, ssn); + } else if (ieee80211_is_data_qos(fc)) { + u8 *qc = ieee80211_get_qos_ctl(hdr); + + tid = qc[0] & 0xf; } if (!acked && ieee80211_is_back_req(fc)) { - u16 tid, control; + u16 control; /* * BAR failed, store the last SSN and retry sending @@ -813,6 +818,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (!acked) sta->tx_retry_failed++; sta->tx_retry_count += retry_count; + + if (ieee80211_is_data_present(fc)) { + if (!acked) + sta->tx_msdu_failed[tid]++; + sta->tx_msdu_retries[tid] += retry_count; + } } rate_control_tx_status(local, sband, sta, skb); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 058686a721a1..da7f352a2b16 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -815,6 +815,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) /* for pure STA mode without beacons, we can do it */ hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number); tx->sdata->sequence_number += 0x10; + if (tx->sta) + tx->sta->tx_msdu[IEEE80211_NUM_TIDS]++; return TX_CONTINUE; } @@ -831,6 +833,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; seq = &tx->sta->tid_seq[tid]; + tx->sta->tx_msdu[tid]++; hdr->seq_ctrl = cpu_to_le16(*seq); -- cgit v1.2.3 From bc6efeeeb5a2fa968532b9d666e8b7f823b1940f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 8 Jan 2015 11:30:10 +0100 Subject: ieee802154: 6lowpan: fix Makefile entry Since commit ea81ac2e7050798109356150ea16e71622a5c329 ("ieee802154: create 6lowpan sub-directory") we have a subdirectory for the ieee802154 6lowpan implementation. That commit also moved the Kconfig entry inside net/ieee802154/6lowpan/, but forgot to change the Makefile entry from obj-$(CONFIG_IEEE802154_6LOWPAN) to obj-y and to handle obj-$(CONFIG_IEEE802154_6LOWPAN) inside the created 6lowpan directory. As a result, ieee802154_6lowpan can't be built.
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/ieee802154/6lowpan/Makefile | 2 +- net/ieee802154/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan/Makefile b/net/ieee802154/6lowpan/Makefile index 2f1b75a49f4d..6bfb270a81a6 100644 --- a/net/ieee802154/6lowpan/Makefile +++ b/net/ieee802154/6lowpan/Makefile @@ -1,3 +1,3 @@ -obj-y += ieee802154_6lowpan.o +obj-$(CONFIG_IEEE802154_6LOWPAN) += ieee802154_6lowpan.o ieee802154_6lowpan-y := core.o rx.o reassembly.o tx.o diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 36533978b5ef..05dab2957cd4 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o -obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan/ +obj-y += 6lowpan/ ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ header_ops.o sysfs.o nl802154.o -- cgit v1.2.3 From 07f6c4bc048a7a8939c68a668bf77474890794c5 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 7 Jan 2015 13:41:58 +0800 Subject: tipc: convert tipc reference table to use generic rhashtable As tipc reference table is statically allocated, its memory size requested on stack initialization stage is quite big even if the maximum port number is just restricted to 8191 currently, however, the number already becomes insufficient in practice. But if the maximum ports is allowed to its theory value - 2^32, its consumed memory size will reach a ridiculously unacceptable value. Apart from this, heavy tipc users spend a considerable amount of time in tipc_sk_get() due to the read-lock on ref_table_lock. If tipc reference table is converted with generic rhashtable, above mentioned both disadvantages would be resolved respectively: making use of the new resizable hash table can avoid locking on the lookup; smaller memory size is required at initial stage, for example, 256 hash bucket slots are requested at the beginning phase instead of allocating the entire 8191 slots in old mode. The hash table will grow if entries exceeds 75% of table size up to a total table size of 1M, and it will automatically shrink if usage falls below 30%, but the minimum table size is allowed down to 256. Also converts ref_table_lock to a separate mutex to protect hash table mutations on write side. Lastly defers the release of the socket reference using call_rcu() to allow using an RCU read-side protected call to rhashtable_lookup(). Signed-off-by: Ying Xue Acked-by: Jon Maloy Acked-by: Erik Hugne Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/tipc/Kconfig | 12 -- net/tipc/config.c | 24 +-- net/tipc/core.c | 10 +- net/tipc/core.h | 3 - net/tipc/socket.c | 480 +++++++++++++++++++----------------------------------- net/tipc/socket.h | 4 +- 6 files changed, 180 insertions(+), 353 deletions(-) (limited to 'net') diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index c890848f9d56..91c8a8e031db 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -20,18 +20,6 @@ menuconfig TIPC If in doubt, say N. -config TIPC_PORTS - int "Maximum number of ports in a node" - depends on TIPC - range 127 65535 - default "8191" - help - Specifies how many ports can be supported by a node. - Can range from 127 to 65535 ports; default is 8191. - - Setting this to a smaller value saves some memory, - setting it to higher allows for more ports. 
- config TIPC_MEDIA_IB bool "InfiniBand media type support" depends on TIPC && INFINIBAND_IPOIB diff --git a/net/tipc/config.c b/net/tipc/config.c index 876f4c6a2631..0b3a90ecab6d 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -183,22 +183,6 @@ static struct sk_buff *cfg_set_own_addr(void) return tipc_cfg_reply_error_string("cannot change to network mode"); } -static struct sk_buff *cfg_set_max_ports(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_max_ports) - return tipc_cfg_reply_none(); - if (value < 127 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max ports must be 127-65535)"); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max ports while TIPC is active)"); -} - static struct sk_buff *cfg_set_netid(void) { u32 value; @@ -285,15 +269,9 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_NODE_ADDR: rep_tlv_buf = cfg_set_own_addr(); break; - case TIPC_CMD_SET_MAX_PORTS: - rep_tlv_buf = cfg_set_max_ports(); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; - case TIPC_CMD_GET_MAX_PORTS: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; @@ -317,6 +295,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_REMOTE_MNG: case TIPC_CMD_GET_REMOTE_MNG: case TIPC_CMD_DUMP_LOG: + case TIPC_CMD_SET_MAX_PORTS: + case TIPC_CMD_GET_MAX_PORTS: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (obsolete command)"); break; diff --git a/net/tipc/core.c b/net/tipc/core.c index a5737b8407dd..71b2ada0f5ab 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -34,6 +34,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include "core.h" #include "name_table.h" #include "subscr.h" @@ -47,7 +49,6 @@ int tipc_random __read_mostly; /* configurable TIPC parameters */ u32 tipc_own_addr __read_mostly; -int tipc_max_ports __read_mostly; int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ @@ -84,9 +85,9 @@ static void tipc_core_stop(void) tipc_netlink_stop(); tipc_subscr_stop(); tipc_nametbl_stop(); - tipc_sk_ref_table_stop(); tipc_socket_stop(); tipc_unregister_sysctl(); + tipc_sk_rht_destroy(); } /** @@ -98,7 +99,7 @@ static int tipc_core_start(void) get_random_bytes(&tipc_random, sizeof(tipc_random)); - err = tipc_sk_ref_table_init(tipc_max_ports, tipc_random); + err = tipc_sk_rht_init(); if (err) goto out_reftbl; @@ -138,7 +139,7 @@ out_socket: out_netlink: tipc_nametbl_stop(); out_nametbl: - tipc_sk_ref_table_stop(); + tipc_sk_rht_destroy(); out_reftbl: return err; } @@ -150,7 +151,6 @@ static int __init tipc_init(void) pr_info("Activated (version " TIPC_MOD_VER ")\n"); tipc_own_addr = 0; - tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << diff --git a/net/tipc/core.h b/net/tipc/core.h index 84602137ce20..56fe4229fc5e 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -37,8 +37,6 @@ #ifndef _TIPC_CORE_H #define _TIPC_CORE_H -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include #include #include @@ -79,7 +77,6 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...); * Global configuration variables */ extern u32 tipc_own_addr __read_mostly; -extern int tipc_max_ports __read_mostly; extern int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4731cad99d1c..701f31bbbbfb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -34,22 +34,25 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include +#include #include "core.h" #include "name_table.h" #include "node.h" #include "link.h" -#include #include "config.h" #include "socket.h" -#define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ +#define SS_LISTENING -1 /* socket is listening */ +#define SS_READY -2 /* socket is connectionless */ -#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define TIPC_FWD_MSG 1 -#define TIPC_CONN_OK 0 -#define TIPC_CONN_PROBING 1 +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ +#define TIPC_FWD_MSG 1 +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 +#define TIPC_MAX_PORT 0xffffffff +#define TIPC_MIN_PORT 1 /** * struct tipc_sock - TIPC socket structure @@ -59,7 +62,7 @@ * @conn_instance: TIPC instance used when connection was established * @published: non-zero if port has one or more associated names * @max_pkt: maximum packet size "hint" used when building messages sent by port - * @ref: unique reference to port in TIPC object registry + * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports * @publications: list of publications for port @@ -74,6 +77,8 @@ * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @node: hash table node + * @rcu: rcu struct for tipc_sock */ struct tipc_sock { struct sock sk; @@ -82,7 +87,7 @@ struct tipc_sock { u32 conn_instance; int published; u32 max_pkt; - u32 ref; + u32 portid; struct tipc_msg phdr; struct list_head sock_list; struct list_head publications; @@ -95,6 +100,8 @@ struct tipc_sock { bool link_cong; uint sent_unacked; uint rcv_unacked; + struct rhash_head node; + struct rcu_head rcu; }; static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -103,16 +110,14 @@ static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); -static void tipc_sk_timeout(unsigned long ref); +static void tipc_sk_timeout(unsigned long portid); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); -static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk); -static void tipc_sk_ref_discard(u32 ref); -static struct tipc_sock *tipc_sk_get(u32 ref); -static struct tipc_sock *tipc_sk_get_next(u32 *ref); -static void tipc_sk_put(struct tipc_sock *tsk); +static struct tipc_sock *tipc_sk_lookup(u32 portid); +static int tipc_sk_insert(struct tipc_sock *tsk); +static void tipc_sk_remove(struct tipc_sock *tsk); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -174,6 +179,9 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { * - port reference */ +/* Protects tipc socket hash table mutations */ +static struct rhashtable tipc_sk_rht; + static u32 tsk_peer_node(struct tipc_sock *tsk) { return msg_destnode(&tsk->phdr); @@ -305,7 +313,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, 
struct sock *sk; struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref; /* Validate arguments */ if (unlikely(protocol != 0)) @@ -339,24 +346,22 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -ENOMEM; tsk = tipc_sk(sk); - ref = tipc_sk_ref_acquire(tsk); - if (!ref) { - pr_warn("Socket create failed; reference table exhausted\n"); - return -ENOMEM; - } tsk->max_pkt = MAX_PKT_DEFAULT; - tsk->ref = ref; INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); - msg_set_origport(msg, ref); /* Finish initializing socket data structures */ sock->ops = ops; sock->state = state; sock_init_data(sock, sk); - k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref); + if (tipc_sk_insert(tsk)) { + pr_warn("Socket create failed; port numbrer exhausted\n"); + return -EINVAL; + } + msg_set_origport(msg, tsk->portid); + k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, tsk->portid); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -442,6 +447,13 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, return ret; } +static void tipc_sk_callback(struct rcu_head *head) +{ + struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); + + sock_put(&tsk->sk); +} + /** * tipc_release - destroy a TIPC socket * @sock: socket to destroy @@ -491,7 +503,7 @@ static int tipc_release(struct socket *sock) (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; tsk->connected = 0; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(dnode, tsk->portid); } if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) tipc_link_xmit_skb(skb, dnode, 0); @@ -499,16 +511,16 @@ static int tipc_release(struct socket *sock) } tipc_sk_withdraw(tsk, 0, NULL); - tipc_sk_ref_discard(tsk->ref); k_cancel_timer(&tsk->timer); + tipc_sk_remove(tsk); if (tsk->connected) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tipc_own_addr, tsk_peer_port(tsk), - tsk->ref, TIPC_ERR_NO_PORT); + tsk->portid, TIPC_ERR_NO_PORT); if (skb) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + tipc_link_xmit_skb(skb, dnode, tsk->portid); + tipc_node_remove_conn(dnode, tsk->portid); } k_term_timer(&tsk->timer); @@ -518,7 +530,8 @@ static int tipc_release(struct socket *sock) /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; release_sock(sk); - sock_put(sk); + + call_rcu(&tsk->rcu, tipc_sk_callback); sock->sk = NULL; return 0; @@ -611,7 +624,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.ref = tsk_peer_port(tsk); addr->addr.id.node = tsk_peer_node(tsk); } else { - addr->addr.id.ref = tsk->ref; + addr->addr.id.ref = tsk->portid; addr->addr.id.node = tipc_own_addr; } @@ -946,7 +959,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, } new_mtu: - mtu = tipc_node_get_mtu(dnode, tsk->ref); + mtu = tipc_node_get_mtu(dnode, tsk->portid); __skb_queue_head_init(&head); rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); if (rc < 0) @@ -955,7 +968,7 @@ new_mtu: do { skb = skb_peek(&head); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(&head, dnode, tsk->ref); + rc = tipc_link_xmit(&head, dnode, tsk->portid); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -1028,7 +1041,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct 
socket *sock, struct tipc_msg *mhdr = &tsk->phdr; struct sk_buff_head head; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - u32 ref = tsk->ref; + u32 portid = tsk->portid; int rc = -EINVAL; long timeo; u32 dnode; @@ -1067,7 +1080,7 @@ next: goto exit; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(&head, dnode, ref); + rc = tipc_link_xmit(&head, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1076,7 +1089,7 @@ next: goto next; } if (rc == -EMSGSIZE) { - tsk->max_pkt = tipc_node_get_mtu(dnode, ref); + tsk->max_pkt = tipc_node_get_mtu(dnode, portid); goto next; } if (rc != -ELINKCONG) @@ -1130,8 +1143,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; k_start_timer(&tsk->timer, tsk->probing_interval); - tipc_node_add_conn(peer_node, tsk->ref, peer_port); - tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref); + tipc_node_add_conn(peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->portid); } /** @@ -1238,7 +1251,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!tsk->connected) return; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, - tipc_own_addr, peer_port, tsk->ref, TIPC_OK); + tipc_own_addr, peer_port, tsk->portid, TIPC_OK); if (!skb) return; msg = buf_msg(skb); @@ -1552,7 +1565,7 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) tsk->connected = 0; /* let timer expire on it's own */ tipc_node_remove_conn(tsk_peer_node(tsk), - tsk->ref); + tsk->portid); } retval = TIPC_OK; } @@ -1743,7 +1756,7 @@ int tipc_sk_rcv(struct sk_buff *skb) u32 dnode; /* Validate destination and message */ - tsk = tipc_sk_get(dport); + tsk = tipc_sk_lookup(dport); if (unlikely(!tsk)) { rc = tipc_msg_eval(skb, &dnode); goto exit; @@ -1763,7 +1776,7 @@ int tipc_sk_rcv(struct sk_buff *skb) rc = -TIPC_ERR_OVERLOAD; } spin_unlock_bh(&sk->sk_lock.slock); - tipc_sk_put(tsk); + sock_put(sk); if (likely(!rc)) return 0; exit: @@ -2050,20 +2063,20 @@ restart: goto restart; } if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit_skb(skb, dnode, tsk->ref); - tipc_node_remove_conn(dnode, tsk->ref); + tipc_link_xmit_skb(skb, dnode, tsk->portid); + tipc_node_remove_conn(dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tipc_own_addr, tsk_peer_port(tsk), - tsk->ref, TIPC_CONN_SHUTDOWN); - tipc_link_xmit_skb(skb, dnode, tsk->ref); + tsk->portid, TIPC_CONN_SHUTDOWN); + tipc_link_xmit_skb(skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(dnode, tsk->ref); + tipc_node_remove_conn(dnode, tsk->portid); /* fall through */ case SS_DISCONNECTING: @@ -2084,14 +2097,14 @@ restart: return res; } -static void tipc_sk_timeout(unsigned long ref) +static void tipc_sk_timeout(unsigned long portid) { struct tipc_sock *tsk; struct sock *sk; struct sk_buff *skb = NULL; u32 peer_port, peer_node; - tsk = tipc_sk_get(ref); + tsk = tipc_sk_lookup(portid); if (!tsk) return; @@ -2108,20 +2121,20 @@ static void tipc_sk_timeout(unsigned long ref) /* Previous probe not answered -> self abort */ skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, tipc_own_addr, - peer_node, ref, peer_port, + peer_node, portid, peer_port, TIPC_ERR_NO_PORT); } else { skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, 
tipc_own_addr, - peer_port, ref, TIPC_OK); + peer_port, portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; k_start_timer(&tsk->timer, tsk->probing_interval); } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(skb, peer_node, ref); + tipc_link_xmit_skb(skb, peer_node, portid); exit: - tipc_sk_put(tsk); + sock_put(sk); } static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, @@ -2132,12 +2145,12 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, if (tsk->connected) return -EINVAL; - key = tsk->ref + tsk->pub_count + 1; - if (key == tsk->ref) + key = tsk->portid + tsk->pub_count + 1; + if (key == tsk->portid) return -EADDRINUSE; publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, - scope, tsk->ref, key); + scope, tsk->portid, key); if (unlikely(!publ)) return -EINVAL; @@ -2188,9 +2201,9 @@ static int tipc_sk_show(struct tipc_sock *tsk, char *buf, ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), tsk->ref); + tipc_node(tipc_own_addr), tsk->portid); else - ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref); + ret = tipc_snprintf(buf, len, "%-10u:", tsk->portid); if (tsk->connected) { u32 dport = tsk_peer_port(tsk); @@ -2224,13 +2237,15 @@ static int tipc_sk_show(struct tipc_sock *tsk, char *buf, struct sk_buff *tipc_sk_socks_show(void) { + const struct bucket_table *tbl; + struct rhash_head *pos; struct sk_buff *buf; struct tlv_desc *rep_tlv; char *pb; int pb_len; struct tipc_sock *tsk; int str_len = 0; - u32 ref = 0; + int i; buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); if (!buf) @@ -2239,14 +2254,18 @@ struct sk_buff *tipc_sk_socks_show(void) pb = TLV_DATA(rep_tlv); pb_len = ULTRA_STRING_MAX_LEN; - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - str_len += tipc_sk_show(tsk, pb + str_len, - pb_len - str_len, 0); - release_sock(&tsk->sk); - tipc_sk_put(tsk); + rcu_read_lock(); + tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + str_len += tipc_sk_show(tsk, pb + str_len, + pb_len - str_len, 0); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } } + rcu_read_unlock(); + str_len += 1; /* for "\0" */ skb_put(buf, TLV_SPACE(str_len)); TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); @@ -2259,255 +2278,91 @@ struct sk_buff *tipc_sk_socks_show(void) */ void tipc_sk_reinit(void) { + const struct bucket_table *tbl; + struct rhash_head *pos; + struct tipc_sock *tsk; struct tipc_msg *msg; - u32 ref = 0; - struct tipc_sock *tsk = tipc_sk_get_next(&ref); + int i; - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - msg = &tsk->phdr; - msg_set_prevnode(msg, tipc_own_addr); - msg_set_orignode(msg, tipc_own_addr); - release_sock(&tsk->sk); - tipc_sk_put(tsk); + rcu_read_lock(); + tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + msg = &tsk->phdr; + msg_set_prevnode(msg, tipc_own_addr); + msg_set_orignode(msg, tipc_own_addr); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + } } + rcu_read_unlock(); } -/** - * struct reference - TIPC socket reference entry - * @tsk: pointer to socket associated with reference entry - * @ref: reference value for socket (combines instance & array index info) - */ -struct reference { - struct tipc_sock 
*tsk; - u32 ref; -}; - -/** - * struct tipc_ref_table - table of TIPC socket reference entries - * @entries: pointer to array of reference entries - * @capacity: array index of first unusable entry - * @init_point: array index of first uninitialized entry - * @first_free: array index of first unused socket reference entry - * @last_free: array index of last unused socket reference entry - * @index_mask: bitmask for array index portion of reference values - * @start_mask: initial value for instance value portion of reference values - */ -struct ref_table { - struct reference *entries; - u32 capacity; - u32 init_point; - u32 first_free; - u32 last_free; - u32 index_mask; - u32 start_mask; -}; - -/* Socket reference table consists of 2**N entries. - * - * State Socket ptr Reference - * ----- ---------- --------- - * In use non-NULL XXXX|own index - * (XXXX changes each time entry is acquired) - * Free NULL YYYY|next free index - * (YYYY is one more than last used XXXX) - * Uninitialized NULL 0 - * - * Entry 0 is not used; this allows index 0 to denote the end of the free list. - * - * Note that a reference value of 0 does not necessarily indicate that an - * entry is uninitialized, since the last entry in the free list could also - * have a reference value of 0 (although this is unlikely). - */ - -static struct ref_table tipc_ref_table; - -static DEFINE_RWLOCK(ref_table_lock); - -/** - * tipc_ref_table_init - create reference table for sockets - */ -int tipc_sk_ref_table_init(u32 req_sz, u32 start) +static struct tipc_sock *tipc_sk_lookup(u32 portid) { - struct reference *table; - u32 actual_sz; - - /* account for unused entry, then round up size to a power of 2 */ - - req_sz++; - for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) { - /* do nothing */ - }; - - /* allocate table & mark all entries as uninitialized */ - table = vzalloc(actual_sz * sizeof(struct reference)); - if (table == NULL) - return -ENOMEM; - - tipc_ref_table.entries = table; - tipc_ref_table.capacity = req_sz; - tipc_ref_table.init_point = 1; - tipc_ref_table.first_free = 0; - tipc_ref_table.last_free = 0; - tipc_ref_table.index_mask = actual_sz - 1; - tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask; + struct tipc_sock *tsk; - return 0; -} + rcu_read_lock(); + tsk = rhashtable_lookup(&tipc_sk_rht, &portid); + if (tsk) + sock_hold(&tsk->sk); + rcu_read_unlock(); -/** - * tipc_ref_table_stop - destroy reference table for sockets - */ -void tipc_sk_ref_table_stop(void) -{ - if (!tipc_ref_table.entries) - return; - vfree(tipc_ref_table.entries); - tipc_ref_table.entries = NULL; + return tsk; } -/* tipc_ref_acquire - create reference to a socket - * - * Register an socket pointer in the reference table. - * Returns a unique reference value that is used from then on to retrieve the - * socket pointer, or to determine if the socket has been deregistered. - */ -u32 tipc_sk_ref_acquire(struct tipc_sock *tsk) +static int tipc_sk_insert(struct tipc_sock *tsk) { - u32 index; - u32 index_mask; - u32 next_plus_upper; - u32 ref = 0; - struct reference *entry; - - if (unlikely(!tsk)) { - pr_err("Attempt to acquire ref. to non-existent obj\n"); - return 0; - } - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. 
table not found in acquisition attempt\n"); - return 0; - } - - /* Take a free entry, if available; otherwise initialize a new one */ - write_lock_bh(&ref_table_lock); - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; + u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; + u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; - if (likely(index)) { - index = tipc_ref_table.first_free; - entry = &tipc_ref_table.entries[index]; - index_mask = tipc_ref_table.index_mask; - next_plus_upper = entry->ref; - tipc_ref_table.first_free = next_plus_upper & index_mask; - ref = (next_plus_upper & ~index_mask) + index; - entry->tsk = tsk; - } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) { - index = tipc_ref_table.init_point++; - entry = &tipc_ref_table.entries[index]; - ref = tipc_ref_table.start_mask + index; + while (remaining--) { + portid++; + if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) + portid = TIPC_MIN_PORT; + tsk->portid = portid; + sock_hold(&tsk->sk); + if (rhashtable_lookup_insert(&tipc_sk_rht, &tsk->node)) + return 0; + sock_put(&tsk->sk); } - if (ref) { - entry->ref = ref; - entry->tsk = tsk; - } - write_unlock_bh(&ref_table_lock); - return ref; + return -1; } -/* tipc_sk_ref_discard - invalidate reference to an socket - * - * Disallow future references to an socket and free up the entry for re-use. - */ -void tipc_sk_ref_discard(u32 ref) +static void tipc_sk_remove(struct tipc_sock *tsk) { - struct reference *entry; - u32 index; - u32 index_mask; - - if (unlikely(!tipc_ref_table.entries)) { - pr_err("Ref. table not found during discard attempt\n"); - return; - } - - index_mask = tipc_ref_table.index_mask; - index = ref & index_mask; - entry = &tipc_ref_table.entries[index]; - - write_lock_bh(&ref_table_lock); + struct sock *sk = &tsk->sk; - if (unlikely(!entry->tsk)) { - pr_err("Attempt to discard ref. 
to non-existent socket\n"); - goto exit; - } - if (unlikely(entry->ref != ref)) { - pr_err("Attempt to discard non-existent reference\n"); - goto exit; + if (rhashtable_remove(&tipc_sk_rht, &tsk->node)) { + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); } - - /* Mark entry as unused; increment instance part of entry's - * reference to invalidate any subsequent references - */ - - entry->tsk = NULL; - entry->ref = (ref & ~index_mask) + (index_mask + 1); - - /* Append entry to free entry list */ - if (unlikely(tipc_ref_table.first_free == 0)) - tipc_ref_table.first_free = index; - else - tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index; - tipc_ref_table.last_free = index; -exit: - write_unlock_bh(&ref_table_lock); } -/* tipc_sk_get - find referenced socket and return pointer to it - */ -struct tipc_sock *tipc_sk_get(u32 ref) +int tipc_sk_rht_init(void) { - struct reference *entry; - struct tipc_sock *tsk; + struct rhashtable_params rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .hashfn = jhash, + .max_shift = 20, /* 1M */ + .min_shift = 8, /* 256 */ + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, + }; - if (unlikely(!tipc_ref_table.entries)) - return NULL; - read_lock_bh(&ref_table_lock); - entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask]; - tsk = entry->tsk; - if (likely(tsk && (entry->ref == ref))) - sock_hold(&tsk->sk); - else - tsk = NULL; - read_unlock_bh(&ref_table_lock); - return tsk; + return rhashtable_init(&tipc_sk_rht, &rht_params); } -/* tipc_sk_get_next - lock & return next socket after referenced one -*/ -struct tipc_sock *tipc_sk_get_next(u32 *ref) +void tipc_sk_rht_destroy(void) { - struct reference *entry; - struct tipc_sock *tsk = NULL; - uint index = *ref & tipc_ref_table.index_mask; + /* Wait for socket readers to complete */ + synchronize_net(); - read_lock_bh(&ref_table_lock); - while (++index < tipc_ref_table.capacity) { - entry = &tipc_ref_table.entries[index]; - if (!entry->tsk) - continue; - tsk = entry->tsk; - sock_hold(&tsk->sk); - *ref = entry->ref; - break; - } - read_unlock_bh(&ref_table_lock); - return tsk; -} - -static void tipc_sk_put(struct tipc_sock *tsk) -{ - sock_put(&tsk->sk); + rhashtable_destroy(&tipc_sk_rht); } /** @@ -2829,7 +2684,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, attrs = nla_nest_start(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->ref)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) goto attr_msg_cancel; if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr)) goto attr_msg_cancel; @@ -2859,22 +2714,29 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; struct tipc_sock *tsk; - u32 prev_ref = cb->args[0]; - u32 ref = prev_ref; - - tsk = tipc_sk_get_next(&ref); - for (; tsk; tsk = tipc_sk_get_next(&ref)) { - lock_sock(&tsk->sk); - err = __tipc_nl_add_sk(skb, cb, tsk); - release_sock(&tsk->sk); - tipc_sk_put(tsk); - if (err) - break; + const struct bucket_table *tbl; + struct rhash_head *pos; + u32 prev_portid = cb->args[0]; + u32 portid = prev_portid; + int i; - prev_ref = ref; + rcu_read_lock(); + tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + for (i = 0; i < tbl->size; i++) { + rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + spin_lock_bh(&tsk->sk.sk_lock.slock); + portid = 
tsk->portid; + err = __tipc_nl_add_sk(skb, cb, tsk); + spin_unlock_bh(&tsk->sk.sk_lock.slock); + if (err) + break; + + prev_portid = portid; + } } + rcu_read_unlock(); - cb->args[0] = prev_ref; + cb->args[0] = prev_portid; return skb->len; } @@ -2962,12 +2824,12 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; - u32 tsk_ref = cb->args[0]; + u32 tsk_portid = cb->args[0]; u32 last_publ = cb->args[1]; u32 done = cb->args[2]; struct tipc_sock *tsk; - if (!tsk_ref) { + if (!tsk_portid) { struct nlattr **attrs; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; @@ -2984,13 +2846,13 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!sock[TIPC_NLA_SOCK_REF]) return -EINVAL; - tsk_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); } if (done) return 0; - tsk = tipc_sk_get(tsk_ref); + tsk = tipc_sk_lookup(tsk_portid); if (!tsk) return -EINVAL; @@ -2999,9 +2861,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!err) done = 1; release_sock(&tsk->sk); - tipc_sk_put(tsk); + sock_put(&tsk->sk); - cb->args[0] = tsk_ref; + cb->args[0] = tsk_portid; cb->args[1] = last_publ; cb->args[2] = done; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index d34089387006..c7d46d069d89 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -46,8 +46,8 @@ int tipc_sk_rcv(struct sk_buff *buf); struct sk_buff *tipc_sk_socks_show(void); void tipc_sk_mcast_rcv(struct sk_buff *buf); void tipc_sk_reinit(void); -int tipc_sk_ref_table_init(u32 requested_size, u32 start); -void tipc_sk_ref_table_stop(void); +int tipc_sk_rht_init(void); +void tipc_sk_rht_destroy(void); int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); -- cgit v1.2.3 From 9b7a86f3514dd5cb4a6787292781daae65a29a10 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Jan 2015 11:40:39 +0100 Subject: mac80211: fix handling TIM IE when stations disconnect When a station disconnects with frames still pending, we clear the TIM bit, but too late - it's only cleared when the station is already removed from the driver, and thus the driver can get confused (and hwsim will loudly complain.) Fix this by clearing the TIM bit earlier, when the station has been unlinked but not removed from the driver yet. To do this, refactor the TIM recalculation to in that case ignore traffic and simply assume no pending traffic - this is correct for the disconnected station even though the frames haven't been freed yet at that point. This patch isn't needed for current drivers though as they don't check the station argument to the set_tim() operation and thus don't really run into the possible confusion. 
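For context, a hypothetical driver that does dereference the station argument in its set_tim() callback shows why the ordering matters; if the TIM were recalculated only after the station had been removed from the driver, the drv_priv access below would be stale. All mydrv_* names are invented for this sketch:

#include <net/mac80211.h>
#include <linux/bitops.h>

struct mydrv_sta {
        u16 aid;
};

struct mydrv_priv {
        unsigned long tim_bitmap[BITS_TO_LONGS(IEEE80211_MAX_AID + 1)];
};

static int mydrv_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
                         bool set)
{
        struct mydrv_priv *priv = hw->priv;
        struct mydrv_sta *ms = (void *)sta->drv_priv;

        /* sta (and thus ms) must still be known to the driver here */
        if (set)
                set_bit(ms->aid, priv->tim_bitmap);
        else
                clear_bit(ms->aid, priv->tim_bitmap);

        return 0;
}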
Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index dc352fcdd469..79383ef0c264 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -116,7 +116,6 @@ static void __cleanup_single_sta(struct sta_info *sta) clear_sta_flag(sta, WLAN_STA_PS_DELIVER); atomic_dec(&ps->num_sta_ps); - sta_info_recalc_tim(sta); } for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { @@ -625,7 +624,7 @@ static unsigned long ieee80211_tids_for_ac(int ac) } } -void sta_info_recalc_tim(struct sta_info *sta) +static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) { struct ieee80211_local *local = sta->local; struct ps_data *ps; @@ -667,6 +666,9 @@ void sta_info_recalc_tim(struct sta_info *sta) if (ignore_for_tim == BIT(IEEE80211_NUM_ACS) - 1) ignore_for_tim = 0; + if (ignore_pending) + ignore_for_tim = BIT(IEEE80211_NUM_ACS) - 1; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; @@ -695,7 +697,7 @@ void sta_info_recalc_tim(struct sta_info *sta) else __bss_tim_clear(ps->tim, id); - if (local->ops->set_tim) { + if (local->ops->set_tim && !WARN_ON(sta->dead)) { local->tim_in_locked_section = true; drv_set_tim(local, &sta->sta, indicate_tim); local->tim_in_locked_section = false; @@ -705,6 +707,11 @@ out_unlock: spin_unlock_bh(&local->tim_lock); } +void sta_info_recalc_tim(struct sta_info *sta) +{ + __sta_info_recalc_tim(sta, false); +} + static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_tx_info *info; @@ -888,6 +895,9 @@ static void __sta_info_destroy_part2(struct sta_info *sta) /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); + /* disable TIM bit - last chance to tell driver */ + __sta_info_recalc_tim(sta, true); + sta->dead = true; local->num_sta--; -- cgit v1.2.3 From 1904a853fae40ee61bed7c231fc5bd2158984441 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 11 Jan 2015 13:50:44 -0800 Subject: Bluetooth: Add opcode parameter to hci_req_complete_t callback When hci_req_run() calls its provided complete function and one of the HCI commands in the sequence fails, then provide the opcode of failing command. In case of success HCI_OP_NOP is provided since all commands completed. This patch fixes the prototype of hci_req_complete_t and all its users. 
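An illustrative callback using the extended prototype could look roughly like this (a sketch, not code taken from the patch):

#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

static void example_req_complete(struct hci_dev *hdev, u8 status, u16 opcode)
{
        if (status) {
                /* opcode identifies the command that broke the sequence */
                BT_ERR("%s: opcode 0x%4.4x failed with status 0x%2.2x",
                       hdev->name, opcode, status);
                return;
        }

        /* all commands completed, opcode is HCI_OP_NOP in this case */
        BT_DBG("%s: request completed", hdev->name);
}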
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 +- net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 9 ++++---- net/bluetooth/hci_request.c | 3 ++- net/bluetooth/mgmt.c | 44 ++++++++++++++++++++++----------------- 5 files changed, 34 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 58695ffeb138..e00455aab18c 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -273,7 +273,7 @@ struct l2cap_ctrl { struct hci_dev; -typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status); +typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode); struct hci_req_ctrl { bool start; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 75240aaca101..2e724e0b75b9 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -633,7 +633,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_reenable_advertising(hdev); } -static void create_le_conn_complete(struct hci_dev *hdev, u8 status) +static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_conn *conn; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bc5486ea5411..ba0d1fdccbd9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -141,7 +141,7 @@ static const struct file_operations dut_mode_fops = { /* ---- HCI requests ---- */ -static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode) { BT_DBG("%s result 0x%2.2x", hdev->name, result); @@ -2754,7 +2754,7 @@ void hci_conn_params_clear_all(struct hci_dev *hdev) BT_DBG("All LE connection parameters were removed"); } -static void inquiry_complete(struct hci_dev *hdev, u8 status) +static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) { if (status) { BT_ERR("Failed to start inquiry: status %d", status); @@ -2766,7 +2766,8 @@ static void inquiry_complete(struct hci_dev *hdev, u8 status) } } -static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; @@ -4159,7 +4160,7 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) call_complete: if (req_complete) - req_complete(hdev, status); + req_complete(hdev, status, status ? 
opcode : HCI_OP_NOP); } static void hci_rx_work(struct work_struct *work) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 324c6418b17c..b59f92c6df0c 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -533,7 +533,8 @@ void __hci_update_background_scan(struct hci_request *req) } } -static void update_background_scan_complete(struct hci_dev *hdev, u8 status) +static void update_background_scan_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { if (status) BT_DBG("HCI request failed to update background scanning: " diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6b3f5537e441..e531da805923 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1251,7 +1251,7 @@ static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) sizeof(settings)); } -static void clean_up_hci_complete(struct hci_dev *hdev, u8 status) +static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status 0x%02x", hdev->name, status); @@ -1518,7 +1518,8 @@ static u8 mgmt_le_support(struct hci_dev *hdev) return MGMT_STATUS_SUCCESS; } -static void set_discoverable_complete(struct hci_dev *hdev, u8 status) +static void set_discoverable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -1777,7 +1778,8 @@ static void write_fast_connectable(struct hci_request *req, bool enable) hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); } -static void set_connectable_complete(struct hci_dev *hdev, u8 status) +static void set_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -2195,7 +2197,7 @@ unlock: return err; } -static void le_enable_complete(struct hci_dev *hdev, u8 status) +static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -2385,7 +2387,7 @@ unlock: hci_dev_unlock(hdev); } -static void add_uuid_complete(struct hci_dev *hdev, u8 status) +static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2464,7 +2466,7 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; } -static void remove_uuid_complete(struct hci_dev *hdev, u8 status) +static void remove_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2549,7 +2551,7 @@ unlock: return err; } -static void set_class_complete(struct hci_dev *hdev, u8 status) +static void set_class_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -3483,7 +3485,7 @@ static void update_name(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); } -static void set_name_complete(struct hci_dev *hdev, u8 status) +static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; struct pending_cmd *cmd; @@ -3834,7 +3836,8 @@ static bool trigger_discovery(struct hci_request *req, u8 *status) return true; } -static void start_discovery_complete(struct hci_dev *hdev, u8 status) +static void start_discovery_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; unsigned long timeout; @@ -4063,7 +4066,7 @@ failed: return err; } -static void stop_discovery_complete(struct hci_dev *hdev, u8 status) +static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -4289,7 
+4292,8 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, return err; } -static void set_advertising_complete(struct hci_dev *hdev, u8 status) +static void set_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -4496,7 +4500,8 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, return err; } -static void fast_connectable_complete(struct hci_dev *hdev, u8 status) +static void fast_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; @@ -4594,7 +4599,7 @@ unlock: return err; } -static void set_bredr_complete(struct hci_dev *hdev, u8 status) +static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -5119,7 +5124,8 @@ static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) return err; } -static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status) +static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, + u16 opcode) { struct hci_cp_read_rssi *cp; struct pending_cmd *cmd; @@ -5326,7 +5332,7 @@ complete: return err; } -static void get_clock_info_complete(struct hci_dev *hdev, u8 status) +static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_cp_read_clock *hci_cp; struct pending_cmd *cmd; @@ -5504,7 +5510,7 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } -static void add_device_complete(struct hci_dev *hdev, u8 status) +static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -5627,7 +5633,7 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } -static void remove_device_complete(struct hci_dev *hdev, u8 status) +static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -6205,7 +6211,7 @@ static void restart_le_actions(struct hci_request *req) __hci_update_background_scan(req); } -static void powered_complete(struct hci_dev *hdev, u8 status) +static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -7316,7 +7322,7 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } -static void adv_enable_complete(struct hci_dev *hdev, u8 status) +static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status %u", hdev->name, status); } -- cgit v1.2.3 From b0a8e282b54d11859a13bebd4c056dcfb577f8b7 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 11 Jan 2015 15:18:17 -0800 Subject: Bluetooth: Add BUILD_BUG_ON for size of struct sockaddr_hci This adds an extra check for ensuring that the size of sockaddr_hci does not grow larger than sockaddr. 
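BUILD_BUG_ON() turns the size relationship into a compile-time assertion, so a later change that grows the address structure past struct sockaddr breaks the build instead of risking overflows wherever the address is copied into a plain struct sockaddr. A minimal sketch of the pattern, with a made-up structure:

#include <linux/bug.h>
#include <linux/init.h>
#include <linux/socket.h>

/* made-up address structure, small enough to fit in struct sockaddr */
struct example_addr {
        sa_family_t family;
        unsigned char dev_addr[6];
};

static int __init example_init(void)
{
        /* the build fails if example_addr ever outgrows struct sockaddr */
        BUILD_BUG_ON(sizeof(struct example_addr) > sizeof(struct sockaddr));

        return 0;
}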
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2c245fdf319a..e176a988625e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1230,6 +1230,8 @@ int __init hci_sock_init(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_hci) > sizeof(struct sockaddr)); + err = proto_register(&hci_sk_proto, 0); if (err < 0) return err; -- cgit v1.2.3 From dd6255588a56e755380b66275ff4f35a8243b64a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 11 Jan 2015 15:18:19 -0800 Subject: Bluetooth: Add BUILD_BUG_ON for size of struct sockaddr_l2 This adds an extra check for ensuring that the size of sockaddr_l2 does not grow larger than sockaddr. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/l2cap_sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f65caf41953f..20206cd3acbc 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1614,6 +1614,8 @@ int __init l2cap_init_sockets(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_l2) > sizeof(struct sockaddr)); + err = proto_register(&l2cap_proto, 0); if (err < 0) return err; -- cgit v1.2.3 From 74b3fb8d0daf150eeb66665897601fb10c024687 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 11 Jan 2015 15:18:20 -0800 Subject: Bluetooth: Add BUILD_BUG_ON for size of struct sockaddr_rc This adds an extra check for ensuring that the size of sockaddr_rc does not grow larger than sockaddr. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/rfcomm/sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 2348176401a0..d8a95755a8a8 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1058,6 +1058,8 @@ int __init rfcomm_init_sockets(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_rc) > sizeof(struct sockaddr)); + err = proto_register(&rfcomm_proto, 0); if (err < 0) return err; -- cgit v1.2.3 From 15762fa772bf91687e2f2a6afc6df975ee2a6f70 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 11 Jan 2015 15:21:06 -0800 Subject: Bluetooth: Add BUILD_BUG_ON for size of struct sockaddr_sco This adds an extra check for ensuring that the size of sockaddr_sco does not grow larger than sockaddr. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/sco.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 30e5ea3f1ad3..07ec7d23b843 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1184,6 +1184,8 @@ int __init sco_init(void) { int err; + BUILD_BUG_ON(sizeof(struct sockaddr_sco) > sizeof(struct sockaddr)); + err = proto_register(&sco_proto, 0); if (err < 0) return err; -- cgit v1.2.3 From 2b531294b02454b6aa189f99ea8982bdd0d7b3bb Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 11 Jan 2015 19:33:31 -0800 Subject: Bluetooth: Simplify packet copy in hci_send_to_monitor function Within the monitor functionality, the global atomic variable called monitor_promisc ensures that no memory allocation happens when there is actually no client listening. This means it is safe to just create a copy of the skb since it is guaranteed that at least one client exists. No extra checks needed.
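The guarantee comes from a simple atomic listener count that is incremented when a monitor socket binds and decremented when it is released. A rough sketch of that pattern with generic names, not the exact Bluetooth code:

#include <linux/atomic.h>
#include <linux/skbuff.h>

static atomic_t monitor_clients = ATOMIC_INIT(0);

static void monitor_client_bound(void)
{
        atomic_inc(&monitor_clients);
}

static void monitor_client_released(void)
{
        atomic_dec(&monitor_clients);
}

static void monitor_send(struct sk_buff *skb)
{
        struct sk_buff *copy;

        /* nobody listening: never allocate anything */
        if (!atomic_read(&monitor_clients))
                return;

        /* at least one listener exists, so an unconditional copy is fine */
        copy = skb_clone(skb, GFP_ATOMIC);
        if (!copy)
                return;

        /* ... fan the copy out to every listening socket ... */
        kfree_skb(copy);
}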
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index e176a988625e..026e84a80659 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -221,6 +221,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; struct sk_buff *skb_copy = NULL; + struct hci_mon_hdr *hdr; __le16 opcode; if (!atomic_read(&monitor_promisc)) @@ -251,6 +252,17 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) return; } + /* Create a private copy with headroom */ + skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, GFP_ATOMIC, true); + if (!skb_copy) + return; + + /* Put header before the data */ + hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); + hdr->opcode = opcode; + hdr->index = cpu_to_le16(hdev->id); + hdr->len = cpu_to_le16(skb->len); + read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { @@ -262,22 +274,6 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) continue; - if (!skb_copy) { - struct hci_mon_hdr *hdr; - - /* Create a private copy with headroom */ - skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE, - GFP_ATOMIC, true); - if (!skb_copy) - continue; - - /* Put header before the data */ - hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); - hdr->opcode = opcode; - hdr->index = cpu_to_le16(hdev->id); - hdr->len = cpu_to_le16(skb->len); - } - nskb = skb_clone(skb_copy, GFP_ATOMIC); if (!nskb) continue; -- cgit v1.2.3 From d7f72f61955ded6ae5f0381dcde1fcadce8833bb Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 11 Jan 2015 19:33:32 -0800 Subject: Bluetooth: Create generic queue_monitor_skb helper function The hci_send_to_monitor function contains generic code for queueing the packet into the receive queue of every monitor client. To avoid code duplication, create a generic queue_monitor_skb function to interate over all monitor sockets. 
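The helper clones the buffer once per receiving socket and never consumes the caller's reference, so converted call sites keep a simple prepare/queue/free shape (sketched from the call sites in this series; create_monitor_event() is the existing event builder):

	skb = create_monitor_event(hdev, event);	/* or any prepared skb */
	if (skb) {
		queue_monitor_skb(skb);		/* clones per monitor socket */
		kfree_skb(skb);			/* caller still owns the original */
	}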
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 51 +++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 026e84a80659..1987ea178b7d 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -216,10 +216,37 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) read_unlock(&hci_sk_list.lock); } +static void queue_monitor_skb(struct sk_buff *skb) +{ + struct sock *sk; + + BT_DBG("len %d", skb->len); + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *nskb; + + if (sk->sk_state != BT_BOUND) + continue; + + if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) + continue; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + + read_unlock(&hci_sk_list.lock); +} + /* Send frame to monitor socket */ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { - struct sock *sk; struct sk_buff *skb_copy = NULL; struct hci_mon_hdr *hdr; __le16 opcode; @@ -263,27 +290,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len); - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) - continue; - - nskb = skb_clone(skb_copy, GFP_ATOMIC); - if (!nskb) - continue; - - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } - - read_unlock(&hci_sk_list.lock); - + queue_monitor_skb(skb_copy); kfree_skb(skb_copy); } -- cgit v1.2.3 From 41e91e71f61ead454c72b68d7c5cf13a2651e992 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 11 Jan 2015 19:33:33 -0800 Subject: Bluetooth: Replace send_monitor_event with queue_monitor_skb The send_monitor_event function is essentially the same as the newly introduced queue_monitor_skb. So instead of having duplicated code, replace send_monitor_event with queue_monitor_skb. 
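After this cleanup every monitor frame reaches user space the same way: a per-socket clone of an skb whose first HCI_MON_HDR_SIZE bytes hold the little-endian monitor header. Reconstructed from the field accesses in the hunks above (the authoritative definition lives in the Bluetooth hci_mon header, so treat this as a sketch):

	struct hci_mon_hdr {
		__le16 opcode;		/* HCI_MON_* packet type */
		__le16 index;		/* controller index (hdev->id) */
		__le16 len;		/* payload length, header excluded */
	} __packed;

	#define HCI_MON_HDR_SIZE	sizeof(struct hci_mon_hdr)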
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1987ea178b7d..1d65c5be7c82 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -294,34 +294,6 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb_copy); } -static void send_monitor_event(struct sk_buff *skb) -{ - struct sock *sk; - - BT_DBG("len %d", skb->len); - - read_lock(&hci_sk_list.lock); - - sk_for_each(sk, &hci_sk_list.head) { - struct sk_buff *nskb; - - if (sk->sk_state != BT_BOUND) - continue; - - if (hci_pi(sk)->channel != HCI_CHANNEL_MONITOR) - continue; - - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - continue; - - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } - - read_unlock(&hci_sk_list.lock); -} - static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; @@ -425,7 +397,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) skb = create_monitor_event(hdev, event); if (skb) { - send_monitor_event(skb); + queue_monitor_skb(skb); kfree_skb(skb); } } -- cgit v1.2.3 From c2f0f979276fc4911cef5da2fc113f0daeda3ebc Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Jan 2015 09:21:25 -0800 Subject: Bluetooth: Handle command complete event for HCI Read Stored Link Keys When the HCI Read Stored Link Keys command completes it gives useful information of the current stored keys and maximum keys a controller can actually store. So process this event and store these information in hci_dev structure. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_event.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 89f4e3c8a097..1f21fe48b38e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -205,6 +205,8 @@ struct hci_dev { __u16 lmp_subver; __u16 voice_setting; __u8 num_iac; + __u8 stored_max_keys; + __u8 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; __u16 page_scan_interval; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0881efd0ad2d..b1580daede13 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -214,6 +214,24 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) hci_bdaddr_list_clear(&hdev->le_white_list); } +static void hci_cc_read_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_stored_link_key *rp = (void *)skb->data; + struct hci_cp_read_stored_link_key *sent; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_READ_STORED_LINK_KEY); + if (!sent) + return; + + if (!rp->status && sent->read_all == 0x01) { + hdev->stored_max_keys = rp->max_keys; + hdev->stored_num_keys = rp->num_keys; + } +} + static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -2714,6 +2732,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_reset(hdev, skb); break; + case HCI_OP_READ_STORED_LINK_KEY: + hci_cc_read_stored_link_key(hdev, skb); + break; + case HCI_OP_WRITE_LOCAL_NAME: hci_cc_write_local_name(hdev, skb); break; -- cgit v1.2.3 From 
48ce62c4fae7e817d9018020345bbe30a6b9c446 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Jan 2015 09:21:26 -0800 Subject: Bluetooth: Read stored link key information when powering on controller The information about max stored link keys and current stored link keys should be read at controller initialization. So issue HCI Read Stored Link Key command with BDADDR_ANY and read_all flag set to 0x01 to retrieve this information. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ba0d1fdccbd9..96572a48948e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -731,6 +731,14 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); + if (hdev->commands[6] & 0x20) { + struct hci_cp_read_stored_link_key cp; + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.read_all = 0x01; + hci_req_add(req, HCI_OP_READ_STORED_LINK_KEY, sizeof(cp), &cp); + } + /* Some Broadcom based Bluetooth controllers do not support the * Delete Stored Link Key command. They are clearly indicating its * absence in the bit mask of supported commands. -- cgit v1.2.3 From a93661203641eb5e8aa9bbed8d9689e5d5dbe6b1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Jan 2015 09:21:28 -0800 Subject: Bluetooth: Process result of HCI Delete Stored Link Key command When the HCI Delete Stored Link Key command completes, then update the value of current stored keys in hci_dev structure. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_event.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b1580daede13..a58845e98921 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -232,6 +232,22 @@ static void hci_cc_read_stored_link_key(struct hci_dev *hdev, } } +static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_delete_stored_link_key *rp = (void *)skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + if (rp->num_keys <= hdev->stored_num_keys) + hdev->stored_num_keys -= rp->num_keys; + else + hdev->stored_num_keys = 0; +} + static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -2736,6 +2752,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_stored_link_key(hdev, skb); break; + case HCI_OP_DELETE_STORED_LINK_KEY: + hci_cc_delete_stored_link_key(hdev, skb); + break; + case HCI_OP_WRITE_LOCAL_NAME: hci_cc_write_local_name(hdev, skb); break; -- cgit v1.2.3 From eee2f04b801ebc95da16f91fef029a083ba1a216 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 8 Jan 2015 11:29:18 -0500 Subject: packet: make packet too small warning match condition The expression in ll_header_truncated() tests less than or equal, but the warning prints less than. Update the warning. Reported-by: Jouni Malinen Signed-off-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6880f34a529a..0f02668dc219 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2102,7 +2102,7 @@ static bool ll_header_truncated(const struct net_device *dev, int len) { /* net device doesn't like empty head */ if (unlikely(len <= dev->hard_header_len)) { - net_warn_ratelimited("%s: packet size is too short (%d < %d)\n", + net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n", current->comm, len, dev->hard_header_len); return true; } -- cgit v1.2.3 From f902e8812ef657c6cf744ac25e21865217155460 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 9 Jan 2015 14:16:40 +0900 Subject: bridge: Add ability to enable TSO Currently a bridge device turns off TSO feature if no bridge ports support it. We can always enable it, since packets can be segmented on ports by software as well as on the bridge device. This will reduce the number of packets processed in the bridge. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/bridge/br_if.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ed307db7a12b..81e49fb73169 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -424,6 +424,7 @@ netdev_features_t br_features_recompute(struct net_bridge *br, features = netdev_increment_features(features, p->dev->features, mask); } + features = netdev_add_tso_features(features, mask); return features; } -- cgit v1.2.3 From 703068eee6dde2ab318048f976b6ebd7fd239a78 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 9 Jan 2015 15:26:58 +0800 Subject: tipc: fix bug in broadcast retransmit code In commit 58dc55f25631178ee74cd27185956a8f7dcb3e32 ("tipc: use generic SKB list APIs to manage link transmission queue") we replace all list traversal loops with the macros skb_queue_walk() or skb_queue_walk_safe(). While the previous loops were based on the assumption that the list was NULL-terminated, the standard macros stop when the iterator reaches the list head, which is non-NULL. In the function bclink_retransmit_pkt() this macro replacement has lead to a bug. When we receive a BCAST STATE_MSG we unconditionally call the function bclink_retransmit_pkt(), whether there really is anything to retransmit or not, assuming that the sequence number comparisons will lead to the correct behavior. However, if the transmission queue is empty, or if there are no eligible buffers in the transmission queue, we will by mistake pass the list head pointer to the function tipc_link_retransmit(). Since the list head is not a valid sk_buff, this leads to a crash. In this commit we fix this by only calling tipc_link_retransmit() if we actually found eligible buffers in the transmission queue. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 96ceefeb9daf..a9e174fc0f91 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -220,10 +220,11 @@ static void bclink_retransmit_pkt(u32 after, u32 to) struct sk_buff *skb; skb_queue_walk(&bcl->outqueue, skb) { - if (more(buf_seqno(skb), after)) + if (more(buf_seqno(skb), after)) { + tipc_link_retransmit(bcl, skb, mod(to - after)); break; + } } - tipc_link_retransmit(bcl, skb, mod(to - after)); } /** -- cgit v1.2.3 From 6b8326ed14683f641e1c4149197f23a48c7cee36 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:26:59 +0800 Subject: tipc: remove tipc_core_start/stop routines Remove redundant wrapper functions like tipc_core_start() and tipc_core_stop(), and directly move them to their callers, such as tipc_init() and tipc_exit(), having us clearly know what are really done in both initialization and deinitialzation functions. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/core.c | 66 ++++++++++++++++++++------------------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index 71b2ada0f5ab..10bc0bf909e4 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -75,28 +75,21 @@ struct sk_buff *tipc_buf_acquire(u32 size) return skb; } -/** - * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode - */ -static void tipc_core_stop(void) -{ - tipc_net_stop(); - tipc_bearer_cleanup(); - tipc_netlink_stop(); - tipc_subscr_stop(); - tipc_nametbl_stop(); - tipc_socket_stop(); - tipc_unregister_sysctl(); - tipc_sk_rht_destroy(); -} - -/** - * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode - */ -static int tipc_core_start(void) +static int __init tipc_init(void) { int err; + pr_info("Activated (version " TIPC_MOD_VER ")\n"); + + tipc_own_addr = 0; + tipc_net_id = 4711; + + sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; + get_random_bytes(&tipc_random, sizeof(tipc_random)); err = tipc_sk_rht_init(); @@ -127,6 +120,7 @@ static int tipc_core_start(void) if (err) goto out_bearer; + pr_info("Started in single node mode\n"); return 0; out_bearer: tipc_subscr_stop(); @@ -141,35 +135,21 @@ out_netlink: out_nametbl: tipc_sk_rht_destroy(); out_reftbl: + pr_err("Unable to start in single node mode\n"); return err; } -static int __init tipc_init(void) -{ - int res; - - pr_info("Activated (version " TIPC_MOD_VER ")\n"); - - tipc_own_addr = 0; - tipc_net_id = 4711; - - sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_LOW_IMPORTANCE; - sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_CRITICAL_IMPORTANCE; - sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; - - res = tipc_core_start(); - if (res) - pr_err("Unable to start in single node mode\n"); - else - pr_info("Started in single node mode\n"); - return res; -} - static void __exit tipc_exit(void) { - tipc_core_stop(); + tipc_net_stop(); + tipc_bearer_cleanup(); + tipc_netlink_stop(); + tipc_subscr_stop(); + tipc_nametbl_stop(); + tipc_socket_stop(); + tipc_unregister_sysctl(); + tipc_sk_rht_destroy(); + pr_info("Deactivated\n"); } -- cgit v1.2.3 From 2f55c43788df7358be8c6e78ae2a3d3268e7afb6 Mon Sep 17 00:00:00 2001 From: Ying Xue 
Date: Fri, 9 Jan 2015 15:27:00 +0800 Subject: tipc: remove unnecessary wrapper functions of kernel timer APIs Not only some wrapper function like k_term_timer() is empty, but also some others including k_start_timer() and k_cancel_timer() don't return back any value to its caller, what's more, there is no any component in the kernel world to do such thing. Therefore, these timer interfaces defined in tipc module should be purged. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/core.h | 67 ----------------------------------------------------- net/tipc/discover.c | 32 ++++++++++++++----------- net/tipc/link.c | 27 ++++++++++----------- net/tipc/link.h | 4 ++-- net/tipc/msg.h | 2 -- net/tipc/socket.c | 17 +++++++------- net/tipc/subscr.c | 17 ++++++-------- net/tipc/subscr.h | 2 +- 8 files changed, 50 insertions(+), 118 deletions(-) (limited to 'net') diff --git a/net/tipc/core.h b/net/tipc/core.h index 56fe4229fc5e..d57068961d4c 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -106,73 +106,6 @@ void tipc_unregister_sysctl(void); #define tipc_unregister_sysctl() #endif -/* - * TIPC timer code - */ -typedef void (*Handler) (unsigned long); - -/** - * k_init_timer - initialize a timer - * @timer: pointer to timer structure - * @routine: pointer to routine to invoke when timer expires - * @argument: value to pass to routine when timer expires - * - * Timer must be initialized before use (and terminated when no longer needed). - */ -static inline void k_init_timer(struct timer_list *timer, Handler routine, - unsigned long argument) -{ - setup_timer(timer, routine, argument); -} - -/** - * k_start_timer - start a timer - * @timer: pointer to timer structure - * @msec: time to delay (in ms) - * - * Schedules a previously initialized timer for later execution. - * If timer is already running, the new timeout overrides the previous request. - * - * To ensure the timer doesn't expire before the specified delay elapses, - * the amount of delay is rounded up when converting to the jiffies - * then an additional jiffy is added to account for the fact that - * the starting time may be in the middle of the current jiffy. - */ -static inline void k_start_timer(struct timer_list *timer, unsigned long msec) -{ - mod_timer(timer, jiffies + msecs_to_jiffies(msec) + 1); -} - -/** - * k_cancel_timer - cancel a timer - * @timer: pointer to timer structure - * - * Cancels a previously initialized timer. - * Can be called safely even if the timer is already inactive. - * - * WARNING: Must not be called when holding locks required by the timer's - * timeout routine, otherwise deadlock can occur on SMP systems! - */ -static inline void k_cancel_timer(struct timer_list *timer) -{ - del_timer_sync(timer); -} - -/** - * k_term_timer - terminate a timer - * @timer: pointer to timer structure - * - * Prevents further use of a previously initialized timer. - * - * WARNING: Caller must ensure timer isn't currently running. - * - * (Do not "enhance" this routine to automatically cancel an active timer, - * otherwise deadlock can arise when a timeout routine calls k_term_timer.) 
- */ -static inline void k_term_timer(struct timer_list *timer) -{ -} - /* * TIPC message buffer code * diff --git a/net/tipc/discover.c b/net/tipc/discover.c index aa722a42ef8b..1a3a98582034 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -38,10 +38,14 @@ #include "link.h" #include "discover.h" -#define TIPC_LINK_REQ_INIT 125 /* min delay during bearer start up */ -#define TIPC_LINK_REQ_FAST 1000 /* max delay if bearer has no links */ -#define TIPC_LINK_REQ_SLOW 60000 /* max delay if bearer has links */ -#define TIPC_LINK_REQ_INACTIVE 0xffffffff /* indicates no timer in use */ +/* min delay during bearer start up */ +#define TIPC_LINK_REQ_INIT msecs_to_jiffies(125) +/* max delay if bearer has no links */ +#define TIPC_LINK_REQ_FAST msecs_to_jiffies(1000) +/* max delay if bearer has links */ +#define TIPC_LINK_REQ_SLOW msecs_to_jiffies(60000) +/* indicates no timer in use */ +#define TIPC_LINK_REQ_INACTIVE 0xffffffff /** @@ -63,7 +67,7 @@ struct tipc_link_req { spinlock_t lock; struct sk_buff *buf; struct timer_list timer; - unsigned int timer_intv; + unsigned long timer_intv; }; /** @@ -265,7 +269,7 @@ static void disc_update(struct tipc_link_req *req) if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) || (req->timer_intv > TIPC_LINK_REQ_FAST)) { req->timer_intv = TIPC_LINK_REQ_INIT; - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&req->timer, jiffies + req->timer_intv); } } } @@ -295,12 +299,13 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) /** * disc_timeout - send a periodic link setup request - * @req: ptr to link request structure + * @data: ptr to link request structure * * Called whenever a link setup request timer associated with a bearer expires. */ -static void disc_timeout(struct tipc_link_req *req) +static void disc_timeout(unsigned long data) { + struct tipc_link_req *req = (struct tipc_link_req *)data; int max_delay; spin_lock_bh(&req->lock); @@ -329,7 +334,7 @@ static void disc_timeout(struct tipc_link_req *req) if (req->timer_intv > max_delay) req->timer_intv = max_delay; - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&req->timer, jiffies + req->timer_intv); exit: spin_unlock_bh(&req->lock); } @@ -363,8 +368,8 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); - k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); - k_start_timer(&req->timer, req->timer_intv); + setup_timer(&req->timer, disc_timeout, (unsigned long)req); + mod_timer(&req->timer, jiffies + req->timer_intv); b_ptr->link_req = req; tipc_bearer_send(req->bearer_id, req->buf, &req->dest); return 0; @@ -376,8 +381,7 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) */ void tipc_disc_delete(struct tipc_link_req *req) { - k_cancel_timer(&req->timer); - k_term_timer(&req->timer); + del_timer_sync(&req->timer); kfree_skb(req->buf); kfree(req); } @@ -397,7 +401,7 @@ void tipc_disc_reset(struct tipc_bearer *b_ptr) req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; - k_start_timer(&req->timer, req->timer_intv); + mod_timer(&req->timer, jiffies + req->timer_intv); tipc_bearer_send(req->bearer_id, req->buf, &req->dest); spin_unlock_bh(&req->lock); } diff --git a/net/tipc/link.c b/net/tipc/link.c index 082c3b5b32a1..f2531a8efa54 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -106,7 +106,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, static void 
tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf); -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); @@ -169,8 +169,9 @@ int tipc_link_is_active(struct tipc_link *l_ptr) * link_timeout - handle expiration of link timer * @l_ptr: pointer to link */ -static void link_timeout(struct tipc_link *l_ptr) +static void link_timeout(unsigned long data) { + struct tipc_link *l_ptr = (struct tipc_link *)data; struct sk_buff *skb; tipc_node_lock(l_ptr->owner); @@ -217,9 +218,9 @@ static void link_timeout(struct tipc_link *l_ptr) tipc_node_unlock(l_ptr->owner); } -static void link_set_timer(struct tipc_link *l_ptr, u32 time) +static void link_set_timer(struct tipc_link *link, unsigned long time) { - k_start_timer(&l_ptr->timer, time); + mod_timer(&link->timer, jiffies + time); } /** @@ -299,8 +300,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, tipc_node_attach_link(n_ptr, l_ptr); - k_init_timer(&l_ptr->timer, (Handler)link_timeout, - (unsigned long)l_ptr); + setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); link_state_event(l_ptr, STARTING_EVT); @@ -479,7 +479,7 @@ static void link_activate(struct tipc_link *l_ptr) static void link_state_event(struct tipc_link *l_ptr, unsigned int event) { struct tipc_link *other; - u32 cont_intv = l_ptr->continuity_interval; + unsigned long cont_intv = l_ptr->cont_intv; if (l_ptr->flags & LINK_STOPPED) return; @@ -1880,15 +1880,16 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) kfree_skb(buf); } -static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) { - if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) + unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; + + if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) return; - l_ptr->tolerance = tolerance; - l_ptr->continuity_interval = - ((tolerance / 4) > 500) ? 
500 : tolerance / 4; - l_ptr->abort_limit = tolerance / (l_ptr->continuity_interval / 4); + l_ptr->tolerance = tol; + l_ptr->cont_intv = msecs_to_jiffies(intv); + l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); } void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) diff --git a/net/tipc/link.h b/net/tipc/link.h index 55812e87ca1e..15ca850391df 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -105,7 +105,7 @@ struct tipc_stats { * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @continuity_interval: link continuity testing interval [in ms] + * @cont_intv: link continuity testing interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM * @fsm_msg_cnt: # of protocol messages link FSM has sent in current state @@ -146,7 +146,7 @@ struct tipc_link { u32 peer_bearer_id; u32 bearer_id; u32 tolerance; - u32 continuity_interval; + unsigned long cont_intv; u32 abort_limit; int state; u32 fsm_msg_cnt; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d5c83d7ecb47..1a52f7cf3cd3 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -77,12 +77,10 @@ #define TIPC_MEDIA_ADDR_OFFSET 5 - struct tipc_msg { __be32 hdr[15]; }; - static inline u32 msg_word(struct tipc_msg *m, u32 pos) { return ntohl(m->hdr[pos]); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 701f31bbbbfb..e16197eb7b9f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -47,7 +47,7 @@ #define SS_READY -2 /* socket is connectionless */ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */ +#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 #define TIPC_CONN_OK 0 #define TIPC_CONN_PROBING 1 @@ -68,7 +68,7 @@ * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: - * @probing_interval: + * @probing_intv: * @timer: * @port: port - interacts with 'sk' and with the rest of the TIPC stack * @peer_name: the peer of the connection, if any @@ -93,7 +93,7 @@ struct tipc_sock { struct list_head publications; u32 pub_count; u32 probing_state; - u32 probing_interval; + unsigned long probing_intv; struct timer_list timer; uint conn_timeout; atomic_t dupl_rcvcnt; @@ -361,7 +361,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -EINVAL; } msg_set_origport(msg, tsk->portid); - k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, tsk->portid); + setup_timer(&tsk->timer, tipc_sk_timeout, tsk->portid); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -511,7 +511,7 @@ static int tipc_release(struct socket *sock) } tipc_sk_withdraw(tsk, 0, NULL); - k_cancel_timer(&tsk->timer); + del_timer_sync(&tsk->timer); tipc_sk_remove(tsk); if (tsk->connected) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, @@ -522,7 +522,6 @@ static int tipc_release(struct socket *sock) tipc_link_xmit_skb(skb, dnode, tsk->portid); tipc_node_remove_conn(dnode, tsk->portid); } - k_term_timer(&tsk->timer); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -1139,10 +1138,10 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); 
msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_interval = CONN_PROBING_INTERVAL; + tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; - k_start_timer(&tsk->timer, tsk->probing_interval); + mod_timer(&tsk->timer, jiffies + tsk->probing_intv); tipc_node_add_conn(peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->portid); } @@ -2128,7 +2127,7 @@ static void tipc_sk_timeout(unsigned long portid) 0, peer_node, tipc_own_addr, peer_port, portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; - k_start_timer(&tsk->timer, tsk->probing_interval); + mod_timer(&tsk->timer, jiffies + tsk->probing_intv); } bh_unlock_sock(sk); if (skb) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0344206b984f..e6cb959371dc 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -141,8 +141,9 @@ void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } -static void subscr_timeout(struct tipc_subscription *sub) +static void subscr_timeout(unsigned long data) { + struct tipc_subscription *sub = (struct tipc_subscription *)data; struct tipc_subscriber *subscriber = sub->subscriber; /* The spin lock per subscriber is used to protect its members */ @@ -167,7 +168,6 @@ static void subscr_timeout(struct tipc_subscription *sub) TIPC_SUBSCR_TIMEOUT, 0, 0); /* Now destroy subscription */ - k_term_timer(&sub->timer); kfree(sub); atomic_dec(&subscription_count); } @@ -207,8 +207,7 @@ static void subscr_release(struct tipc_subscriber *subscriber) subscription_list) { if (sub->timeout != TIPC_WAIT_FOREVER) { spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); + del_timer_sync(&sub->timer); spin_lock_bh(&subscriber->lock); } subscr_del(sub); @@ -250,8 +249,7 @@ static void subscr_cancel(struct tipc_subscr *s, if (sub->timeout != TIPC_WAIT_FOREVER) { sub->timeout = TIPC_WAIT_FOREVER; spin_unlock_bh(&subscriber->lock); - k_cancel_timer(&sub->timer); - k_term_timer(&sub->timer); + del_timer_sync(&sub->timer); spin_lock_bh(&subscriber->lock); } subscr_del(sub); @@ -296,7 +294,7 @@ static int subscr_subscribe(struct tipc_subscr *s, sub->seq.type = htohl(s->seq.type, swap); sub->seq.lower = htohl(s->seq.lower, swap); sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = htohl(s->timeout, swap); + sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap)); sub->filter = htohl(s->filter, swap); if ((!(sub->filter & TIPC_SUB_PORTS) == !(sub->filter & TIPC_SUB_SERVICE)) || @@ -311,9 +309,8 @@ static int subscr_subscribe(struct tipc_subscr *s, memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); atomic_inc(&subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { - k_init_timer(&sub->timer, - (Handler)subscr_timeout, (unsigned long)sub); - k_start_timer(&sub->timer, sub->timeout); + setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub); + mod_timer(&sub->timer, jiffies + sub->timeout); } *sub_p = sub; return 0; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 393e417bee3f..ab01713bfe32 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -58,7 +58,7 @@ struct tipc_subscriber; struct tipc_subscription { struct tipc_subscriber *subscriber; struct tipc_name_seq seq; - u32 timeout; + unsigned long timeout; u32 filter; struct timer_list timer; struct list_head nameseq_list; -- cgit v1.2.3 From 859fc7c0cedca0f84dac471fa31e9512259e1ecd Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 
Jan 2015 15:27:01 +0800 Subject: tipc: cleanup core.c and core.h files Only the works of initializing and shutting down tipc module are done in core.h and core.c files, so all stuffs which are not closely associated with the two tasks should be moved to appropriate places. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/addr.h | 2 -- net/tipc/config.h | 4 ++-- net/tipc/core.c | 23 ----------------------- net/tipc/core.h | 53 ----------------------------------------------------- net/tipc/link.h | 4 ++++ net/tipc/msg.c | 23 +++++++++++++++++++++++ net/tipc/msg.h | 37 +++++++++++++++++++++++++++++-------- net/tipc/netlink.h | 3 +++ net/tipc/server.c | 1 + net/tipc/server.h | 3 ++- net/tipc/socket.h | 7 +++++++ net/tipc/subscr.h | 3 +++ 12 files changed, 74 insertions(+), 89 deletions(-) (limited to 'net') diff --git a/net/tipc/addr.h b/net/tipc/addr.h index a74acf9ee804..60b00ab93d74 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,8 +37,6 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H -#include "core.h" - #define TIPC_ZONE_MASK 0xff000000u #define TIPC_CLUSTER_MASK 0xfffff000u diff --git a/net/tipc/config.h b/net/tipc/config.h index 47b1bf181612..a41a41c58750 100644 --- a/net/tipc/config.h +++ b/net/tipc/config.h @@ -37,10 +37,10 @@ #ifndef _TIPC_CONFIG_H #define _TIPC_CONFIG_H -/* ---------------------------------------------------------------------- */ - #include "link.h" +#define ULTRA_STRING_MAX_LEN 32768 + struct sk_buff *tipc_cfg_reply_alloc(int payload_size); int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, void *tlv_data, int tlv_data_size); diff --git a/net/tipc/core.c b/net/tipc/core.c index 10bc0bf909e4..b6ec3d7c5f51 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -52,29 +52,6 @@ u32 tipc_own_addr __read_mostly; int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ -/** - * tipc_buf_acquire - creates a TIPC message buffer - * @size: message size (including TIPC header) - * - * Returns a new buffer with data pointers set to the specified size. - * - * NOTE: Headroom is reserved to allow prepending of a data link header. - * There may also be unrequested tailroom present at the buffer's end. 
- */ -struct sk_buff *tipc_buf_acquire(u32 size) -{ - struct sk_buff *skb; - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); - if (skb) { - skb_reserve(skb, BUF_HEADROOM); - skb_put(skb, size); - skb->next = NULL; - } - return skb; -} - static int __init tipc_init(void) { int err; diff --git a/net/tipc/core.h b/net/tipc/core.h index d57068961d4c..a5b3140f1451 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -60,19 +60,8 @@ #define TIPC_MOD_VER "2.0.0" -#define ULTRA_STRING_MAX_LEN 32768 -#define TIPC_MAX_SUBSCRIPTIONS 65535 -#define TIPC_MAX_PUBLICATIONS 65535 - -struct tipc_msg; /* msg.h */ - int tipc_snprintf(char *buf, int len, const char *fmt, ...); -/* - * TIPC-specific error codes - */ -#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ - /* * Global configuration variables */ @@ -86,18 +75,6 @@ extern int sysctl_tipc_named_timeout __read_mostly; */ extern int tipc_random __read_mostly; -/* - * Routines available to privileged subsystems - */ -int tipc_netlink_start(void); -void tipc_netlink_stop(void); -int tipc_socket_init(void); -void tipc_socket_stop(void); -int tipc_sock_create_local(int type, struct socket **res); -void tipc_sock_release_local(struct socket *sock); -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags); - #ifdef CONFIG_SYSCTL int tipc_register_sysctl(void); void tipc_unregister_sysctl(void); @@ -106,34 +83,4 @@ void tipc_unregister_sysctl(void); #define tipc_unregister_sysctl() #endif -/* - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). - * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM LL_MAX_HEADER - -struct tipc_skb_cb { - void *handle; - struct sk_buff *tail; - bool deferred; - bool wakeup_pending; - bool bundling; - u16 chain_sz; - u16 chain_imp; -}; - -#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) - -static inline struct tipc_msg *buf_msg(struct sk_buff *skb) -{ - return (struct tipc_msg *)skb->data; -} - -struct sk_buff *tipc_buf_acquire(u32 size); - #endif diff --git a/net/tipc/link.h b/net/tipc/link.h index 15ca850391df..e96c318f5c73 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -41,6 +41,10 @@ #include "msg.h" #include "node.h" +/* TIPC-specific error codes +*/ +#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ + /* Out-of-range value for link sequence numbers */ #define INVALID_LINK_SEQ 0x10000 diff --git a/net/tipc/msg.c b/net/tipc/msg.c index a687b30a699c..35523fb6668c 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -46,6 +46,29 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } +/** + * tipc_buf_acquire - creates a TIPC message buffer + * @size: message size (including TIPC header) + * + * Returns a new buffer with data pointers set to the specified size. + * + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. 
+ */ +struct sk_buff *tipc_buf_acquire(u32 size) +{ + struct sk_buff *skb; + unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; + + skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + if (skb) { + skb_reserve(skb, BUF_HEADROOM); + skb_put(skb, size); + skb->next = NULL; + } + return skb; +} + void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode) { diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 1a52f7cf3cd3..0065a2e8ad9b 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -77,10 +77,38 @@ #define TIPC_MEDIA_ADDR_OFFSET 5 +/** + * TIPC message buffer code + * + * TIPC message buffer headroom reserves space for the worst-case + * link-level device header (in case the message is sent off-node). + * + * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields + * are word aligned for quicker access + */ +#define BUF_HEADROOM LL_MAX_HEADER + +struct tipc_skb_cb { + void *handle; + struct sk_buff *tail; + bool deferred; + bool wakeup_pending; + bool bundling; + u16 chain_sz; + u16 chain_imp; +}; + +#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) + struct tipc_msg { __be32 hdr[15]; }; +static inline struct tipc_msg *buf_msg(struct sk_buff *skb) +{ + return (struct tipc_msg *)skb->data; +} + static inline u32 msg_word(struct tipc_msg *m, u32 pos) { return ntohl(m->hdr[pos]); @@ -719,27 +747,20 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) return msg_origport(m); } +struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); - int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); - void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); - struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); - int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); - bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); - bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu, u32 dnode); - int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); - struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); #endif diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index 1425c6869de0..ae2f2d923a15 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -45,4 +45,7 @@ struct tipc_nl_msg { u32 seq; }; +int tipc_netlink_start(void); +void tipc_netlink_stop(void); + #endif diff --git a/net/tipc/server.c b/net/tipc/server.c index a538a02f869b..869eb0905754 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -35,6 +35,7 @@ #include "server.h" #include "core.h" +#include "socket.h" #include /* Number of messages to send before rescheduling */ diff --git a/net/tipc/server.h b/net/tipc/server.h index be817b0b547e..87bc05c70dce 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -36,7 +36,8 @@ #ifndef _TIPC_SERVER_H #define _TIPC_SERVER_H -#include "core.h" +#include +#include #define TIPC_SERVER_NAME_LEN 32 diff --git a/net/tipc/socket.h b/net/tipc/socket.h index c7d46d069d89..46bc370d82c7 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -42,6 +42,13 @@ #define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) #define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) + +int tipc_socket_init(void); +void tipc_socket_stop(void); +int tipc_sock_create_local(int type, struct socket **res); +void 
tipc_sock_release_local(struct socket *sock); +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags); int tipc_sk_rcv(struct sk_buff *buf); struct sk_buff *tipc_sk_socks_show(void); void tipc_sk_mcast_rcv(struct sk_buff *buf); diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ab01713bfe32..0d3958956aca 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -39,6 +39,9 @@ #include "server.h" +#define TIPC_MAX_SUBSCRIPTIONS 65535 +#define TIPC_MAX_PUBLICATIONS 65535 + struct tipc_subscription; struct tipc_subscriber; -- cgit v1.2.3 From f2f2a96a20d52d65aa79bd4019af43bbfb0e1528 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:02 +0800 Subject: tipc: feed tipc sock pointer to tipc_sk_timeout routine In order to make tipc socket table aware of namespace, a networking namespace instance must be passed to tipc_sk_lookup(), allowing it to look up tipc socket instance with a given port ID from a concrete socket table. However, as now tipc_sk_timeout() only has one port ID parameter and is not namespace aware, it's unable to obtain a correct socket instance through tipc_sk_lookup() just with a port ID, especially after namespace is completely supported. If port ID is replaced with socket instance as tipc_sk_timeout()'s parameter, it's unnecessary to look up socket table. But as the timer handler - tipc_sk_timeout() is run asynchronously, socket reference must be held before its timer is launched, and must be carefully checked to identify whether the socket reference needs to be put or not when its timer is terminated. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e16197eb7b9f..c58f66be7e18 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -110,7 +110,7 @@ static void tipc_write_space(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); -static void tipc_sk_timeout(unsigned long portid); +static void tipc_sk_timeout(unsigned long data); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, @@ -361,7 +361,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -EINVAL; } msg_set_origport(msg, tsk->portid); - setup_timer(&tsk->timer, tipc_sk_timeout, tsk->portid); + setup_timer(&tsk->timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -475,7 +475,7 @@ static int tipc_release(struct socket *sock) struct sock *sk = sock->sk; struct tipc_sock *tsk; struct sk_buff *skb; - u32 dnode; + u32 dnode, probing_state; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -511,7 +511,9 @@ static int tipc_release(struct socket *sock) } tipc_sk_withdraw(tsk, 0, NULL); - del_timer_sync(&tsk->timer); + probing_state = tsk->probing_state; + if (del_timer_sync(&tsk->timer) && probing_state != TIPC_CONN_PROBING) + sock_put(sk); tipc_sk_remove(tsk); if (tsk->connected) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, @@ -1141,7 +1143,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, 
u32 peer_port, tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; - mod_timer(&tsk->timer, jiffies + tsk->probing_intv); + if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) + sock_hold(&tsk->sk); tipc_node_add_conn(peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->portid); } @@ -2096,18 +2099,13 @@ restart: return res; } -static void tipc_sk_timeout(unsigned long portid) +static void tipc_sk_timeout(unsigned long data) { - struct tipc_sock *tsk; - struct sock *sk; + struct tipc_sock *tsk = (struct tipc_sock *)data; + struct sock *sk = &tsk->sk; struct sk_buff *skb = NULL; u32 peer_port, peer_node; - tsk = tipc_sk_lookup(portid); - if (!tsk) - return; - - sk = &tsk->sk; bh_lock_sock(sk); if (!tsk->connected) { bh_unlock_sock(sk); @@ -2120,18 +2118,19 @@ static void tipc_sk_timeout(unsigned long portid) /* Previous probe not answered -> self abort */ skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, tipc_own_addr, - peer_node, portid, peer_port, + peer_node, tsk->portid, peer_port, TIPC_ERR_NO_PORT); } else { skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, tipc_own_addr, - peer_port, portid, TIPC_OK); + peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; - mod_timer(&tsk->timer, jiffies + tsk->probing_intv); + if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) + sock_hold(sk); } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(skb, peer_node, portid); + tipc_link_xmit_skb(skb, peer_node, tsk->portid); exit: sock_put(sk); } -- cgit v1.2.3 From 54fef04ad05f15984082c225fe47ce6af8ea1c5c Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:03 +0800 Subject: tipc: remove unused tipc_link_get_max_pkt routine Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 27 --------------------------- net/tipc/link.h | 1 - 2 files changed, 28 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index f2531a8efa54..f23105852cb3 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2267,33 +2267,6 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s return buf; } -/** - * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination - * @dest: network address of destination node - * @selector: used to select from set of active links - * - * If no active link can be found, uses default maximum packet size. 
- */ -u32 tipc_link_get_max_pkt(u32 dest, u32 selector) -{ - struct tipc_node *n_ptr; - struct tipc_link *l_ptr; - u32 res = MAX_PKT_DEFAULT; - - if (dest == tipc_own_addr) - return MAX_MSG_SIZE; - - n_ptr = tipc_node_find(dest); - if (n_ptr) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector & 1]; - if (l_ptr) - res = l_ptr->max_pkt; - tipc_node_unlock(n_ptr); - } - return res; -} - static void link_print(struct tipc_link *l_ptr, const char *str) { struct tipc_bearer *b_ptr; diff --git a/net/tipc/link.h b/net/tipc/link.h index e96c318f5c73..692268dca4b9 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -220,7 +220,6 @@ void tipc_link_reset_list(unsigned int bearer_id); int tipc_link_xmit_skb(struct sk_buff *skb, u32 dest, u32 selector); int tipc_link_xmit(struct sk_buff_head *list, u32 dest, u32 selector); int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list); -u32 tipc_link_get_max_pkt(u32 dest, u32 selector); void tipc_link_bundle_rcv(struct sk_buff *buf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); -- cgit v1.2.3 From c93d3baa24095887005647984cff5de8c63d3611 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:04 +0800 Subject: tipc: involve namespace infrastructure Involve namespace infrastructure, make the "tipc_net_id" global variable aware of per namespace, and rename it to "net_id". In order that the conversion can be successfully done, an instance of networking namespace must be passed to relevant functions, allowing them to access the "net_id" variable of per namespace. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 15 ++++++++++----- net/tipc/bcast.h | 5 +++-- net/tipc/bearer.c | 29 ++++++++++++----------------- net/tipc/bearer.h | 5 +++-- net/tipc/config.c | 29 ++++++++++++++++------------- net/tipc/config.h | 2 +- net/tipc/core.c | 28 +++++++++++++++++++++++++++- net/tipc/core.h | 5 +++++ net/tipc/discover.c | 26 +++++++++++++++++--------- net/tipc/discover.h | 8 +++++--- net/tipc/link.c | 47 +++++++++++++++++++++++++++-------------------- net/tipc/msg.c | 4 ++++ net/tipc/net.c | 22 ++++++++++++++-------- net/tipc/net.h | 2 +- net/tipc/netlink.c | 10 ++++++---- 15 files changed, 151 insertions(+), 86 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a9e174fc0f91..f98231138916 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -327,9 +327,11 @@ exit: * * RCU and node lock set */ -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) +void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, + u32 last_sent) { struct sk_buff *buf; + struct tipc_net *tn = net_generic(net, tipc_net_id); /* Ignore "stale" link state info */ if (less_eq(last_sent, n_ptr->bclink.last_in)) @@ -362,7 +364,7 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); + msg_set_mc_netid(msg, tn->net_id); msg_set_bcast_ack(msg, n_ptr->bclink.last_in); msg_set_bcgap_after(msg, n_ptr->bclink.last_in); msg_set_bcgap_to(msg, to); @@ -476,8 +478,9 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) * * RCU is locked, no other locks set */ -void tipc_bclink_rcv(struct sk_buff *buf) +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) { + struct 
tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg = buf_msg(buf); struct tipc_node *node; u32 next_in; @@ -485,7 +488,7 @@ void tipc_bclink_rcv(struct sk_buff *buf) int deferred = 0; /* Screen out unwanted broadcast messages */ - if (msg_mc_netid(msg) != tipc_net_id) + if (msg_mc_netid(msg) != tn->net_id) goto exit; node = tipc_node_find(msg_prevnode(msg)); @@ -638,6 +641,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, { int bp_index; struct tipc_msg *msg = buf_msg(buf); + struct net *net = sock_net(buf->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; @@ -647,7 +652,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (likely(!msg_non_seq(buf_msg(buf)))) { bcbuf_set_acks(buf, bclink->bcast_nodes.count); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); + msg_set_mc_netid(msg, tn->net_id); bcl->stats.sent_info++; if (WARN_ON(!bclink->bcast_nodes.count)) { diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 644d79129fba..a5fd22438aed 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -91,10 +91,11 @@ void tipc_bclink_add_node(u32 addr); void tipc_bclink_remove_node(u32 addr); struct tipc_node *tipc_bclink_retransmit_to(void); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -void tipc_bclink_rcv(struct sk_buff *buf); +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); u32 tipc_bclink_get_last_sent(void); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent); +void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, + u32 last_sent); int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 463db5b15b8b..cdd30337dc5e 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -260,7 +260,8 @@ void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) /** * tipc_enable_bearer - enable bearer with the given name */ -int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) +int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, + u32 priority) { struct tipc_bearer *b_ptr; struct tipc_media *m_ptr; @@ -361,7 +362,7 @@ restart: b_ptr->net_plane = bearer_id + 'A'; b_ptr->priority = priority; - res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr); + res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); if (res) { bearer_disable(b_ptr, false); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", @@ -380,11 +381,11 @@ restart: /** * tipc_reset_bearer - Reset all links established over this bearer */ -static int tipc_reset_bearer(struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) { pr_info("Resetting bearer <%s>\n", b_ptr->name); tipc_link_reset_list(b_ptr->identity); - tipc_disc_reset(b_ptr); + tipc_disc_reset(net, b_ptr); return 0; } @@ -539,17 +540,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, { struct tipc_bearer *b_ptr; - if (!net_eq(dev_net(dev), &init_net)) { - kfree_skb(buf); - return NET_RX_DROP; - } - rcu_read_lock(); b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_rcv(buf, b_ptr); + tipc_rcv(dev_net(dev), 
buf, b_ptr); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -572,11 +568,9 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, void *ptr) { - struct tipc_bearer *b_ptr; struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; + struct net *net = dev_net(dev); + struct tipc_bearer *b_ptr; b_ptr = rtnl_dereference(dev->tipc_ptr); if (!b_ptr) @@ -590,12 +584,12 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_DOWN: case NETDEV_CHANGEMTU: - tipc_reset_bearer(b_ptr); + tipc_reset_bearer(net, b_ptr); break; case NETDEV_CHANGEADDR: b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, (char *)dev->dev_addr); - tipc_reset_bearer(b_ptr); + tipc_reset_bearer(net, b_ptr); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: @@ -808,6 +802,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; @@ -847,7 +842,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = tipc_enable_bearer(bearer, domain, prio); + err = tipc_enable_bearer(net, bearer, domain, prio); if (err) { rtnl_unlock(); return err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 2c1230ac5dfe..43f683aebbbe 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -165,8 +165,9 @@ extern struct tipc_bearer __rcu *bearer_list[]; * TIPC routines available to supported media types */ -void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *tb_ptr); -int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); +int tipc_enable_bearer(struct net *net, const char *bearer_name, + u32 disc_domain, u32 priority); int tipc_disable_bearer(const char *name); /* diff --git a/net/tipc/config.c b/net/tipc/config.c index 0b3a90ecab6d..28d4272803c4 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -134,7 +134,7 @@ static struct sk_buff *tipc_show_stats(void) return buf; } -static struct sk_buff *cfg_enable_bearer(void) +static struct sk_buff *cfg_enable_bearer(struct net *net) { struct tipc_bearer_config *args; @@ -142,7 +142,7 @@ static struct sk_buff *cfg_enable_bearer(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); - if (tipc_enable_bearer(args->name, + if (tipc_enable_bearer(net, args->name, ntohl(args->disc_domain), ntohl(args->priority))) return tipc_cfg_reply_error_string("unable to enable bearer"); @@ -161,7 +161,7 @@ static struct sk_buff *cfg_disable_bearer(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_own_addr(void) +static struct sk_buff *cfg_set_own_addr(struct net *net) { u32 addr; @@ -177,20 +177,21 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - if (!tipc_net_start(addr)) + if (!tipc_net_start(net, addr)) return tipc_cfg_reply_none(); return tipc_cfg_reply_error_string("cannot change to network mode"); } -static struct sk_buff *cfg_set_netid(void) +static struct sk_buff *cfg_set_netid(struct net *net) { + struct tipc_net *tn = net_generic(net, 
tipc_net_id); u32 value; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_net_id) + if (value == tn->net_id) return tipc_cfg_reply_none(); if (value < 1 || value > 9999) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE @@ -198,14 +199,16 @@ static struct sk_buff *cfg_set_netid(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change network id once TIPC has joined a network)"); - tipc_net_id = value; + tn->net_id = value; return tipc_cfg_reply_none(); } -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area, - int request_space, int reply_headroom) +struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, + const void *request_area, int request_space, + int reply_headroom) { struct sk_buff *rep_tlv_buf; + struct tipc_net *tn = net_generic(net, tipc_net_id); rtnl_lock(); @@ -261,19 +264,19 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area rep_tlv_buf = tipc_link_cmd_config(req_tlv_area, req_tlv_space, cmd); break; case TIPC_CMD_ENABLE_BEARER: - rep_tlv_buf = cfg_enable_bearer(); + rep_tlv_buf = cfg_enable_bearer(net); break; case TIPC_CMD_DISABLE_BEARER: rep_tlv_buf = cfg_disable_bearer(); break; case TIPC_CMD_SET_NODE_ADDR: - rep_tlv_buf = cfg_set_own_addr(); + rep_tlv_buf = cfg_set_own_addr(net); break; case TIPC_CMD_SET_NETID: - rep_tlv_buf = cfg_set_netid(); + rep_tlv_buf = cfg_set_netid(net); break; case TIPC_CMD_GET_NETID: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); + rep_tlv_buf = tipc_cfg_reply_unsigned(tn->net_id); break; case TIPC_CMD_NOT_NET_ADMIN: rep_tlv_buf = diff --git a/net/tipc/config.h b/net/tipc/config.h index a41a41c58750..9e9b575fc429 100644 --- a/net/tipc/config.h +++ b/net/tipc/config.h @@ -61,7 +61,7 @@ static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string) return tipc_cfg_reply_string_type(TIPC_TLV_ULTRA_STRING, string); } -struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, +struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, const void *req_tlv_area, int req_tlv_space, int headroom); #endif diff --git a/net/tipc/core.c b/net/tipc/core.c index b6ec3d7c5f51..a2302480d8cf 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -52,6 +52,26 @@ u32 tipc_own_addr __read_mostly; int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ +static int __net_init tipc_init_net(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tn->net_id = 4711; + + return 0; +} + +static void __net_exit tipc_exit_net(struct net *net) +{ +} + +static struct pernet_operations tipc_net_ops = { + .init = tipc_init_net, + .exit = tipc_exit_net, + .id = &tipc_net_id, + .size = sizeof(struct tipc_net), +}; + static int __init tipc_init(void) { int err; @@ -59,7 +79,6 @@ static int __init tipc_init(void) pr_info("Activated (version " TIPC_MOD_VER ")\n"); tipc_own_addr = 0; - tipc_net_id = 4711; sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; @@ -69,6 +88,10 @@ static int __init tipc_init(void) get_random_bytes(&tipc_random, sizeof(tipc_random)); + err = register_pernet_subsys(&tipc_net_ops); + if (err) + goto out_pernet; + err = tipc_sk_rht_init(); if (err) goto out_reftbl; @@ -112,12 +135,15 @@ out_netlink: out_nametbl: tipc_sk_rht_destroy(); out_reftbl: + unregister_pernet_subsys(&tipc_net_ops); 
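/*
 * Editor's aside (illustrative sketch only, not part of this patch): with
 * tipc_net_ops registered above (.id = &tipc_net_id, .size =
 * sizeof(struct tipc_net)), any code holding a struct net pointer can reach
 * the per-namespace TIPC state via net_generic().  The helper name below is
 * hypothetical; it only demonstrates the accessor pattern that the rest of
 * this series applies throughout.
 */
static u32 tipc_example_net_id(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	return tn->net_id;
}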
+out_pernet: pr_err("Unable to start in single node mode\n"); return err; } static void __exit tipc_exit(void) { + unregister_pernet_subsys(&tipc_net_ops); tipc_net_stop(); tipc_bearer_cleanup(); tipc_netlink_stop(); diff --git a/net/tipc/core.h b/net/tipc/core.h index a5b3140f1451..106e8150c3a6 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -57,6 +57,7 @@ #include #include #include +#include #define TIPC_MOD_VER "2.0.0" @@ -75,6 +76,10 @@ extern int sysctl_tipc_named_timeout __read_mostly; */ extern int tipc_random __read_mostly; +struct tipc_net { + int net_id; +}; + #ifdef CONFIG_SYSCTL int tipc_register_sysctl(void); void tipc_unregister_sysctl(void); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 1a3a98582034..246a23788ded 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -72,12 +72,14 @@ struct tipc_link_req { /** * tipc_disc_init_msg - initialize a link setup message + * @net: the applicable net namespace * @type: message type (request or response) * @b_ptr: ptr to bearer issuing message */ -static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, +static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, struct tipc_bearer *b_ptr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg; u32 dest_domain = b_ptr->domain; @@ -86,7 +88,7 @@ static void tipc_disc_init_msg(struct sk_buff *buf, u32 type, msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tipc_random); msg_set_dest_domain(msg, dest_domain); - msg_set_bc_netid(msg, tipc_net_id); + msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); } @@ -111,11 +113,14 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, /** * tipc_disc_rcv - handle incoming discovery message (request or response) + * @net: the applicable net namespace * @buf: buffer containing message * @bearer: bearer that message arrived on */ -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *bearer) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_link *link; struct tipc_media_addr maddr; @@ -137,7 +142,7 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) kfree_skb(buf); /* Ensure message from node is valid and communication is permitted */ - if (net_id != tipc_net_id) + if (net_id != tn->net_id) return; if (maddr.broadcast) return; @@ -248,7 +253,7 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *bearer) if (respond && (mtyp == DSC_REQ_MSG)) { rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { - tipc_disc_init_msg(rbuf, DSC_RESP_MSG, bearer); + tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); tipc_bearer_send(bearer->identity, rbuf, &maddr); kfree_skb(rbuf); } @@ -341,13 +346,15 @@ exit: /** * tipc_disc_create - create object to send periodic link setup requests + * @net: the applicable net namespace * @b_ptr: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain to which links can be established * * Returns 0 if successful, otherwise -errno. 
*/ -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest) { struct tipc_link_req *req; @@ -361,7 +368,7 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) return -ENOMEM; } - tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; @@ -388,15 +395,16 @@ void tipc_disc_delete(struct tipc_link_req *req) /** * tipc_disc_reset - reset object to send periodic link setup requests + * @net: the applicable net namespace * @b_ptr: ptr to bearer issuing requests * @dest_domain: network domain to which links can be established */ -void tipc_disc_reset(struct tipc_bearer *b_ptr) +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) { struct tipc_link_req *req = b_ptr->link_req; spin_lock_bh(&req->lock); - tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; req->num_nodes = 0; diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 515b57392f4d..c9b12770c5ed 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -39,11 +39,13 @@ struct tipc_link_req; -int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest); void tipc_disc_delete(struct tipc_link_req *req); -void tipc_disc_reset(struct tipc_bearer *b_ptr); +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); void tipc_disc_remove_dest(struct tipc_link_req *req); -void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr); +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b_ptr); #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index f23105852cb3..248813cb6d68 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -101,10 +101,12 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { */ #define START_CHANGEOVER 100000u -static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, +static void link_handle_out_of_seq_msg(struct net *net, + struct tipc_link *l_ptr, struct sk_buff *buf); -static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); -static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, +static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, + struct sk_buff *buf); +static int tipc_link_tunnel_rcv(struct net *net, struct tipc_node *n_ptr, struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); @@ -113,7 +115,8 @@ static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf); -static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf); +static int tipc_link_prepare_input(struct net *net, struct tipc_link *l, + struct sk_buff **buf); /* * Simple link routines @@ -1063,13 +1066,14 @@ static int link_recv_buf_validate(struct sk_buff *buf) /** * tipc_rcv - process TIPC packets/messages arriving 
from off-node + * @net: net namespace handler * @skb: TIPC packet * @b_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. */ -void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) { struct sk_buff_head head; struct tipc_node *n_ptr; @@ -1096,9 +1100,9 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) if (unlikely(msg_non_seq(msg))) { if (msg_user(msg) == LINK_CONFIG) - tipc_disc_rcv(skb, b_ptr); + tipc_disc_rcv(net, skb, b_ptr); else - tipc_bclink_rcv(skb); + tipc_bclink_rcv(net, skb); continue; } @@ -1159,7 +1163,7 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { - tipc_link_proto_rcv(l_ptr, skb); + tipc_link_proto_rcv(net, l_ptr, skb); link_retrieve_defq(l_ptr, &head); tipc_node_unlock(n_ptr); continue; @@ -1179,7 +1183,7 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Link is now in state WORKING_WORKING */ if (unlikely(seq_no != mod(l_ptr->next_in_no))) { - link_handle_out_of_seq_msg(l_ptr, skb); + link_handle_out_of_seq_msg(net, l_ptr, skb); link_retrieve_defq(l_ptr, &head); tipc_node_unlock(n_ptr); continue; @@ -1193,7 +1197,7 @@ void tipc_rcv(struct sk_buff *skb, struct tipc_bearer *b_ptr) tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } - if (tipc_link_prepare_input(l_ptr, &skb)) { + if (tipc_link_prepare_input(net, l_ptr, &skb)) { tipc_node_unlock(n_ptr); continue; } @@ -1216,7 +1220,8 @@ discard: * * Node lock must be held */ -static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) +static int tipc_link_prepare_input(struct net *net, struct tipc_link *l, + struct sk_buff **buf) { struct tipc_node *n; struct tipc_msg *msg; @@ -1226,7 +1231,7 @@ static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) msg = buf_msg(*buf); switch (msg_user(msg)) { case CHANGEOVER_PROTOCOL: - if (tipc_link_tunnel_rcv(n, buf)) + if (tipc_link_tunnel_rcv(net, n, buf)) res = 0; break; case MSG_FRAGMENTER: @@ -1325,13 +1330,14 @@ u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) /* * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet */ -static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, +static void link_handle_out_of_seq_msg(struct net *net, + struct tipc_link *l_ptr, struct sk_buff *buf) { u32 seq_no = buf_seqno(buf); if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) { - tipc_link_proto_rcv(l_ptr, buf); + tipc_link_proto_rcv(net, l_ptr, buf); return; } @@ -1455,7 +1461,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, * Note that network plane id propagates through the network, and may * change at any time. 
The node with lowest address rules */ -static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) +static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, + struct sk_buff *buf) { u32 rec_gap = 0; u32 max_pkt_info; @@ -1571,7 +1578,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) /* Protocol message before retransmits, reduce loss risk */ if (l_ptr->owner->bclink.recv_permitted) - tipc_bclink_update_link_state(l_ptr->owner, + tipc_bclink_update_link_state(net, l_ptr->owner, msg_last_bcast(msg)); if (rec_gap || (msg_probe(msg))) { @@ -1748,7 +1755,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) /* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. * Owner node is locked. */ -static void tipc_link_dup_rcv(struct tipc_link *l_ptr, +static void tipc_link_dup_rcv(struct net *net, struct tipc_link *l_ptr, struct sk_buff *t_buf) { struct sk_buff *buf; @@ -1763,7 +1770,7 @@ static void tipc_link_dup_rcv(struct tipc_link *l_ptr, } /* Add buffer to deferred queue, if applicable: */ - link_handle_out_of_seq_msg(l_ptr, buf); + link_handle_out_of_seq_msg(net, l_ptr, buf); } /* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet @@ -1817,7 +1824,7 @@ exit: * returned to the active link for delivery upwards. * Owner node is locked. */ -static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, +static int tipc_link_tunnel_rcv(struct net *net, struct tipc_node *n_ptr, struct sk_buff **buf) { struct sk_buff *t_buf = *buf; @@ -1835,7 +1842,7 @@ static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, goto exit; if (msg_type(t_msg) == DUPLICATE_MSG) - tipc_link_dup_rcv(l_ptr, t_buf); + tipc_link_dup_rcv(net, l_ptr, t_buf); else if (msg_type(t_msg) == ORIGINAL_MSG) *buf = tipc_link_failover_rcv(l_ptr, t_buf); else diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 35523fb6668c..a38f6a680df1 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -34,6 +34,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include #include "core.h" #include "msg.h" #include "addr.h" @@ -214,6 +215,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, skb = tipc_buf_acquire(msz); if (unlikely(!skb)) return -ENOMEM; + skb_orphan(skb); __skb_queue_tail(list, skb); skb_copy_to_linear_data(skb, mhdr, mhsz); pktpos = skb->data + mhsz; @@ -234,6 +236,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, skb = tipc_buf_acquire(pktmax); if (!skb) return -ENOMEM; + skb_orphan(skb); __skb_queue_tail(list, skb); pktpos = skb->data; skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE); @@ -267,6 +270,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, rc = -ENOMEM; goto error; } + skb_orphan(skb); __skb_queue_tail(list, skb); msg_set_type(&pkthdr, FRAGMENT); msg_set_size(&pkthdr, pktsz); diff --git a/net/tipc/net.c b/net/tipc/net.c index cf13df3cde8f..5ce9d628f2d0 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -108,8 +108,9 @@ static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { * - A local spin_lock protecting the queue of subscriber events. 
*/ -int tipc_net_start(u32 addr) +int tipc_net_start(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); char addr_string[16]; int res; @@ -125,7 +126,8 @@ int tipc_net_start(u32 addr) pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", - tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); + tipc_addr_string_fill(addr_string, tipc_own_addr), + tn->net_id); return 0; } @@ -144,8 +146,9 @@ void tipc_net_stop(void) pr_info("Left network mode\n"); } -static int __tipc_nl_add_net(struct tipc_nl_msg *msg) +static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) { + struct tipc_net *tn = net_generic(net, tipc_net_id); void *hdr; struct nlattr *attrs; @@ -158,7 +161,7 @@ static int __tipc_nl_add_net(struct tipc_nl_msg *msg) if (!attrs) goto msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tipc_net_id)) + if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id)) goto attr_msg_full; nla_nest_end(msg->skb, attrs); @@ -176,6 +179,7 @@ msg_full: int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); int err; int done = cb->args[0]; struct tipc_nl_msg msg; @@ -187,7 +191,7 @@ int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; - err = __tipc_nl_add_net(&msg); + err = __tipc_nl_add_net(net, &msg); if (err) goto out; @@ -200,8 +204,10 @@ out: int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) { - int err; + struct net *net = genl_info_net(info); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + int err; if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; @@ -223,7 +229,7 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) if (val < 1 || val > 9999) return -EINVAL; - tipc_net_id = val; + tn->net_id = val; } if (attrs[TIPC_NLA_NET_ADDR]) { @@ -238,7 +244,7 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; rtnl_lock(); - tipc_net_start(addr); + tipc_net_start(net, addr); rtnl_unlock(); } diff --git a/net/tipc/net.h b/net/tipc/net.h index a81c1b9eb150..2c4812f8408f 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -39,7 +39,7 @@ #include -int tipc_net_start(u32 addr); +int tipc_net_start(struct net *net, u32 addr); void tipc_net_stop(void); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index b891e3905bc4..282b59681484 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -46,6 +46,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct sk_buff *rep_buf; struct nlmsghdr *rep_nlh; struct nlmsghdr *req_nlh = info->nlhdr; @@ -58,10 +59,11 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) else cmd = req_userhdr->cmd; - rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, - nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), - hdr_space); + rep_buf = tipc_cfg_do_cmd(net, req_userhdr->dest, cmd, + nlmsg_data(req_nlh) + GENL_HDRLEN + + TIPC_GENL_HDRLEN, + nlmsg_attrlen(req_nlh, GENL_HDRLEN + + TIPC_GENL_HDRLEN), hdr_space); if (rep_buf) { skb_push(rep_buf, hdr_space); -- cgit v1.2.3 From f2f9800d4955a96d92896841d8ba9b04201deaa1 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:05 +0800 Subject: tipc: make tipc node table aware of net namespace Global variables associated with node table 
are listed below: - node hash table (node_htable) - node list (tipc_node_list) - node list lock (node_list_lock) - node counter (tipc_num_nodes) - link counter (tipc_num_links) To make the node table namespace-aware, these global variables must be moved into the tipc_net structure so that each namespace keeps its own private instance of them. As a consequence, they are allocated and initialized when a namespace is created and freed when the namespace is destroyed. After the change, the functions that operate on them need a namespace pointer to reach them, so adding such a pointer as a parameter to these functions is the main change made in this commit. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller ---
net/tipc/addr.h | 2 + net/tipc/bcast.c | 24 +++++----- net/tipc/bcast.h | 4 +- net/tipc/bearer.c | 25 +++++----- net/tipc/bearer.h | 4 +- net/tipc/config.c | 21 ++++---- net/tipc/core.c | 4 +- net/tipc/core.h | 9 ++++ net/tipc/discover.c | 4 +- net/tipc/link.c | 107 +++++++++++++++++++++++++---------- net/tipc/link.h | 24 ++++++---- net/tipc/name_distr.c | 57 ++++++++++++---------- net/tipc/name_distr.h | 10 ++-- net/tipc/name_table.c | 16 ++++--- net/tipc/name_table.h | 8 ++-- net/tipc/net.c | 11 +++-- net/tipc/net.h | 2 +- net/tipc/node.c | 130 +++++++++++++++++++++++++++----------------------- net/tipc/node.h | 35 +++++++------- net/tipc/socket.c | 72 +++++++++++++++++----------- net/tipc/socket.h | 4 +- 21 files changed, 329 insertions(+), 244 deletions(-) (limited to 'net')
diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 60b00ab93d74..3e1f18e29f1e 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -40,6 +40,8 @@ #define TIPC_ZONE_MASK 0xff000000u #define TIPC_CLUSTER_MASK 0xfffff000u +extern u32 tipc_own_addr __read_mostly; + static inline u32 tipc_zone_mask(u32 addr) { return addr & TIPC_ZONE_MASK;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index f98231138916..816c0e49319f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -232,13 +232,12 @@ static void bclink_retransmit_pkt(u32 after, u32 to) * * Called with no locks taken */ -void tipc_bclink_wakeup_users(void) +void tipc_bclink_wakeup_users(struct net *net) { struct sk_buff *skb; while ((skb = skb_dequeue(&bclink->link.waiting_sks))) - tipc_sk_rcv(skb); - + tipc_sk_rcv(net, skb); } /** @@ -385,9 +384,9 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, * Delay any upcoming NACK by this node if another node has already * requested the first message this node is going to ask for.
*/ -static void bclink_peek_nack(struct tipc_msg *msg) +static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) { - struct tipc_node *n_ptr = tipc_node_find(msg_destnode(msg)); + struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg)); if (unlikely(!n_ptr)) return; @@ -404,11 +403,12 @@ static void bclink_peek_nack(struct tipc_msg *msg) /* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster * and to identified node local sockets + * @net: the applicable net namespace * @list: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_bclink_xmit(struct sk_buff_head *list) +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) { int rc = 0; int bc = 0; @@ -443,7 +443,7 @@ int tipc_bclink_xmit(struct sk_buff_head *list) /* Deliver message clone */ if (likely(!rc)) - tipc_sk_mcast_rcv(skb); + tipc_sk_mcast_rcv(net, skb); else kfree_skb(skb); @@ -491,7 +491,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) if (msg_mc_netid(msg) != tn->net_id) goto exit; - node = tipc_node_find(msg_prevnode(msg)); + node = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!node)) goto exit; @@ -514,7 +514,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) tipc_bclink_unlock(); } else { tipc_node_unlock(node); - bclink_peek_nack(msg); + bclink_peek_nack(net, msg); } goto exit; } @@ -532,7 +532,7 @@ receive: tipc_bclink_unlock(); tipc_node_unlock(node); if (likely(msg_mcast(msg))) - tipc_sk_mcast_rcv(buf); + tipc_sk_mcast_rcv(net, buf); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { @@ -542,7 +542,7 @@ receive: bcl->stats.recv_bundled += msg_msgcnt(msg); tipc_bclink_unlock(); tipc_node_unlock(node); - tipc_link_bundle_rcv(buf); + tipc_link_bundle_rcv(net, buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) @@ -563,7 +563,7 @@ receive: bclink_accept_pkt(node, seqno); tipc_bclink_unlock(); tipc_node_unlock(node); - tipc_named_rcv(buf); + tipc_named_rcv(net, buf); } else { tipc_bclink_lock(); bclink_accept_pkt(node, seqno); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a5fd22438aed..fd0d17a76493 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -101,8 +101,8 @@ int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); uint tipc_bclink_get_mtu(void); -int tipc_bclink_xmit(struct sk_buff_head *list); -void tipc_bclink_wakeup_users(void); +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); +void tipc_bclink_wakeup_users(struct net *net); int tipc_nl_add_bc_link(struct tipc_nl_msg *msg); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index cdd30337dc5e..a735c08e9d90 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -69,7 +69,8 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; -static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down); +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down); /** * tipc_media_find - locates specified media object by name @@ -364,7 +365,7 @@ restart: res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); if (res) { - bearer_disable(b_ptr, false); + bearer_disable(net, b_ptr, false); 
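/*
 * Editor's aside (illustrative sketch only, not part of this patch): the
 * series threads a struct net pointer into functions that previously relied
 * on init_net-only globals, and each call path derives that pointer from an
 * object it already has.  The accessors used below are existing kernel
 * helpers seen elsewhere in these patches; the wrapper function itself is
 * hypothetical.
 */
static struct net *tipc_example_net_of(struct sock *sk, struct net_device *dev,
				       struct genl_info *info)
{
	if (sk)
		return sock_net(sk);		/* socket and netlink dump paths */
	if (dev)
		return dev_net(dev);		/* bearer/netdev notifier path */
	return genl_info_net(info);		/* generic netlink command path */
}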
pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); return -EINVAL; @@ -384,7 +385,7 @@ restart: static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) { pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_link_reset_list(b_ptr->identity); + tipc_link_reset_list(net, b_ptr->identity); tipc_disc_reset(net, b_ptr); return 0; } @@ -394,14 +395,15 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds RTNL lock. */ -static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) +static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, + bool shutting_down) { u32 i; pr_info("Disabling bearer <%s>\n", b_ptr->name); b_ptr->media->disable_media(b_ptr); - tipc_link_delete_list(b_ptr->identity, shutting_down); + tipc_link_delete_list(net, b_ptr->identity, shutting_down); if (b_ptr->link_req) tipc_disc_delete(b_ptr->link_req); @@ -414,7 +416,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down) kfree_rcu(b_ptr, rcu); } -int tipc_disable_bearer(const char *name) +int tipc_disable_bearer(struct net *net, const char *name) { struct tipc_bearer *b_ptr; int res; @@ -424,7 +426,7 @@ int tipc_disable_bearer(const char *name) pr_warn("Attempt to disable unknown bearer <%s>\n", name); res = -EINVAL; } else { - bearer_disable(b_ptr, false); + bearer_disable(net, b_ptr, false); res = 0; } return res; @@ -593,7 +595,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(b_ptr, false); + bearer_disable(dev_net(dev), b_ptr, false); break; } return NOTIFY_OK; @@ -626,7 +628,7 @@ void tipc_bearer_cleanup(void) dev_remove_pack(&tipc_packet_type); } -void tipc_bearer_stop(void) +void tipc_bearer_stop(struct net *net) { struct tipc_bearer *b_ptr; u32 i; @@ -634,7 +636,7 @@ void tipc_bearer_stop(void) for (i = 0; i < MAX_BEARERS; i++) { b_ptr = rtnl_dereference(bearer_list[i]); if (b_ptr) { - bearer_disable(b_ptr, true); + bearer_disable(net, b_ptr, true); bearer_list[i] = NULL; } } @@ -772,6 +774,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -794,7 +797,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - bearer_disable(bearer, false); + bearer_disable(net, bearer, false); rtnl_unlock(); return 0; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 43f683aebbbe..91a8eeea309c 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -168,7 +168,7 @@ extern struct tipc_bearer __rcu *bearer_list[]; void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); int tipc_enable_bearer(struct net *net, const char *bearer_name, u32 disc_domain, u32 priority); -int tipc_disable_bearer(const char *name); +int tipc_disable_bearer(struct net *net, const char *name); /* * Routines made available to TIPC by supported media types @@ -205,7 +205,7 @@ struct tipc_bearer *tipc_bearer_find(const char *name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); -void tipc_bearer_stop(void); +void tipc_bearer_stop(struct net *net); void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); diff --git a/net/tipc/config.c 
b/net/tipc/config.c index 28d4272803c4..cf2d417ad488 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -150,12 +150,12 @@ static struct sk_buff *cfg_enable_bearer(struct net *net) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_disable_bearer(void) +static struct sk_buff *cfg_disable_bearer(struct net *net) { if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - if (tipc_disable_bearer((char *)TLV_DATA(req_tlv_area))) + if (tipc_disable_bearer(net, (char *)TLV_DATA(req_tlv_area))) return tipc_cfg_reply_error_string("unable to disable bearer"); return tipc_cfg_reply_none(); @@ -232,16 +232,20 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_cfg_reply_none(); break; case TIPC_CMD_GET_NODES: - rep_tlv_buf = tipc_node_get_nodes(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_node_get_nodes(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_GET_LINKS: - rep_tlv_buf = tipc_node_get_links(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_node_get_links(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_SHOW_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_show_stats(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_link_cmd_show_stats(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_RESET_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_reset_stats(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_link_cmd_reset_stats(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_SHOW_NAME_TABLE: rep_tlv_buf = tipc_nametbl_get(req_tlv_area, req_tlv_space); @@ -261,13 +265,14 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, case TIPC_CMD_SET_LINK_TOL: case TIPC_CMD_SET_LINK_PRI: case TIPC_CMD_SET_LINK_WINDOW: - rep_tlv_buf = tipc_link_cmd_config(req_tlv_area, req_tlv_space, cmd); + rep_tlv_buf = tipc_link_cmd_config(net, req_tlv_area, + req_tlv_space, cmd); break; case TIPC_CMD_ENABLE_BEARER: rep_tlv_buf = cfg_enable_bearer(net); break; case TIPC_CMD_DISABLE_BEARER: - rep_tlv_buf = cfg_disable_bearer(); + rep_tlv_buf = cfg_disable_bearer(net); break; case TIPC_CMD_SET_NODE_ADDR: rep_tlv_buf = cfg_set_own_addr(net); diff --git a/net/tipc/core.c b/net/tipc/core.c index a2302480d8cf..7b8443938caf 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -57,12 +57,15 @@ static int __net_init tipc_init_net(struct net *net) struct tipc_net *tn = net_generic(net, tipc_net_id); tn->net_id = 4711; + INIT_LIST_HEAD(&tn->node_list); + spin_lock_init(&tn->node_list_lock); return 0; } static void __net_exit tipc_exit_net(struct net *net) { + tipc_net_stop(net); } static struct pernet_operations tipc_net_ops = { @@ -144,7 +147,6 @@ out_pernet: static void __exit tipc_exit(void) { unregister_pernet_subsys(&tipc_net_ops); - tipc_net_stop(); tipc_bearer_cleanup(); tipc_netlink_stop(); tipc_subscr_stop(); diff --git a/net/tipc/core.h b/net/tipc/core.h index 106e8150c3a6..4fb113397e7b 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -59,6 +59,8 @@ #include #include +#include "node.h" + #define TIPC_MOD_VER "2.0.0" int tipc_snprintf(char *buf, int len, const char *fmt, ...); @@ -78,6 +80,13 @@ extern int tipc_random __read_mostly; struct tipc_net { int net_id; + + /* Node table and node list */ + spinlock_t node_list_lock; + struct hlist_head node_htable[NODE_HTABLE_SIZE]; + struct list_head node_list; + u32 num_nodes; + u32 num_links; }; #ifdef CONFIG_SYSCTL diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 
246a23788ded..f0fd8b449aef 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -162,9 +162,9 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, return; /* Locate, or if necessary, create, node: */ - node = tipc_node_find(onode); + node = tipc_node_find(net, onode); if (!node) - node = tipc_node_create(onode); + node = tipc_node_create(net, onode); if (!node) return; diff --git a/net/tipc/link.c b/net/tipc/link.c index 248813cb6d68..f6505652742e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -114,7 +114,8 @@ static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); -static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf); +static int tipc_link_input(struct net *net, struct tipc_link *l, + struct sk_buff *buf); static int tipc_link_prepare_input(struct net *net, struct tipc_link *l, struct sk_buff **buf); @@ -310,13 +311,15 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, return l_ptr; } -void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) { @@ -451,13 +454,14 @@ void tipc_link_reset(struct tipc_link *l_ptr) link_reset_statistics(l_ptr); } -void tipc_link_reset_list(unsigned int bearer_id) +void tipc_link_reset_list(struct net *net, unsigned int bearer_id) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); l_ptr = n_ptr->links[bearer_id]; if (l_ptr) @@ -773,16 +777,18 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) return __tipc_link_xmit(link, &head); } -int tipc_link_xmit_skb(struct sk_buff *skb, u32 dnode, u32 selector) +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, + u32 selector) { struct sk_buff_head head; skb2list(skb, &head); - return tipc_link_xmit(&head, dnode, selector); + return tipc_link_xmit(net, &head, dnode, selector); } /** * tipc_link_xmit() is the general link level function for message sending + * @net: the applicable net namespace * @list: chain of buffers containing message * @dsz: amount of user data to be sent * @dnode: address of destination node @@ -790,13 +796,14 @@ int tipc_link_xmit_skb(struct sk_buff *skb, u32 dnode, u32 selector) * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_link_xmit(struct sk_buff_head *list, u32 dnode, u32 selector) +int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, + u32 selector) { struct tipc_link *link = NULL; struct tipc_node *node; int rc = -EHOSTUNREACH; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (node) { tipc_node_lock(node); link = node->active_links[selector & 1]; @@ -813,7 +820,7 @@ int tipc_link_xmit(struct sk_buff_head *list, u32 dnode, u32 selector) * 
buffer, we just need to dequeue one SKB buffer from the * head list. */ - return tipc_sk_rcv(__skb_dequeue(list)); + return tipc_sk_rcv(net, __skb_dequeue(list)); } __skb_queue_purge(list); @@ -1066,7 +1073,7 @@ static int link_recv_buf_validate(struct sk_buff *buf) /** * tipc_rcv - process TIPC packets/messages arriving from off-node - * @net: net namespace handler + * @net: the applicable net namespace * @skb: TIPC packet * @b_ptr: pointer to bearer message arrived on * @@ -1112,7 +1119,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) goto discard; /* Locate neighboring node that sent message */ - n_ptr = tipc_node_find(msg_prevnode(msg)); + n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; tipc_node_lock(n_ptr); @@ -1203,7 +1210,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) } tipc_node_unlock(n_ptr); - if (tipc_link_input(l_ptr, skb) != 0) + if (tipc_link_input(net, l_ptr, skb) != 0) goto discard; continue; unlock_discard: @@ -1263,7 +1270,8 @@ static int tipc_link_prepare_input(struct net *net, struct tipc_link *l, /** * tipc_link_input - Deliver message too higher layers */ -static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf) +static int tipc_link_input(struct net *net, struct tipc_link *l, + struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); int res = 0; @@ -1274,13 +1282,13 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf) case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: case CONN_MANAGER: - tipc_sk_rcv(buf); + tipc_sk_rcv(net, buf); break; case NAME_DISTRIBUTOR: - tipc_named_rcv(buf); + tipc_named_rcv(net, buf); break; case MSG_BUNDLER: - tipc_link_bundle_rcv(buf); + tipc_link_bundle_rcv(net, buf); break; default: res = -EINVAL; @@ -1855,7 +1863,7 @@ exit: /* * Bundler functionality: */ -void tipc_link_bundle_rcv(struct sk_buff *buf) +void tipc_link_bundle_rcv(struct net *net, struct sk_buff *buf) { u32 msgcount = msg_msgcnt(buf_msg(buf)); u32 pos = INT_H_SIZE; @@ -1872,13 +1880,13 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) pos += align(msg_size(omsg)); if (msg_isdata(omsg)) { if (unlikely(msg_type(omsg) == TIPC_MCAST_MSG)) - tipc_sk_mcast_rcv(obuf); + tipc_sk_mcast_rcv(net, obuf); else - tipc_sk_rcv(obuf); + tipc_sk_rcv(net, obuf); } else if (msg_user(omsg) == CONN_MANAGER) { - tipc_sk_rcv(obuf); + tipc_sk_rcv(net, obuf); } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { - tipc_named_rcv(obuf); + tipc_named_rcv(net, obuf); } else { pr_warn("Illegal bundled msg: %u\n", msg_user(omsg)); kfree_skb(obuf); @@ -1919,14 +1927,17 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) } /* tipc_link_find_owner - locate owner node of link by link's name + * @net: the applicable net namespace * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. * * Returns pointer to node owning the link, or 0 if no matching link is found. 
*/ -static struct tipc_node *tipc_link_find_owner(const char *link_name, +static struct tipc_node *tipc_link_find_owner(struct net *net, + const char *link_name, unsigned int *bearer_id) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_node *n_ptr; struct tipc_node *found_node = NULL; @@ -1934,7 +1945,7 @@ static struct tipc_node *tipc_link_find_owner(const char *link_name, *bearer_id = 0; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { l_ptr = n_ptr->links[i]; @@ -1978,6 +1989,7 @@ static int link_value_is_valid(u16 cmd, u32 new_value) /** * link_cmd_set_value - change priority/tolerance/window for link/bearer/media + * @net: the applicable net namespace * @name: ptr to link, bearer, or media name * @new_value: new value of link, bearer, or media setting * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) @@ -1986,7 +1998,8 @@ static int link_value_is_valid(u16 cmd, u32 new_value) * * Returns 0 if value updated and negative value on error. */ -static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) +static int link_cmd_set_value(struct net *net, const char *name, u32 new_value, + u16 cmd) { struct tipc_node *node; struct tipc_link *l_ptr; @@ -1995,7 +2008,7 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) int bearer_id; int res = 0; - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (node) { tipc_node_lock(node); l_ptr = node->links[bearer_id]; @@ -2063,8 +2076,8 @@ static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) return res; } -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, - u16 cmd) +struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area, + int req_tlv_space, u16 cmd) { struct tipc_link_config *args; u32 new_value; @@ -2088,7 +2101,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space " (cannot change setting on broadcast link)"); } - res = link_cmd_set_value(args->name, new_value, cmd); + res = link_cmd_set_value(net, args->name, new_value, cmd); if (res) return tipc_cfg_reply_error_string("cannot change link setting"); @@ -2106,7 +2119,9 @@ static void link_reset_statistics(struct tipc_link *l_ptr) l_ptr->stats.recv_info = l_ptr->next_in_no; } -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, + const void *req_tlv_area, + int req_tlv_space) { char *link_name; struct tipc_link *l_ptr; @@ -2122,7 +2137,7 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_ return tipc_cfg_reply_error_string("link not found"); return tipc_cfg_reply_none(); } - node = tipc_link_find_owner(link_name, &bearer_id); + node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return tipc_cfg_reply_error_string("link not found"); @@ -2147,13 +2162,15 @@ static u32 percent(u32 count, u32 total) /** * tipc_link_stats - print link statistics + * @net: the applicable net namespace * @name: link name * @buf: print buffer area * @buf_size: size of print buffer area * * Returns length of print buffer data string (or 0 if error) */ -static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) +static int tipc_link_stats(struct net *net, const char 
*name, char *buf, + const u32 buf_size) { struct tipc_link *l; struct tipc_stats *s; @@ -2166,7 +2183,7 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) if (!strcmp(name, tipc_bclink_name)) return tipc_bclink_stats(buf, buf_size); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return 0; @@ -2243,7 +2260,9 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) return ret; } -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_link_cmd_show_stats(struct net *net, + const void *req_tlv_area, + int req_tlv_space) { struct sk_buff *buf; struct tlv_desc *rep_tlv; @@ -2261,7 +2280,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s rep_tlv = (struct tlv_desc *)buf->data; pb = TLV_DATA(rep_tlv); pb_len = ULTRA_STRING_MAX_LEN; - str_len = tipc_link_stats((char *)TLV_DATA(req_tlv_area), + str_len = tipc_link_stats(net, (char *)TLV_DATA(req_tlv_area), pb, pb_len); if (!str_len) { kfree_skb(buf); @@ -2343,6 +2362,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2358,7 +2378,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2567,6 +2587,8 @@ static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_nl_msg msg; u32 prev_node = cb->args[0]; @@ -2584,7 +2606,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); if (prev_node) { - node = tipc_node_find(prev_node); + node = tipc_node_find(net, prev_node); if (!node) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the @@ -2596,7 +2618,8 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out; } - list_for_each_entry_continue_rcu(node, &tipc_node_list, list) { + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { tipc_node_lock(node); err = __tipc_nl_add_node_links(&msg, node, &prev_link); tipc_node_unlock(node); @@ -2610,7 +2633,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) if (err) goto out; - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_lock(node); err = __tipc_nl_add_node_links(&msg, node, &prev_link); tipc_node_unlock(node); @@ -2633,6 +2656,7 @@ out: int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct sk_buff *ans_skb; struct tipc_nl_msg msg; struct tipc_link *link; @@ -2645,7 +2669,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) return -EINVAL; name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(name, &bearer_id); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2687,6 +2711,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info 
*info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2709,7 +2734,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) return 0; } - node = tipc_link_find_owner(link_name, &bearer_id); + node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return -EINVAL; diff --git a/net/tipc/link.h b/net/tipc/link.h index 692268dca4b9..380e27ee0f70 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -200,27 +200,31 @@ struct tipc_port; struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); -void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down); +void tipc_link_delete_list(struct net *net, unsigned int bearer_id, + bool shutting_down); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, - int req_tlv_space, - u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, +struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area, + int req_tlv_space, u16 cmd); +struct sk_buff *tipc_link_cmd_show_stats(struct net *net, + const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, +struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, + const void *req_tlv_area, int req_tlv_space); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); -void tipc_link_reset_list(unsigned int bearer_id); -int tipc_link_xmit_skb(struct sk_buff *skb, u32 dest, u32 selector); -int tipc_link_xmit(struct sk_buff_head *list, u32 dest, u32 selector); +void tipc_link_reset_list(struct net *net, unsigned int bearer_id); +int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, + u32 selector); +int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, + u32 selector); int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list); -void tipc_link_bundle_rcv(struct sk_buff *buf); +void tipc_link_bundle_rcv(struct net *net, struct sk_buff *buf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); void tipc_link_push_packets(struct tipc_link *l_ptr); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index ba6083dca95b..d40df588263e 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -81,14 +81,15 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) return buf; } -void named_cluster_distribute(struct sk_buff *skb) +void named_cluster_distribute(struct net *net, struct sk_buff *skb) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *oskb; struct tipc_node *node; u32 dnode; rcu_read_lock(); - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { dnode = node->addr; if (in_own_node(dnode)) continue; @@ -98,7 +99,7 @@ void named_cluster_distribute(struct sk_buff *skb) if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); - 
tipc_link_xmit_skb(oskb, dnode, dnode); + tipc_link_xmit_skb(net, oskb, dnode, dnode); } rcu_read_unlock(); @@ -160,13 +161,14 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) * @dnode: node to be updated * @pls: linked list of publication items to be packed into buffer chain */ -static void named_distribute(struct sk_buff_head *list, u32 dnode, - struct list_head *pls) +static void named_distribute(struct net *net, struct sk_buff_head *list, + u32 dnode, struct list_head *pls) { struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; - uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE; + uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) * + ITEM_SIZE; uint msg_rem = msg_dsz; list_for_each_entry(publ, pls, local_list) { @@ -202,30 +204,31 @@ static void named_distribute(struct sk_buff_head *list, u32 dnode, /** * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(u32 dnode) +void tipc_named_node_up(struct net *net, u32 dnode) { struct sk_buff_head head; __skb_queue_head_init(&head); rcu_read_lock(); - named_distribute(&head, dnode, + named_distribute(net, &head, dnode, &tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); - named_distribute(&head, dnode, + named_distribute(net, &head, dnode, &tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); rcu_read_unlock(); - tipc_link_xmit(&head, dnode, dnode); + tipc_link_xmit(net, &head, dnode, dnode); } -static void tipc_publ_subscribe(struct publication *publ, u32 addr) +static void tipc_publ_subscribe(struct net *net, struct publication *publ, + u32 addr) { struct tipc_node *node; if (in_own_node(addr)) return; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (!node) { pr_warn("Node subscription rejected, unknown node 0x%x\n", addr); @@ -237,11 +240,12 @@ static void tipc_publ_subscribe(struct publication *publ, u32 addr) tipc_node_unlock(node); } -static void tipc_publ_unsubscribe(struct publication *publ, u32 addr) +static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, + u32 addr) { struct tipc_node *node; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (!node) return; @@ -256,7 +260,7 @@ static void tipc_publ_unsubscribe(struct publication *publ, u32 addr) * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. */ -static void tipc_publ_purge(struct publication *publ, u32 addr) +static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) { struct publication *p; @@ -264,7 +268,7 @@ static void tipc_publ_purge(struct publication *publ, u32 addr) p = tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) - tipc_publ_unsubscribe(p, addr); + tipc_publ_unsubscribe(net, p, addr); spin_unlock_bh(&tipc_nametbl_lock); if (p != publ) { @@ -277,12 +281,12 @@ static void tipc_publ_purge(struct publication *publ, u32 addr) kfree_rcu(p, rcu); } -void tipc_publ_notify(struct list_head *nsub_list, u32 addr) +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) - tipc_publ_purge(publ, addr); + tipc_publ_purge(net, publ, addr); } /** @@ -292,7 +296,8 @@ void tipc_publ_notify(struct list_head *nsub_list, u32 addr) * tipc_nametbl_lock must be held. * Returns the publication item if successful, otherwise NULL. 
*/ -static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype) +static bool tipc_update_nametbl(struct net *net, struct distr_item *i, + u32 node, u32 dtype) { struct publication *publ = NULL; @@ -302,7 +307,7 @@ static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype) TIPC_CLUSTER_SCOPE, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_subscribe(publ, node); + tipc_publ_subscribe(net, publ, node); return true; } } else if (dtype == WITHDRAWAL) { @@ -310,7 +315,7 @@ static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype) node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_unsubscribe(publ, node); + tipc_publ_unsubscribe(net, publ, node); kfree_rcu(publ, rcu); return true; } @@ -343,7 +348,7 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) * tipc_named_process_backlog - try to process any pending name table updates * from the network. */ -void tipc_named_process_backlog(void) +void tipc_named_process_backlog(struct net *net) { struct distr_queue_item *e, *tmp; char addr[16]; @@ -351,7 +356,7 @@ void tipc_named_process_backlog(void) list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { if (time_after(e->expires, now)) { - if (!tipc_update_nametbl(&e->i, e->node, e->dtype)) + if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) continue; } else { tipc_addr_string_fill(addr, e->node); @@ -369,7 +374,7 @@ void tipc_named_process_backlog(void) /** * tipc_named_rcv - process name table update message sent by another node */ -void tipc_named_rcv(struct sk_buff *buf) +void tipc_named_rcv(struct net *net, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); struct distr_item *item = (struct distr_item *)msg_data(msg); @@ -378,11 +383,11 @@ void tipc_named_rcv(struct sk_buff *buf) spin_lock_bh(&tipc_nametbl_lock); while (count--) { - if (!tipc_update_nametbl(item, node, msg_type(msg))) + if (!tipc_update_nametbl(net, item, node, msg_type(msg))) tipc_named_add_backlog(item, msg_type(msg), node); item++; } - tipc_named_process_backlog(); + tipc_named_process_backlog(net); spin_unlock_bh(&tipc_nametbl_lock); kfree_skb(buf); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index cef55cedcfb2..8039d84351b3 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -69,11 +69,11 @@ struct distr_item { struct sk_buff *tipc_named_publish(struct publication *publ); struct sk_buff *tipc_named_withdraw(struct publication *publ); -void named_cluster_distribute(struct sk_buff *buf); -void tipc_named_node_up(u32 dnode); -void tipc_named_rcv(struct sk_buff *buf); +void named_cluster_distribute(struct net *net, struct sk_buff *buf); +void tipc_named_node_up(struct net *net, u32 dnode); +void tipc_named_rcv(struct net *net, struct sk_buff *buf); void tipc_named_reinit(void); -void tipc_named_process_backlog(void); -void tipc_publ_notify(struct list_head *nsub_list, u32 addr); +void tipc_named_process_backlog(struct net *net); +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index c8df0223371a..cf177907bb53 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -650,8 +650,9 @@ exit: /* * tipc_nametbl_publish - add name publication to network name tables */ -struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key) +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 
port_ref, + u32 key) { struct publication *publ; struct sk_buff *buf = NULL; @@ -670,19 +671,20 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, tipc_nametbl->local_publ_count++; buf = tipc_named_publish(publ); /* Any pending external events? */ - tipc_named_process_backlog(); + tipc_named_process_backlog(net); } spin_unlock_bh(&tipc_nametbl_lock); if (buf) - named_cluster_distribute(buf); + named_cluster_distribute(net, buf); return publ; } /** * tipc_nametbl_withdraw - withdraw name publication from network name tables */ -int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key) { struct publication *publ; struct sk_buff *skb = NULL; @@ -693,7 +695,7 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) tipc_nametbl->local_publ_count--; skb = tipc_named_withdraw(publ); /* Any pending external events? */ - tipc_named_process_backlog(); + tipc_named_process_backlog(net); list_del_init(&publ->pport_list); kfree_rcu(publ, rcu); } else { @@ -704,7 +706,7 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) spin_unlock_bh(&tipc_nametbl_lock); if (skb) { - named_cluster_distribute(skb); + named_cluster_distribute(net, skb); return 1; } return 0; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 5f0dee92010d..efccaca7a5d5 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -104,9 +104,11 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, struct tipc_port_list *dports); -struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key); -int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port_ref, + u32 key); +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, + u32 key); struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, u32 scope, u32 node, u32 ref, u32 key); diff --git a/net/tipc/net.c b/net/tipc/net.c index 5ce9d628f2d0..de18aacf3d64 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -121,7 +121,7 @@ int tipc_net_start(struct net *net, u32 addr) if (res) return res; - tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, + tipc_nametbl_publish(net, TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, TIPC_ZONE_SCOPE, 0, tipc_own_addr); pr_info("Started in network mode\n"); @@ -131,16 +131,17 @@ int tipc_net_start(struct net *net, u32 addr) return 0; } -void tipc_net_stop(void) +void tipc_net_stop(struct net *net) { if (!tipc_own_addr) return; - tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); + tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tipc_own_addr, 0, + tipc_own_addr); rtnl_lock(); - tipc_bearer_stop(); + tipc_bearer_stop(net); tipc_bclink_stop(); - tipc_node_stop(); + tipc_node_stop(net); rtnl_unlock(); pr_info("Left network mode\n"); diff --git a/net/tipc/net.h b/net/tipc/net.h index 2c4812f8408f..77a7a118911d 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -41,7 +41,7 @@ int tipc_net_start(struct net *net, u32 addr); -void tipc_net_stop(void); +void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info 
*info); diff --git a/net/tipc/node.c b/net/tipc/node.c index 8d353ec77a66..a0ca1ac53119 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -40,17 +40,9 @@ #include "name_distr.h" #include "socket.h" -#define NODE_HTABLE_SIZE 512 - static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); -static struct hlist_head node_htable[NODE_HTABLE_SIZE]; -LIST_HEAD(tipc_node_list); -static u32 tipc_num_nodes; -static u32 tipc_num_links; -static DEFINE_SPINLOCK(node_list_lock); - struct tipc_sock_conn { u32 port; u32 peer_port; @@ -78,15 +70,17 @@ static unsigned int tipc_hashfn(u32 addr) /* * tipc_node_find - locate specified node object, if it exists */ -struct tipc_node *tipc_node_find(u32 addr) +struct tipc_node *tipc_node_find(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; if (unlikely(!in_own_cluster_exact(addr))) return NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(node, &node_htable[tipc_hashfn(addr)], hash) { + hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], + hash) { if (node->addr == addr) { rcu_read_unlock(); return node; @@ -96,20 +90,22 @@ struct tipc_node *tipc_node_find(u32 addr) return NULL; } -struct tipc_node *tipc_node_create(u32 addr) +struct tipc_node *tipc_node_create(struct net *net, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n_ptr, *temp_node; - spin_lock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); if (!n_ptr) { - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); pr_warn("Node creation failed, no memory\n"); return NULL; } n_ptr->addr = addr; + n_ptr->net = net; spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); @@ -118,9 +114,9 @@ struct tipc_node *tipc_node_create(u32 addr) skb_queue_head_init(&n_ptr->waiting_sks); __skb_queue_head_init(&n_ptr->bclink.deferred_queue); - hlist_add_head_rcu(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); + hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); - list_for_each_entry_rcu(temp_node, &tipc_node_list, list) { + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) break; } @@ -128,32 +124,33 @@ struct tipc_node *tipc_node_create(u32 addr) n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; - tipc_num_nodes++; + tn->num_nodes++; - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) { list_del_rcu(&n_ptr->list); hlist_del_rcu(&n_ptr->hash); kfree_rcu(n_ptr, rcu); - tipc_num_nodes--; + tn->num_nodes--; } -void tipc_node_stop(void) +void tipc_node_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node, *t_node; - spin_lock_bh(&node_list_lock); - list_for_each_entry_safe(node, t_node, &tipc_node_list, list) - tipc_node_delete(node); - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + list_for_each_entry_safe(node, t_node, &tn->node_list, list) + tipc_node_delete(tn, node); + spin_unlock_bh(&tn->node_list_lock); } -int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; @@ -161,7 
+158,7 @@ int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) if (in_own_node(dnode)) return 0; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (!node) { pr_warn("Connecting sock to node 0x%x failed\n", dnode); return -EHOSTUNREACH; @@ -179,7 +176,7 @@ int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port) return 0; } -void tipc_node_remove_conn(u32 dnode, u32 port) +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) { struct tipc_node *node; struct tipc_sock_conn *conn, *safe; @@ -187,7 +184,7 @@ void tipc_node_remove_conn(u32 dnode, u32 port) if (in_own_node(dnode)) return; - node = tipc_node_find(dnode); + node = tipc_node_find(net, dnode); if (!node) return; @@ -201,7 +198,7 @@ void tipc_node_remove_conn(u32 dnode, u32 port) tipc_node_unlock(node); } -void tipc_node_abort_sock_conns(struct list_head *conns) +void tipc_node_abort_sock_conns(struct net *net, struct list_head *conns) { struct tipc_sock_conn *conn, *safe; struct sk_buff *buf; @@ -212,7 +209,7 @@ void tipc_node_abort_sock_conns(struct list_head *conns) conn->peer_node, conn->port, conn->peer_port, TIPC_ERR_NO_NODE); if (likely(buf)) - tipc_sk_rcv(buf); + tipc_sk_rcv(net, buf); list_del(&conn->list); kfree(conn); } @@ -342,24 +339,27 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); + n_ptr->links[l_ptr->bearer_id] = l_ptr; - spin_lock_bh(&node_list_lock); - tipc_num_links++; - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + tn->num_links++; + spin_unlock_bh(&tn->node_list_lock); n_ptr->link_cnt++; } void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); int i; for (i = 0; i < MAX_BEARERS; i++) { if (l_ptr != n_ptr->links[i]) continue; n_ptr->links[i] = NULL; - spin_lock_bh(&node_list_lock); - tipc_num_links--; - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + tn->num_links--; + spin_unlock_bh(&tn->node_list_lock); n_ptr->link_cnt--; } } @@ -414,8 +414,10 @@ static void node_lost_contact(struct tipc_node *n_ptr) TIPC_NOTIFY_NODE_DOWN; } -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area, + int req_tlv_space) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 domain; struct sk_buff *buf; struct tipc_node *n_ptr; @@ -430,20 +432,20 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network address)"); - spin_lock_bh(&node_list_lock); - if (!tipc_num_nodes) { - spin_unlock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); + if (!tn->num_nodes) { + spin_unlock_bh(&tn->node_list_lock); return tipc_cfg_reply_none(); } /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes; + payload_size = TLV_SPACE(sizeof(node_info)) * tn->num_nodes; if (payload_size > 32768u) { - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many nodes)"); } - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); buf = tipc_cfg_reply_alloc(payload_size); if (!buf) @@ -451,7 +453,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* Add 
TLVs for all nodes in scope */ rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { if (!tipc_in_scope(domain, n_ptr->addr)) continue; node_info.addr = htonl(n_ptr->addr); @@ -463,8 +465,10 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) return buf; } -struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_node_get_links(struct net *net, const void *req_tlv_area, + int req_tlv_space) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 domain; struct sk_buff *buf; struct tipc_node *n_ptr; @@ -482,15 +486,15 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) if (!tipc_own_addr) return tipc_cfg_reply_none(); - spin_lock_bh(&node_list_lock); + spin_lock_bh(&tn->node_list_lock); /* Get space for all unicast links + broadcast link */ - payload_size = TLV_SPACE((sizeof(link_info)) * (tipc_num_links + 1)); + payload_size = TLV_SPACE((sizeof(link_info)) * (tn->num_links + 1)); if (payload_size > 32768u) { - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (too many links)"); } - spin_unlock_bh(&node_list_lock); + spin_unlock_bh(&tn->node_list_lock); buf = tipc_cfg_reply_alloc(payload_size); if (!buf) @@ -504,7 +508,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for any other links in scope */ rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) { + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { u32 i; if (!tipc_in_scope(domain, n_ptr->addr)) @@ -534,10 +538,11 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) * * Returns 0 on success */ -int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, + char *linkname, size_t len) { struct tipc_link *link; - struct tipc_node *node = tipc_node_find(addr); + struct tipc_node *node = tipc_node_find(net, addr); if ((bearer_id >= MAX_BEARERS) || !node) return -EINVAL; @@ -554,6 +559,7 @@ int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len) void tipc_node_unlock(struct tipc_node *node) { + struct net *net = node->net; LIST_HEAD(nsub_list); LIST_HEAD(conn_sks); struct sk_buff_head waiting_sks; @@ -585,26 +591,26 @@ void tipc_node_unlock(struct tipc_node *node) spin_unlock_bh(&node->lock); while (!skb_queue_empty(&waiting_sks)) - tipc_sk_rcv(__skb_dequeue(&waiting_sks)); + tipc_sk_rcv(net, __skb_dequeue(&waiting_sks)); if (!list_empty(&conn_sks)) - tipc_node_abort_sock_conns(&conn_sks); + tipc_node_abort_sock_conns(net, &conn_sks); if (!list_empty(&nsub_list)) - tipc_publ_notify(&nsub_list, addr); + tipc_publ_notify(net, &nsub_list, addr); if (flags & TIPC_WAKEUP_BCAST_USERS) - tipc_bclink_wakeup_users(); + tipc_bclink_wakeup_users(net); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(addr); + tipc_named_node_up(net, addr); if (flags & TIPC_NOTIFY_LINK_UP) - tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, + tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, link_id, addr); if (flags & TIPC_NOTIFY_LINK_DOWN) - tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, + tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); } @@ -645,6 +651,8 @@ msg_full: int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { 
int err; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); int done = cb->args[0]; int last_addr = cb->args[1]; struct tipc_node *node; @@ -659,7 +667,7 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); - if (last_addr && !tipc_node_find(last_addr)) { + if (last_addr && !tipc_node_find(net, last_addr)) { rcu_read_unlock(); /* We never set seq or call nl_dump_check_consistent() this * means that setting prev_seq here will cause the consistence @@ -671,7 +679,7 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) return -EPIPE; } - list_for_each_entry_rcu(node, &tipc_node_list, list) { + list_for_each_entry_rcu(node, &tn->node_list, list) { if (last_addr) { if (node->addr == last_addr) last_addr = 0; diff --git a/net/tipc/node.h b/net/tipc/node.h index cbe0e950f1cc..43ef88ef3035 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,10 +42,10 @@ #include "bearer.h" #include "msg.h" -/* - * Out-of-range value for node signature - */ -#define INVALID_NODE_SIG 0x10000 +/* Out-of-range value for node signature */ +#define INVALID_NODE_SIG 0x10000 + +#define NODE_HTABLE_SIZE 512 /* Flags used to take different actions according to flag type * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down @@ -90,6 +90,7 @@ struct tipc_node_bclink { * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure + * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain * @active_links: pointers to active links to node * @links: pointers to all links to node @@ -106,6 +107,7 @@ struct tipc_node_bclink { struct tipc_node { u32 addr; spinlock_t lock; + struct net *net; struct hlist_node hash; struct tipc_link *active_links[2]; u32 act_mtus[2]; @@ -123,23 +125,24 @@ struct tipc_node { struct rcu_head rcu; }; -extern struct list_head tipc_node_list; - -struct tipc_node *tipc_node_find(u32 addr); -struct tipc_node *tipc_node_create(u32 addr); -void tipc_node_stop(void); +struct tipc_node *tipc_node_find(struct net *net, u32 addr); +struct tipc_node *tipc_node_create(struct net *net, u32 addr); +void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); -struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); -int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len); +struct sk_buff *tipc_node_get_links(struct net *net, const void *req_tlv_area, + int req_tlv_space); +struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area, + int req_tlv_space); +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, + char *linkname, size_t len); void tipc_node_unlock(struct tipc_node *node); -int tipc_node_add_conn(u32 dnode, u32 port, u32 peer_port); -void tipc_node_remove_conn(u32 dnode, u32 port); +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_nl_node_dump(struct 
sk_buff *skb, struct netlink_callback *cb); @@ -154,12 +157,12 @@ static inline bool tipc_node_blocked(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } -static inline uint tipc_node_get_mtu(u32 addr, u32 selector) +static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) { struct tipc_node *node; u32 mtu; - node = tipc_node_find(addr); + node = tipc_node_find(net, addr); if (likely(node)) mtu = node->act_mtus[selector & 1]; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c58f66be7e18..68831453bc0e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -257,7 +257,7 @@ static void tsk_rej_rx_queue(struct sock *sk) while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -473,6 +473,7 @@ static void tipc_sk_callback(struct rcu_head *head) static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk; struct sk_buff *skb; u32 dnode, probing_state; @@ -503,10 +504,10 @@ static int tipc_release(struct socket *sock) (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; tsk->connected = 0; - tipc_node_remove_conn(dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(skb, dnode, 0); + tipc_link_xmit_skb(net, skb, dnode, 0); } } @@ -521,8 +522,8 @@ static int tipc_release(struct socket *sock) tsk_peer_port(tsk), tsk->portid, TIPC_ERR_NO_PORT); if (skb) - tipc_link_xmit_skb(skb, dnode, tsk->portid); - tipc_node_remove_conn(dnode, tsk->portid); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } /* Discard any remaining (connection-based) messages in receive queue */ @@ -725,6 +726,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct msghdr *msg, size_t dsz, long timeo) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; struct sk_buff_head head; uint mtu; @@ -747,7 +749,7 @@ new_mtu: return rc; do { - rc = tipc_bclink_xmit(&head); + rc = tipc_bclink_xmit(net, &head); if (likely(rc >= 0)) { rc = dsz; break; @@ -766,7 +768,7 @@ new_mtu: /* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets */ -void tipc_sk_mcast_rcv(struct sk_buff *buf) +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); struct tipc_port_list dports = {0, NULL, }; @@ -798,7 +800,7 @@ void tipc_sk_mcast_rcv(struct sk_buff *buf) continue; } msg_set_destport(msg, item->ports[i]); - tipc_sk_rcv(b); + tipc_sk_rcv(net, b); } } tipc_port_list_free(&dports); @@ -886,6 +888,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; struct sk_buff_head head; @@ -960,7 +963,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, } new_mtu: - mtu = tipc_node_get_mtu(dnode, tsk->portid); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); if (rc < 0) @@ -969,7 +972,7 @@ new_mtu: do { skb = skb_peek(&head); TIPC_SKB_CB(skb)->wakeup_pending = 
tsk->link_cong; - rc = tipc_link_xmit(&head, dnode, tsk->portid); + rc = tipc_link_xmit(net, &head, dnode, tsk->portid); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -1038,6 +1041,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *mhdr = &tsk->phdr; struct sk_buff_head head; @@ -1081,7 +1085,7 @@ next: goto exit; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(&head, dnode, portid); + rc = tipc_link_xmit(net, &head, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1090,7 +1094,8 @@ next: goto next; } if (rc == -EMSGSIZE) { - tsk->max_pkt = tipc_node_get_mtu(dnode, portid); + tsk->max_pkt = tipc_node_get_mtu(net, dnode, + portid); goto next; } if (rc != -ELINKCONG) @@ -1132,6 +1137,7 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, u32 peer_node) { + struct net *net = sock_net(&tsk->sk); struct tipc_msg *msg = &tsk->phdr; msg_set_destnode(msg, peer_node); @@ -1145,8 +1151,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, tsk->connected = 1; if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) sock_hold(&tsk->sk); - tipc_node_add_conn(peer_node, tsk->portid, peer_port); - tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->portid); + tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); } /** @@ -1245,6 +1251,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) { + struct net *net = sock_net(&tsk->sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); @@ -1258,7 +1265,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) return; msg = buf_msg(skb); msg_set_msgcnt(msg, ack); - tipc_link_xmit_skb(skb, dnode, msg_link_selector(msg)); + tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg)); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) @@ -1551,6 +1558,7 @@ static void tipc_data_ready(struct sock *sk) static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) { struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(*buf); int retval = -TIPC_ERR_NO_PORT; @@ -1566,7 +1574,7 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) sock->state = SS_DISCONNECTING; tsk->connected = 0; /* let timer expire on it's own */ - tipc_node_remove_conn(tsk_peer_node(tsk), + tipc_node_remove_conn(net, tsk_peer_node(tsk), tsk->portid); } retval = TIPC_OK; @@ -1737,7 +1745,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) if ((rc < 0) && !tipc_msg_reverse(skb, &onode, -rc)) return 0; - tipc_link_xmit_skb(skb, onode, 0); + tipc_link_xmit_skb(sock_net(sk), skb, onode, 0); return 0; } @@ -1748,7 +1756,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Consumes buffer * Returns 0 if success, or errno: -EHOSTUNREACH */ -int tipc_sk_rcv(struct sk_buff *skb) +int tipc_sk_rcv(struct net *net, struct sk_buff *skb) { struct tipc_sock *tsk; struct sock *sk; @@ -1785,7 +1793,7 @@ exit: if ((rc < 0) && !tipc_msg_reverse(skb, &dnode, -rc)) return -EHOSTUNREACH; - tipc_link_xmit_skb(skb, 
dnode, 0); + tipc_link_xmit_skb(net, skb, dnode, 0); return (rc < 0) ? -EHOSTUNREACH : 0; } @@ -2042,6 +2050,7 @@ exit: static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; u32 dnode; @@ -2065,8 +2074,9 @@ restart: goto restart; } if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) - tipc_link_xmit_skb(skb, dnode, tsk->portid); - tipc_node_remove_conn(dnode, tsk->portid); + tipc_link_xmit_skb(net, skb, dnode, + tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, @@ -2074,11 +2084,11 @@ restart: 0, dnode, tipc_own_addr, tsk_peer_port(tsk), tsk->portid, TIPC_CONN_SHUTDOWN); - tipc_link_xmit_skb(skb, dnode, tsk->portid); + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); /* fall through */ case SS_DISCONNECTING: @@ -2130,7 +2140,7 @@ static void tipc_sk_timeout(unsigned long data) } bh_unlock_sock(sk); if (skb) - tipc_link_xmit_skb(skb, peer_node, tsk->portid); + tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); exit: sock_put(sk); } @@ -2138,6 +2148,7 @@ exit: static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = sock_net(&tsk->sk); struct publication *publ; u32 key; @@ -2147,7 +2158,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, if (key == tsk->portid) return -EADDRINUSE; - publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, + publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, scope, tsk->portid, key); if (unlikely(!publ)) return -EINVAL; @@ -2161,6 +2172,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq) { + struct net *net = sock_net(&tsk->sk); struct publication *publ; struct publication *safe; int rc = -EINVAL; @@ -2175,12 +2187,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, continue; if (publ->upper != seq->upper) break; - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; break; } - tipc_nametbl_withdraw(publ->type, publ->lower, + tipc_nametbl_withdraw(net, publ->type, publ->lower, publ->ref, publ->key); rc = 0; } @@ -2492,8 +2504,9 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, return put_user(sizeof(value), ol); } -static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) +static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + struct sock *sk = sock->sk; struct tipc_sioc_ln_req lnr; void __user *argp = (void __user *)arg; @@ -2501,7 +2514,8 @@ static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; - if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer, + if (!tipc_node_get_linkname(sock_net(sk), + lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) return -EFAULT; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 46bc370d82c7..eb15c3107920 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -49,9 +49,9 @@ int 
tipc_sock_create_local(int type, struct socket **res); void tipc_sock_release_local(struct socket *sock); int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, int flags); -int tipc_sk_rcv(struct sk_buff *buf); +int tipc_sk_rcv(struct net *net, struct sk_buff *buf); struct sk_buff *tipc_sk_socks_show(void); -void tipc_sk_mcast_rcv(struct sk_buff *buf); +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf); void tipc_sk_reinit(void); int tipc_sk_rht_init(void); void tipc_sk_rht_destroy(void); -- cgit v1.2.3 From 7f9f95d9d9bcdf253c4149a157b096958013eceb Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:06 +0800 Subject: tipc: make bearer list support net namespace Bearer list defined as a global variable is used to store bearer instances. When tipc supports net namespace, bearers created in one namespace must be isolated with others allocated in other namespaces, which requires us that the bearer list(bearer_list) must be moved to tipc_net structure. As a result, a net namespace pointer has to be passed to functions which access the bearer list. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 27 +++++++++++++-------- net/tipc/bcast.h | 7 +++--- net/tipc/bearer.c | 67 +++++++++++++++++++++++++++++++---------------------- net/tipc/bearer.h | 16 ++++++------- net/tipc/config.c | 2 +- net/tipc/core.h | 4 ++++ net/tipc/discover.c | 12 ++++++---- net/tipc/link.c | 41 ++++++++++++++++++++------------ net/tipc/link.h | 3 ++- net/tipc/net.c | 4 ++-- 10 files changed, 110 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 816c0e49319f..e7c538304595 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -369,7 +369,7 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, msg_set_bcgap_to(msg, to); tipc_bclink_lock(); - tipc_bearer_send(MAX_BEARERS, buf, NULL); + tipc_bearer_send(net, MAX_BEARERS, buf, NULL); bcl->stats.sent_nacks++; tipc_bclink_unlock(); kfree_skb(buf); @@ -425,7 +425,7 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) if (likely(bclink)) { tipc_bclink_lock(); if (likely(bclink->bcast_nodes.count)) { - rc = __tipc_link_xmit(bcl, list); + rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { u32 len = skb_queue_len(&bcl->outqueue); @@ -682,13 +682,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (bp_index == 0) { /* Use original buffer for first bearer */ - tipc_bearer_send(b->identity, buf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); if (!tbuf) break; - tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, tbuf, + &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } if (bcbearer->remains_new.count == 0) @@ -703,8 +704,10 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, /** * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer */ -void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; struct tipc_bearer *b; @@ -723,7 +726,7 @@ void 
tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) rcu_read_lock(); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - b = rcu_dereference_rtnl(bearer_list[b_index]); + b = rcu_dereference_rtnl(tn->bearer_list[b_index]); if (!b || !b->nodes.count) continue; @@ -939,8 +942,10 @@ int tipc_bclink_set_queue_limits(u32 limit) return 0; } -int tipc_bclink_init(void) +int tipc_bclink_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); if (!bcbearer) return -ENOMEM; @@ -967,19 +972,21 @@ int tipc_bclink_init(void) bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; - rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); + rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); return 0; } -void tipc_bclink_stop(void) +void tipc_bclink_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + tipc_bclink_lock(); tipc_link_purge_queues(bcl); tipc_bclink_unlock(); - RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); + RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); synchronize_net(); kfree(bcbearer); kfree(bclink); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index fd0d17a76493..692efb662253 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -84,8 +84,8 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); void tipc_port_list_free(struct tipc_port_list *pl_ptr); -int tipc_bclink_init(void); -void tipc_bclink_stop(void); +int tipc_bclink_init(struct net *net); +void tipc_bclink_stop(struct net *net); void tipc_bclink_set_flags(unsigned int flags); void tipc_bclink_add_node(u32 addr); void tipc_bclink_remove_node(u32 addr); @@ -99,7 +99,8 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); -void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action); +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action); uint tipc_bclink_get_mtu(void); int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); void tipc_bclink_wakeup_users(struct net *net); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a735c08e9d90..2fe9dcb418d4 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -34,6 +34,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include #include "core.h" #include "config.h" #include "bearer.h" @@ -67,8 +68,6 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } }; -struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; - static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, bool shutting_down); @@ -191,13 +190,14 @@ static int bearer_name_validate(const char *name, /** * tipc_bearer_find - locates bearer object with matching bearer name */ -struct tipc_bearer *tipc_bearer_find(const char *name) +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (b_ptr && (!strcmp(b_ptr->name, name))) return b_ptr; } @@ -207,8 +207,9 @@ struct tipc_bearer *tipc_bearer_find(const char *name) /** * tipc_bearer_get_names - record names of bearers in buffer */ -struct sk_buff *tipc_bearer_get_names(void) +struct sk_buff *tipc_bearer_get_names(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf; struct tipc_bearer *b; int i, j; @@ -219,7 +220,7 @@ struct sk_buff *tipc_bearer_get_names(void) for (i = 0; media_info_array[i] != NULL; i++) { for (j = 0; j < MAX_BEARERS; j++) { - b = rtnl_dereference(bearer_list[j]); + b = rtnl_dereference(tn->bearer_list[j]); if (!b) continue; if (b->media == media_info_array[i]) { @@ -232,27 +233,29 @@ struct sk_buff *tipc_bearer_get_names(void) return buf; } -void tipc_bearer_add_dest(u32 bearer_id, u32 dest) +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b_ptr) { - tipc_bcbearer_sort(&b_ptr->nodes, dest, true); + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true); tipc_disc_add_dest(b_ptr->link_req); } rcu_read_unlock(); } -void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (b_ptr) { - tipc_bcbearer_sort(&b_ptr->nodes, dest, false); + tipc_bcbearer_sort(net, &b_ptr->nodes, dest, false); tipc_disc_remove_dest(b_ptr->link_req); } rcu_read_unlock(); @@ -264,6 +267,7 @@ void tipc_bearer_remove_dest(u32 bearer_id, u32 dest) int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 priority) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; struct tipc_media *m_ptr; struct tipc_bearer_names b_names; @@ -315,7 +319,7 @@ restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (!b_ptr) { bearer_id = i; continue; @@ -349,7 +353,7 @@ restart: strcpy(b_ptr->name, name); b_ptr->media = m_ptr; - res = m_ptr->enable_media(b_ptr); + res = m_ptr->enable_media(net, b_ptr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); @@ -371,7 +375,7 @@ restart: return -EINVAL; } - 
rcu_assign_pointer(bearer_list[bearer_id], b_ptr); + rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -398,6 +402,7 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, bool shutting_down) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 i; pr_info("Disabling bearer <%s>\n", b_ptr->name); @@ -408,8 +413,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, tipc_disc_delete(b_ptr->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == rtnl_dereference(bearer_list[i])) { - RCU_INIT_POINTER(bearer_list[i], NULL); + if (b_ptr == rtnl_dereference(tn->bearer_list[i])) { + RCU_INIT_POINTER(tn->bearer_list[i], NULL); break; } } @@ -421,7 +426,7 @@ int tipc_disable_bearer(struct net *net, const char *name) struct tipc_bearer *b_ptr; int res; - b_ptr = tipc_bearer_find(name); + b_ptr = tipc_bearer_find(net, name); if (b_ptr == NULL) { pr_warn("Attempt to disable unknown bearer <%s>\n", name); res = -EINVAL; @@ -432,13 +437,13 @@ int tipc_disable_bearer(struct net *net, const char *name) return res; } -int tipc_enable_l2_media(struct tipc_bearer *b) +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) { struct net_device *dev; char *driver_name = strchr((const char *)b->name, ':') + 1; /* Find device with specified name */ - dev = dev_get_by_name(&init_net, driver_name); + dev = dev_get_by_name(net, driver_name); if (!dev) return -ENODEV; @@ -514,13 +519,14 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, * The media send routine must not alter the buffer being passed in * as it may be needed for later retransmission! 
*/ -void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (likely(b_ptr)) b_ptr->media->send_msg(buf, b_ptr, dest); rcu_read_unlock(); @@ -630,14 +636,15 @@ void tipc_bearer_cleanup(void) void tipc_bearer_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(bearer_list[i]); + b_ptr = rtnl_dereference(tn->bearer_list[i]); if (b_ptr) { bearer_disable(net, b_ptr, true); - bearer_list[i] = NULL; + tn->bearer_list[i] = NULL; } } } @@ -694,6 +701,8 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) int i = cb->args[0]; struct tipc_bearer *bearer; struct tipc_nl_msg msg; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); if (i == MAX_BEARERS) return 0; @@ -704,7 +713,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); for (i = 0; i < MAX_BEARERS; i++) { - bearer = rtnl_dereference(bearer_list[i]); + bearer = rtnl_dereference(tn->bearer_list[i]); if (!bearer) continue; @@ -726,6 +735,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) struct tipc_bearer *bearer; struct tipc_nl_msg msg; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -749,7 +759,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) msg.seq = info->snd_seq; rtnl_lock(); - bearer = tipc_bearer_find(name); + bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; goto err_out; @@ -791,7 +801,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); - bearer = tipc_bearer_find(name); + bearer = tipc_bearer_find(net, name); if (!bearer) { rtnl_unlock(); return -EINVAL; @@ -861,6 +871,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -876,7 +887,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); rtnl_lock(); - b = tipc_bearer_find(name); + b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); return -EINVAL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 91a8eeea309c..01722d6b2058 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -92,7 +92,7 @@ struct tipc_media { int (*send_msg)(struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); - int (*enable_media)(struct tipc_bearer *b_ptr); + int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr); void (*disable_media)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, @@ -159,8 +159,6 @@ struct tipc_bearer_names { struct tipc_link; -extern struct tipc_bearer __rcu *bearer_list[]; - /* * TIPC routines available to supported media types */ @@ -193,20 +191,20 @@ int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 
new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); -int tipc_enable_l2_media(struct tipc_bearer *b); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); -struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(u32 bearer_id, u32 dest); -void tipc_bearer_remove_dest(u32 bearer_id, u32 dest); -struct tipc_bearer *tipc_bearer_find(const char *name); +struct sk_buff *tipc_bearer_get_names(struct net *net); +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(struct net *net); -void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf, +void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, struct tipc_media_addr *dest); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/config.c b/net/tipc/config.c index cf2d417ad488..ac73291d0d32 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -251,7 +251,7 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_nametbl_get(req_tlv_area, req_tlv_space); break; case TIPC_CMD_GET_BEARER_NAMES: - rep_tlv_buf = tipc_bearer_get_names(); + rep_tlv_buf = tipc_bearer_get_names(net); break; case TIPC_CMD_GET_MEDIA_NAMES: rep_tlv_buf = tipc_media_get_names(); diff --git a/net/tipc/core.h b/net/tipc/core.h index 4fb113397e7b..75a332b1968e 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -60,6 +60,7 @@ #include #include "node.h" +#include "bearer.h" #define TIPC_MOD_VER "2.0.0" @@ -87,6 +88,9 @@ struct tipc_net { struct list_head node_list; u32 num_nodes; u32 num_links; + + /* Bearer list */ + struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; }; #ifdef CONFIG_SYSCTL diff --git a/net/tipc/discover.c b/net/tipc/discover.c index f0fd8b449aef..f93dd3dd621b 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -51,6 +51,7 @@ /** * struct tipc_link_req - information about an ongoing link setup request * @bearer_id: identity of bearer issuing requests + * @net: network namespace instance * @dest: destination address for request messages * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. 
with an active link) @@ -62,6 +63,7 @@ struct tipc_link_req { u32 bearer_id; struct tipc_media_addr dest; + struct net *net; u32 domain; int num_nodes; spinlock_t lock; @@ -254,7 +256,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, rbuf = tipc_buf_acquire(INT_H_SIZE); if (rbuf) { tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); - tipc_bearer_send(bearer->identity, rbuf, &maddr); + tipc_bearer_send(net, bearer->identity, rbuf, &maddr); kfree_skb(rbuf); } } @@ -328,7 +330,7 @@ static void disc_timeout(unsigned long data) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest); req->timer_intv *= 2; @@ -370,6 +372,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); memcpy(&req->dest, dest, sizeof(*dest)); + req->net = net; req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; req->num_nodes = 0; @@ -378,7 +381,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); return 0; } @@ -405,11 +408,12 @@ void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) spin_lock_bh(&req->lock); tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + req->net = net; req->bearer_id = b_ptr->identity; req->domain = b_ptr->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; mod_timer(&req->timer, jiffies + req->timer_intv); - tipc_bearer_send(req->bearer_id, req->buf, &req->dest); + tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); spin_unlock_bh(&req->lock); } diff --git a/net/tipc/link.c b/net/tipc/link.c index f6505652742e..be21e6ac73fe 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -129,11 +129,13 @@ static unsigned int align(unsigned int i) static void link_init_max_pkt(struct tipc_link *l_ptr) { + struct tipc_node *node = l_ptr->owner; + struct tipc_net *tn = net_generic(node->net, tipc_net_id); struct tipc_bearer *b_ptr; u32 max_pkt; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); if (!b_ptr) { rcu_read_unlock(); return; @@ -431,7 +433,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) return; tipc_node_link_down(l_ptr->owner, l_ptr); - tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr); + tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; @@ -471,11 +473,14 @@ void tipc_link_reset_list(struct net *net, unsigned int bearer_id) rcu_read_unlock(); } -static void link_activate(struct tipc_link *l_ptr) +static void link_activate(struct tipc_link *link) { - l_ptr->next_in_no = l_ptr->stats.recv_info = 1; - tipc_node_link_up(l_ptr->owner, l_ptr); - tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr); + struct tipc_node *node = link->owner; + + link->next_in_no = 1; + link->stats.recv_info = 1; + tipc_node_link_up(node, link); + tipc_bearer_add_dest(node->net, link->bearer_id, link->addr); } /** @@ -707,7 +712,8 @@ drop: * Only the socket functions tipc_send_stream() and tipc_send_packet() need * to act on the return 
value, since they may need to do more send attempts. */ -int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list) { struct tipc_msg *msg = buf_msg(skb_peek(list)); uint psz = msg_size(msg); @@ -740,7 +746,8 @@ int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list) if (skb_queue_len(outqueue) < sndlim) { __skb_queue_tail(outqueue, skb); - tipc_bearer_send(link->bearer_id, skb, addr); + tipc_bearer_send(net, link->bearer_id, + skb, addr); link->next_out = NULL; link->unacked_window = 0; } else if (tipc_msg_bundle(outqueue, skb, mtu)) { @@ -774,7 +781,7 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) struct sk_buff_head head; skb2list(skb, &head); - return __tipc_link_xmit(link, &head); + return __tipc_link_xmit(link->owner->net, link, &head); } int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, @@ -808,7 +815,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, tipc_node_lock(node); link = node->active_links[selector & 1]; if (link) - rc = __tipc_link_xmit(link, list); + rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); } @@ -900,7 +907,8 @@ void tipc_link_push_packets(struct tipc_link *l_ptr) msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); if (msg_user(msg) == MSG_BUNDLER) TIPC_SKB_CB(skb)->bundling = false; - tipc_bearer_send(l_ptr->bearer_id, skb, + tipc_bearer_send(l_ptr->owner->net, + l_ptr->bearer_id, skb, &l_ptr->media_addr); l_ptr->next_out = tipc_skb_queue_next(outqueue, skb); } else { @@ -997,7 +1005,8 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, msg = buf_msg(skb); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->bearer_id, skb, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb, + &l_ptr->media_addr); retransmits--; l_ptr->stats.retransmitted++; } @@ -1459,7 +1468,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); + tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf, + &l_ptr->media_addr); l_ptr->unacked_window = 0; kfree_skb(buf); } @@ -2037,7 +2047,7 @@ static int link_cmd_set_value(struct net *net, const char *name, u32 new_value, return res; } - b_ptr = tipc_bearer_find(name); + b_ptr = tipc_bearer_find(net, name); if (b_ptr) { switch (cmd) { case TIPC_CMD_SET_LINK_TOL: @@ -2295,10 +2305,11 @@ struct sk_buff *tipc_link_cmd_show_stats(struct net *net, static void link_print(struct tipc_link *l_ptr, const char *str) { + struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); struct tipc_bearer *b_ptr; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]); + b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); if (b_ptr) pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name); rcu_read_unlock(); diff --git a/net/tipc/link.h b/net/tipc/link.h index 380e27ee0f70..9df7fa4d3bdd 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -223,7 +223,8 @@ int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, u32 selector); -int __tipc_link_xmit(struct tipc_link *link, struct sk_buff_head 
*list); +int __tipc_link_xmit(struct net *net, struct tipc_link *link, + struct sk_buff_head *list); void tipc_link_bundle_rcv(struct net *net, struct sk_buff *buf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); diff --git a/net/tipc/net.c b/net/tipc/net.c index de18aacf3d64..38633e5f8a7d 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -117,7 +117,7 @@ int tipc_net_start(struct net *net, u32 addr) tipc_own_addr = addr; tipc_named_reinit(); tipc_sk_reinit(); - res = tipc_bclink_init(); + res = tipc_bclink_init(net); if (res) return res; @@ -140,7 +140,7 @@ void tipc_net_stop(struct net *net) tipc_own_addr); rtnl_lock(); tipc_bearer_stop(net); - tipc_bclink_stop(); + tipc_bclink_stop(net); tipc_node_stop(net); rtnl_unlock(); -- cgit v1.2.3 From 1da465683a93142488a54a9038155f23d6349441 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:07 +0800 Subject: tipc: make tipc broadcast link support net namespace TIPC broadcast link is statically established and its relevant states are maintained with the global variables: "bcbearer", "bclink" and "bcl". Allowing different namespace to own different broadcast link instances, these variables must be moved to tipc_net structure and broadcast link instances would be allocated and initialized when namespace is created. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/addr.c | 2 +- net/tipc/addr.h | 3 + net/tipc/bcast.c | 297 ++++++++++++++++++++++++-------------------------- net/tipc/bcast.h | 97 ++++++++++++----- net/tipc/bearer.c | 7 +- net/tipc/bearer.h | 21 +++- net/tipc/core.h | 10 ++ net/tipc/link.c | 17 +-- net/tipc/msg.h | 2 +- net/tipc/name_table.c | 1 + net/tipc/net.c | 1 + net/tipc/node.c | 6 +- 12 files changed, 259 insertions(+), 205 deletions(-) (limited to 'net') diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 357b74b26f9e..9e6eeb450fe1 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -34,7 +34,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "core.h" +#include #include "addr.h" /** diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 3e1f18e29f1e..4e364c4f1359 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,6 +37,9 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H +#include +#include + #define TIPC_ZONE_MASK 0xff000000u #define TIPC_CLUSTER_MASK 0xfffff000u diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e7c538304595..bc58097ebad2 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -35,77 +35,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "core.h" -#include "link.h" #include "socket.h" #include "msg.h" #include "bcast.h" #include "name_distr.h" +#include "core.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ -#define BCBEARER MAX_BEARERS - -/** - * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link - * @primary: pointer to primary bearer - * @secondary: pointer to secondary bearer - * - * Bearers must have same priority and same set of reachable destinations - * to be paired. 
- */ - -struct tipc_bcbearer_pair { - struct tipc_bearer *primary; - struct tipc_bearer *secondary; -}; - -/** - * struct tipc_bcbearer - bearer used by broadcast link - * @bearer: (non-standard) broadcast bearer structure - * @media: (non-standard) broadcast media structure - * @bpairs: array of bearer pairs - * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() - * @remains: temporary node map used by tipc_bcbearer_send() - * @remains_new: temporary node map used tipc_bcbearer_send() - * - * Note: The fields labelled "temporary" are incorporated into the bearer - * to avoid consuming potentially limited stack space through the use of - * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bclink_lock". - */ -struct tipc_bcbearer { - struct tipc_bearer bearer; - struct tipc_media media; - struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; - struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct tipc_node_map remains; - struct tipc_node_map remains_new; -}; - -/** - * struct tipc_bclink - link used for broadcast messages - * @lock: spinlock governing access to structure - * @link: (non-standard) broadcast link structure - * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states - * @bcast_nodes: map of broadcast-capable nodes - * @retransmit_to: node that most recently requested a retransmit - * - * Handles sequence numbering, fragmentation, bundling, etc. - */ -struct tipc_bclink { - spinlock_t lock; - struct tipc_link link; - struct tipc_node node; - unsigned int flags; - struct tipc_node_map bcast_nodes; - struct tipc_node *retransmit_to; -}; - -static struct tipc_bcbearer *bcbearer; -static struct tipc_bclink *bclink; -static struct tipc_link *bcl; const char tipc_bclink_name[] = "broadcast-link"; @@ -115,25 +52,28 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); -static void tipc_bclink_lock(void) +static void tipc_bclink_lock(struct net *net) { - spin_lock_bh(&bclink->lock); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + spin_lock_bh(&tn->bclink->lock); } -static void tipc_bclink_unlock(void) +static void tipc_bclink_unlock(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node = NULL; - if (likely(!bclink->flags)) { - spin_unlock_bh(&bclink->lock); + if (likely(!tn->bclink->flags)) { + spin_unlock_bh(&tn->bclink->lock); return; } - if (bclink->flags & TIPC_BCLINK_RESET) { - bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(); + if (tn->bclink->flags & TIPC_BCLINK_RESET) { + tn->bclink->flags &= ~TIPC_BCLINK_RESET; + node = tipc_bclink_retransmit_to(net); } - spin_unlock_bh(&bclink->lock); + spin_unlock_bh(&tn->bclink->lock); if (node) tipc_link_reset_all(node); @@ -144,9 +84,11 @@ uint tipc_bclink_get_mtu(void) return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(unsigned int flags) +void tipc_bclink_set_flags(struct net *net, unsigned int flags) { - bclink->flags |= flags; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tn->bclink->flags |= flags; } static u32 bcbuf_acks(struct sk_buff *buf) @@ -164,31 +106,40 @@ static void bcbuf_decr_acks(struct sk_buff *buf) bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); } -void tipc_bclink_add_node(u32 addr) +void tipc_bclink_add_node(struct net *net, u32 addr) { - 
tipc_bclink_lock(); - tipc_nmap_add(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_add(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -void tipc_bclink_remove_node(u32 addr) +void tipc_bclink_remove_node(struct net *net, u32 addr) { - tipc_bclink_lock(); - tipc_nmap_remove(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -static void bclink_set_last_sent(void) +static void bclink_set_last_sent(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (bcl->next_out) bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1); else bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); } -u32 tipc_bclink_get_last_sent(void) +u32 tipc_bclink_get_last_sent(struct net *net) { - return bcl->fsm_msg_cnt; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bcl->fsm_msg_cnt; } static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) @@ -203,9 +154,11 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) * * Called with bclink_lock locked */ -struct tipc_node *tipc_bclink_retransmit_to(void) +struct tipc_node *tipc_bclink_retransmit_to(struct net *net) { - return bclink->retransmit_to; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bclink->retransmit_to; } /** @@ -215,9 +168,10 @@ struct tipc_node *tipc_bclink_retransmit_to(void) * * Called with bclink_lock locked */ -static void bclink_retransmit_pkt(u32 after, u32 to) +static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) { struct sk_buff *skb; + struct tipc_link *bcl = tn->bcl; skb_queue_walk(&bcl->outqueue, skb) { if (more(buf_seqno(skb), after)) { @@ -234,9 +188,10 @@ static void bclink_retransmit_pkt(u32 after, u32 to) */ void tipc_bclink_wakeup_users(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb; - while ((skb = skb_dequeue(&bclink->link.waiting_sks))) + while ((skb = skb_dequeue(&tn->bclink->link.waiting_sks))) tipc_sk_rcv(net, skb); } @@ -252,10 +207,12 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct sk_buff *skb, *tmp; struct sk_buff *next; unsigned int released = 0; + struct net *net = n_ptr->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); - tipc_bclink_lock(); + tipc_bclink_lock(net); /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&bcl->outqueue); + skb = skb_peek(&tn->bcl->outqueue); if (!skb) goto exit; @@ -266,43 +223,43 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) * acknowledge sent messages only (if other nodes still exist) * or both sent and unsent messages (otherwise) */ - if (bclink->bcast_nodes.count) - acked = bcl->fsm_msg_cnt; + if (tn->bclink->bcast_nodes.count) + acked = tn->bcl->fsm_msg_cnt; else - acked = bcl->next_out_no; + acked = tn->bcl->next_out_no; } else { /* * Bail out if specified sequence number does not correspond * to a message that has been sent and not yet acknowledged */ if (less(acked, buf_seqno(skb)) || - less(bcl->fsm_msg_cnt, acked) || + less(tn->bcl->fsm_msg_cnt, acked) || less_eq(acked, n_ptr->bclink.acked)) goto exit; } /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&bcl->outqueue, skb) { + 
skb_queue_walk(&tn->bcl->outqueue, skb) { if (more(buf_seqno(skb), n_ptr->bclink.acked)) break; } /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&bcl->outqueue, skb, tmp) { + skb_queue_walk_from_safe(&tn->bcl->outqueue, skb, tmp) { if (more(buf_seqno(skb), acked)) break; - next = tipc_skb_queue_next(&bcl->outqueue, skb); - if (skb != bcl->next_out) { + next = tipc_skb_queue_next(&tn->bcl->outqueue, skb); + if (skb != tn->bcl->next_out) { bcbuf_decr_acks(skb); } else { bcbuf_set_acks(skb, 0); - bcl->next_out = next; - bclink_set_last_sent(); + tn->bcl->next_out = next; + bclink_set_last_sent(net); } if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &bcl->outqueue); + __skb_unlink(skb, &tn->bcl->outqueue); kfree_skb(skb); released = 1; } @@ -310,15 +267,15 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) n_ptr->bclink.acked = acked; /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(bcl->next_out)) { - tipc_link_push_packets(bcl); - bclink_set_last_sent(); + if (unlikely(tn->bcl->next_out)) { + tipc_link_push_packets(tn->bcl); + bclink_set_last_sent(net); } - if (unlikely(released && !skb_queue_empty(&bcl->waiting_sks))) + if (unlikely(released && !skb_queue_empty(&tn->bcl->waiting_sks))) n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; exit: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } /** @@ -368,10 +325,10 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, msg_set_bcgap_after(msg, n_ptr->bclink.last_in); msg_set_bcgap_to(msg, to); - tipc_bclink_lock(); + tipc_bclink_lock(net); tipc_bearer_send(net, MAX_BEARERS, buf, NULL); - bcl->stats.sent_nacks++; - tipc_bclink_unlock(); + tn->bcl->stats.sent_nacks++; + tipc_bclink_unlock(net); kfree_skb(buf); n_ptr->bclink.oos_state++; @@ -410,6 +367,9 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) */ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + struct tipc_bclink *bclink = tn->bclink; int rc = 0; int bc = 0; struct sk_buff *skb; @@ -423,19 +383,19 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) /* Broadcast to all other nodes */ if (likely(bclink)) { - tipc_bclink_lock(); + tipc_bclink_lock(net); if (likely(bclink->bcast_nodes.count)) { rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { u32 len = skb_queue_len(&bcl->outqueue); - bclink_set_last_sent(); + bclink_set_last_sent(net); bcl->stats.queue_sz_counts++; bcl->stats.accu_queue_sz += len; } bc = 1; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } if (unlikely(!bc)) @@ -457,10 +417,12 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) */ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) { + struct tipc_net *tn = net_generic(node->net, tipc_net_id); + bclink_update_last_sent(node, seqno); node->bclink.last_in = seqno; node->bclink.oos_state = 0; - bcl->stats.recv_info++; + tn->bcl->stats.recv_info++; /* * Unicast an ACK periodically, ensuring that @@ -469,7 +431,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], STATE_MSG, 0, 0, 0, 0, 0); - bcl->stats.sent_acks++; + tn->bcl->stats.sent_acks++; } } @@ -481,6 +443,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) { struct tipc_net *tn = 
net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; struct tipc_msg *msg = buf_msg(buf); struct tipc_node *node; u32 next_in; @@ -506,12 +469,12 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) if (msg_destnode(msg) == tipc_own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); - tipc_bclink_lock(); + tipc_bclink_lock(net); bcl->stats.recv_nacks++; - bclink->retransmit_to = node; - bclink_retransmit_pkt(msg_bcgap_after(msg), + tn->bclink->retransmit_to = node; + bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } else { tipc_node_unlock(node); bclink_peek_nack(net, msg); @@ -527,47 +490,47 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) receive: /* Deliver message to destination */ if (likely(msg_isdata(msg))) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); if (likely(msg_mcast(msg))) tipc_sk_mcast_rcv(net, buf); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); tipc_link_bundle_rcv(net, buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) goto unlock; - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; if (buf) { bcl->stats.recv_fragmented++; msg = buf_msg(buf); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); goto receive; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); tipc_named_rcv(net, buf); } else { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); kfree_skb(buf); } @@ -605,14 +568,14 @@ receive: buf = NULL; } - tipc_bclink_lock(); + tipc_bclink_lock(net); if (deferred) bcl->stats.deferred_recv++; else bcl->stats.duplicates++; - tipc_bclink_unlock(); + tipc_bclink_unlock(net); unlock: tipc_node_unlock(node); @@ -623,7 +586,7 @@ exit: u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) { return (n_ptr->bclink.recv_permitted && - (tipc_bclink_get_last_sent() != n_ptr->bclink.acked)); + (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked)); } @@ -636,13 +599,15 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) * Returns 0 (packet sent successfully) under all circumstances, * since the broadcast link's pseudo-bearer never blocks */ -static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, +static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, + struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; struct tipc_msg *msg = buf_msg(buf); - struct net *net = sock_net(buf->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; + struct tipc_bclink *bclink = tn->bclink; /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; @@ -653,7 +618,7 @@ static int 
tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, bcbuf_set_acks(buf, bclink->bcast_nodes.count); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tn->net_id); - bcl->stats.sent_info++; + tn->bcl->stats.sent_info++; if (WARN_ON(!bclink->bcast_nodes.count)) { dump_stack(); @@ -708,13 +673,14 @@ void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, u32 node, bool action) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; struct tipc_bearer *b; int b_index; int pri; - tipc_bclink_lock(); + tipc_bclink_lock(net); if (action) tipc_nmap_add(nm_ptr, node); @@ -761,7 +727,7 @@ void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, bp_curr++; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, @@ -815,17 +781,19 @@ msg_full: return -EMSGSIZE; } -int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; - tipc_bclink_lock(); + tipc_bclink_lock(net); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_LINK_GET); @@ -860,7 +828,7 @@ int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) if (err) goto attr_msg_full; - tipc_bclink_unlock(); + tipc_bclink_unlock(net); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -871,21 +839,23 @@ prop_msg_full: attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } -int tipc_bclink_stats(char *buf, const u32 buf_size) +int tipc_bclink_stats(struct net *net, char *buf, const u32 buf_size) { int ret; struct tipc_stats *s; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; - tipc_bclink_lock(); + tipc_bclink_lock(net); s = &bcl->stats; @@ -914,37 +884,46 @@ int tipc_bclink_stats(char *buf, const u32 buf_size) s->queue_sz_counts ? 
(s->accu_queue_sz / s->queue_sz_counts) : 0); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return ret; } -int tipc_bclink_reset_stats(void) +int tipc_bclink_reset_stats(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (!bcl) return -ENOPROTOOPT; - tipc_bclink_lock(); + tipc_bclink_lock(net); memset(&bcl->stats, 0, sizeof(bcl->stats)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } -int tipc_bclink_set_queue_limits(u32 limit) +int tipc_bclink_set_queue_limits(struct net *net, u32 limit) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (!bcl) return -ENOPROTOOPT; if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) return -EINVAL; - tipc_bclink_lock(); + tipc_bclink_lock(net); tipc_link_set_queue_limits(bcl, limit); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } int tipc_bclink_init(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); if (!bcbearer) @@ -969,12 +948,16 @@ int tipc_bclink_init(struct net *net) spin_lock_init(&bclink->node.lock); __skb_queue_head_init(&bclink->node.waiting_sks); bcl->owner = &bclink->node; + bcl->owner->net = net; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); + tn->bcbearer = bcbearer; + tn->bclink = bclink; + tn->bcl = bcl; return 0; } @@ -982,14 +965,14 @@ void tipc_bclink_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - tipc_bclink_lock(); - tipc_link_purge_queues(bcl); - tipc_bclink_unlock(); + tipc_bclink_lock(net); + tipc_link_purge_queues(tn->bcl); + tipc_bclink_unlock(net); RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); synchronize_net(); - kfree(bcbearer); - kfree(bclink); + kfree(tn->bcbearer); + kfree(tn->bclink); } /** diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 692efb662253..a4583a109486 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -37,23 +37,13 @@ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H -#include "netlink.h" +#include +#include "link.h" +#include "node.h" -#define MAX_NODES 4096 -#define WSIZE 32 -#define TIPC_BCLINK_RESET 1 - -/** - * struct tipc_node_map - set of node identifiers - * @count: # of nodes in set - * @map: bitmap of node identifiers that are in the set - */ -struct tipc_node_map { - u32 count; - u32 map[MAX_NODES / WSIZE]; -}; - -#define PLSIZE 32 +#define TIPC_BCLINK_RESET 1 +#define PLSIZE 32 +#define BCBEARER MAX_BEARERS /** * struct tipc_port_list - set of node local destination ports @@ -67,9 +57,64 @@ struct tipc_port_list { u32 ports[PLSIZE]; }; +/** + * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link + * @primary: pointer to primary bearer + * @secondary: pointer to secondary bearer + * + * Bearers must have same priority and same set of reachable destinations + * to be paired. 
+ */ -struct tipc_node; +struct tipc_bcbearer_pair { + struct tipc_bearer *primary; + struct tipc_bearer *secondary; +}; +/** + * struct tipc_bcbearer - bearer used by broadcast link + * @bearer: (non-standard) broadcast bearer structure + * @media: (non-standard) broadcast media structure + * @bpairs: array of bearer pairs + * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() + * @remains: temporary node map used by tipc_bcbearer_send() + * @remains_new: temporary node map used tipc_bcbearer_send() + * + * Note: The fields labelled "temporary" are incorporated into the bearer + * to avoid consuming potentially limited stack space through the use of + * large local variables within multicast routines. Concurrent access is + * prevented through use of the spinlock "bclink_lock". + */ +struct tipc_bcbearer { + struct tipc_bearer bearer; + struct tipc_media media; + struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; + struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; + struct tipc_node_map remains; + struct tipc_node_map remains_new; +}; + +/** + * struct tipc_bclink - link used for broadcast messages + * @lock: spinlock governing access to structure + * @link: (non-standard) broadcast link structure + * @node: (non-standard) node structure representing b'cast link's peer node + * @flags: represent bclink states + * @bcast_nodes: map of broadcast-capable nodes + * @retransmit_to: node that most recently requested a retransmit + * + * Handles sequence numbering, fragmentation, bundling, etc. + */ +struct tipc_bclink { + spinlock_t lock; + struct tipc_link link; + struct tipc_node node; + unsigned int flags; + struct tipc_node_map bcast_nodes; + struct tipc_node *retransmit_to; +}; + +struct tipc_node; extern const char tipc_bclink_name[]; /** @@ -86,24 +131,24 @@ void tipc_port_list_free(struct tipc_port_list *pl_ptr); int tipc_bclink_init(struct net *net); void tipc_bclink_stop(struct net *net); -void tipc_bclink_set_flags(unsigned int flags); -void tipc_bclink_add_node(u32 addr); -void tipc_bclink_remove_node(u32 addr); -struct tipc_node *tipc_bclink_retransmit_to(void); +void tipc_bclink_set_flags(struct net *tn, unsigned int flags); +void tipc_bclink_add_node(struct net *net, u32 addr); +void tipc_bclink_remove_node(struct net *net, u32 addr); +struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); -u32 tipc_bclink_get_last_sent(void); +u32 tipc_bclink_get_last_sent(struct net *net); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, u32 last_sent); -int tipc_bclink_stats(char *stats_buf, const u32 buf_size); -int tipc_bclink_reset_stats(void); -int tipc_bclink_set_queue_limits(u32 limit); +int tipc_bclink_stats(struct net *net, char *stats_buf, const u32 buf_size); +int tipc_bclink_reset_stats(struct net *net); +int tipc_bclink_set_queue_limits(struct net *net, u32 limit); void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, u32 node, bool action); uint tipc_bclink_get_mtu(void); int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); void tipc_bclink_wakeup_users(struct net *net); -int tipc_nl_add_bc_link(struct tipc_nl_msg *msg); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 2fe9dcb418d4..9a0d6ed5c96c 100644 --- a/net/tipc/bearer.c 
+++ b/net/tipc/bearer.c @@ -40,6 +40,7 @@ #include "bearer.h" #include "link.h" #include "discover.h" +#include "bcast.h" #define MAX_ADDR_STR 60 @@ -482,8 +483,8 @@ void tipc_disable_l2_media(struct tipc_bearer *b) * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address */ -int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, - struct tipc_media_addr *dest) +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest) { struct sk_buff *clone; struct net_device *dev; @@ -528,7 +529,7 @@ void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, rcu_read_lock(); b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (likely(b_ptr)) - b_ptr->media->send_msg(buf, b_ptr, dest); + b_ptr->media->send_msg(net, buf, b_ptr, dest); rcu_read_unlock(); } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 01722d6b2058..c035e3e24764 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -37,12 +37,13 @@ #ifndef _TIPC_BEARER_H #define _TIPC_BEARER_H -#include "bcast.h" #include "netlink.h" #include #define MAX_BEARERS 2 #define MAX_MEDIA 2 +#define MAX_NODES 4096 +#define WSIZE 32 /* Identifiers associated with TIPC message header media address info * - address info field is 32 bytes long @@ -58,6 +59,16 @@ #define TIPC_MEDIA_TYPE_ETH 1 #define TIPC_MEDIA_TYPE_IB 2 +/** + * struct tipc_node_map - set of node identifiers + * @count: # of nodes in set + * @map: bitmap of node identifiers that are in the set + */ +struct tipc_node_map { + u32 count; + u32 map[MAX_NODES / WSIZE]; +}; + /** * struct tipc_media_addr - destination address used by TIPC bearers * @value: address info (format defined by media) @@ -89,7 +100,7 @@ struct tipc_bearer; * @name: media name */ struct tipc_media { - int (*send_msg)(struct sk_buff *buf, + int (*send_msg)(struct net *net, struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr); @@ -157,8 +168,6 @@ struct tipc_bearer_names { char if_name[TIPC_MAX_IF_NAME]; }; -struct tipc_link; - /* * TIPC routines available to supported media types */ @@ -193,8 +202,8 @@ void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); -int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b, - struct tipc_media_addr *dest); +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest); struct sk_buff *tipc_bearer_get_names(struct net *net); void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); diff --git a/net/tipc/core.h b/net/tipc/core.h index 75a332b1968e..3f6f9e07da99 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -61,6 +61,11 @@ #include "node.h" #include "bearer.h" +#include "bcast.h" +#include "netlink.h" +#include "link.h" +#include "node.h" +#include "msg.h" #define TIPC_MOD_VER "2.0.0" @@ -91,6 +96,11 @@ struct tipc_net { /* Bearer list */ struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; + + /* Broadcast link */ + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; }; #ifdef CONFIG_SYSCTL diff --git a/net/tipc/link.c b/net/tipc/link.c index be21e6ac73fe..a84d5c67997e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -941,6 +941,7 @@ static void 
link_retransmit_failure(struct tipc_link *l_ptr, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); + struct net *net = l_ptr->owner->net; pr_warn("Retransmission failure on link <%s>\n", l_ptr->name); @@ -958,7 +959,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, pr_cont("Outstanding acks: %lu\n", (unsigned long) TIPC_SKB_CB(buf)->handle); - n_ptr = tipc_bclink_retransmit_to(); + n_ptr = tipc_bclink_retransmit_to(net); tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); @@ -973,7 +974,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, tipc_node_unlock(n_ptr); - tipc_bclink_set_flags(TIPC_BCLINK_RESET); + tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); l_ptr->stale_count = 0; } } @@ -1404,7 +1405,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, msg_set_type(msg, msg_typ); msg_set_net_plane(msg, l_ptr->net_plane); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_last_bcast(msg, tipc_bclink_get_last_sent()); + msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net)); if (msg_typ == STATE_MSG) { u32 next_sent = mod(l_ptr->next_out_no); @@ -2105,7 +2106,7 @@ struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area, if (!strcmp(args->name, tipc_bclink_name)) { if ((cmd == TIPC_CMD_SET_LINK_WINDOW) && - (tipc_bclink_set_queue_limits(new_value) == 0)) + (tipc_bclink_set_queue_limits(net, new_value) == 0)) return tipc_cfg_reply_none(); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change setting on broadcast link)"); @@ -2143,7 +2144,7 @@ struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, link_name = (char *)TLV_DATA(req_tlv_area); if (!strcmp(link_name, tipc_bclink_name)) { - if (tipc_bclink_reset_stats()) + if (tipc_bclink_reset_stats(net)) return tipc_cfg_reply_error_string("link not found"); return tipc_cfg_reply_none(); } @@ -2191,7 +2192,7 @@ static int tipc_link_stats(struct net *net, const char *name, char *buf, int ret; if (!strcmp(name, tipc_bclink_name)) - return tipc_bclink_stats(buf, buf_size); + return tipc_bclink_stats(net, buf, buf_size); node = tipc_link_find_owner(net, name, &bearer_id); if (!node) @@ -2640,7 +2641,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) prev_node = node->addr; } } else { - err = tipc_nl_add_bc_link(&msg); + err = tipc_nl_add_bc_link(net, &msg); if (err) goto out; @@ -2739,7 +2740,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(); + err = tipc_bclink_reset_stats(net); if (err) return err; return 0; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 0065a2e8ad9b..75f324287244 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -37,7 +37,7 @@ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H -#include "bearer.h" +#include /* * Constants and routines used to read and write TIPC payload message headers diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index cf177907bb53..beed5fdda004 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -39,6 +39,7 @@ #include "name_table.h" #include "name_distr.h" #include "subscr.h" +#include "bcast.h" #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ diff --git a/net/tipc/net.c b/net/tipc/net.c index 38633e5f8a7d..7548ba80d289 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -41,6 +41,7 @@ #include "socket.h" #include "node.h" #include 
"config.h" +#include "bcast.h" static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, diff --git a/net/tipc/node.c b/net/tipc/node.c index a0ca1ac53119..3db501260de1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -368,8 +368,8 @@ static void node_established_contact(struct tipc_node *n_ptr) { n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; n_ptr->bclink.oos_state = 0; - n_ptr->bclink.acked = tipc_bclink_get_last_sent(); - tipc_bclink_add_node(n_ptr->addr); + n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); + tipc_bclink_add_node(n_ptr->net, n_ptr->addr); } static void node_lost_contact(struct tipc_node *n_ptr) @@ -389,7 +389,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) n_ptr->bclink.reasm_buf = NULL; } - tipc_bclink_remove_node(n_ptr->addr); + tipc_bclink_remove_node(n_ptr->net, n_ptr->addr); tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); n_ptr->bclink.recv_permitted = false; -- cgit v1.2.3 From e05b31f4bf8994d49322e9afb004ad479a129db0 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:08 +0800 Subject: tipc: make tipc socket support net namespace Now tipc socket table is statically allocated as a global variable. Through it, we can look up one socket instance with port ID, insert a new socket instance to the table, and delete a socket from the table. But when tipc supports net namespace, each namespace must own its specific socket table. So the global variable of socket table must be redefined in tipc_net structure. As a concequence, a new socket table will be allocated when a new namespace is created, and a socket table will be deallocated when namespace is destroyed. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/config.c | 2 +- net/tipc/core.c | 12 ++++-------- net/tipc/core.h | 4 ++++ net/tipc/net.c | 2 +- net/tipc/socket.c | 48 +++++++++++++++++++++++++++++------------------- net/tipc/socket.h | 8 ++++---- 6 files changed, 43 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index ac73291d0d32..20b1c5812f00 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -257,7 +257,7 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_media_get_names(); break; case TIPC_CMD_SHOW_PORTS: - rep_tlv_buf = tipc_sk_socks_show(); + rep_tlv_buf = tipc_sk_socks_show(net); break; case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); diff --git a/net/tipc/core.c b/net/tipc/core.c index 7b8443938caf..23ff3caa1ce6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -55,17 +55,20 @@ int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ static int __net_init tipc_init_net(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); + int err; tn->net_id = 4711; INIT_LIST_HEAD(&tn->node_list); spin_lock_init(&tn->node_list_lock); - return 0; + err = tipc_sk_rht_init(net); + return err; } static void __net_exit tipc_exit_net(struct net *net) { tipc_net_stop(net); + tipc_sk_rht_destroy(net); } static struct pernet_operations tipc_net_ops = { @@ -95,10 +98,6 @@ static int __init tipc_init(void) if (err) goto out_pernet; - err = tipc_sk_rht_init(); - if (err) - goto out_reftbl; - err = tipc_nametbl_init(); if (err) goto out_nametbl; @@ -136,8 +135,6 @@ out_socket: out_netlink: tipc_nametbl_stop(); out_nametbl: - tipc_sk_rht_destroy(); -out_reftbl: unregister_pernet_subsys(&tipc_net_ops); out_pernet: pr_err("Unable to start in single node mode\n"); @@ -153,7 +150,6 @@ static void __exit tipc_exit(void) tipc_nametbl_stop(); tipc_socket_stop(); tipc_unregister_sysctl(); - tipc_sk_rht_destroy(); pr_info("Deactivated\n"); } diff --git a/net/tipc/core.h b/net/tipc/core.h index 3f6f9e07da99..1a7f81643668 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -58,6 +58,7 @@ #include #include #include +#include #include "node.h" #include "bearer.h" @@ -101,6 +102,9 @@ struct tipc_net { struct tipc_bcbearer *bcbearer; struct tipc_bclink *bclink; struct tipc_link *bcl; + + /* Socket hash table */ + struct rhashtable sk_rht; }; #ifdef CONFIG_SYSCTL diff --git a/net/tipc/net.c b/net/tipc/net.c index 7548ba80d289..44ccf47c79a3 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -117,7 +117,7 @@ int tipc_net_start(struct net *net, u32 addr) tipc_own_addr = addr; tipc_named_reinit(); - tipc_sk_reinit(); + tipc_sk_reinit(net); res = tipc_bclink_init(net); if (res) return res; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 68831453bc0e..accb02cb3527 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -115,7 +115,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); -static struct tipc_sock *tipc_sk_lookup(u32 portid); +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); @@ -179,9 +179,6 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { * - port reference */ -/* Protects tipc socket hash table mutations */ -static struct rhashtable tipc_sk_rht; - static u32 tsk_peer_node(struct tipc_sock *tsk) { 
return msg_destnode(&tsk->phdr); @@ -1766,7 +1763,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) u32 dnode; /* Validate destination and message */ - tsk = tipc_sk_lookup(dport); + tsk = tipc_sk_lookup(net, dport); if (unlikely(!tsk)) { rc = tipc_msg_eval(skb, &dnode); goto exit; @@ -2245,8 +2242,9 @@ static int tipc_sk_show(struct tipc_sock *tsk, char *buf, return ret; } -struct sk_buff *tipc_sk_socks_show(void) +struct sk_buff *tipc_sk_socks_show(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); const struct bucket_table *tbl; struct rhash_head *pos; struct sk_buff *buf; @@ -2265,7 +2263,7 @@ struct sk_buff *tipc_sk_socks_show(void) pb_len = ULTRA_STRING_MAX_LEN; rcu_read_lock(); - tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); for (i = 0; i < tbl->size; i++) { rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2286,8 +2284,9 @@ struct sk_buff *tipc_sk_socks_show(void) /* tipc_sk_reinit: set non-zero address in all existing sockets * when we go from standalone to network mode. */ -void tipc_sk_reinit(void) +void tipc_sk_reinit(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); const struct bucket_table *tbl; struct rhash_head *pos; struct tipc_sock *tsk; @@ -2295,7 +2294,7 @@ void tipc_sk_reinit(void) int i; rcu_read_lock(); - tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); for (i = 0; i < tbl->size; i++) { rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2308,12 +2307,13 @@ void tipc_sk_reinit(void) rcu_read_unlock(); } -static struct tipc_sock *tipc_sk_lookup(u32 portid) +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock *tsk; rcu_read_lock(); - tsk = rhashtable_lookup(&tipc_sk_rht, &portid); + tsk = rhashtable_lookup(&tn->sk_rht, &portid); if (tsk) sock_hold(&tsk->sk); rcu_read_unlock(); @@ -2323,6 +2323,9 @@ static struct tipc_sock *tipc_sk_lookup(u32 portid) static int tipc_sk_insert(struct tipc_sock *tsk) { + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; @@ -2332,7 +2335,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk) portid = TIPC_MIN_PORT; tsk->portid = portid; sock_hold(&tsk->sk); - if (rhashtable_lookup_insert(&tipc_sk_rht, &tsk->node)) + if (rhashtable_lookup_insert(&tn->sk_rht, &tsk->node)) return 0; sock_put(&tsk->sk); } @@ -2343,15 +2346,17 @@ static int tipc_sk_insert(struct tipc_sock *tsk) static void tipc_sk_remove(struct tipc_sock *tsk) { struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (rhashtable_remove(&tipc_sk_rht, &tsk->node)) { + if (rhashtable_remove(&tn->sk_rht, &tsk->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } } -int tipc_sk_rht_init(void) +int tipc_sk_rht_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct rhashtable_params rht_params = { .nelem_hint = 192, .head_offset = offsetof(struct tipc_sock, node), @@ -2364,15 +2369,17 @@ int tipc_sk_rht_init(void) .shrink_decision = rht_shrink_below_30, }; - return rhashtable_init(&tipc_sk_rht, &rht_params); + return rhashtable_init(&tn->sk_rht, 
&rht_params); } -void tipc_sk_rht_destroy(void) +void tipc_sk_rht_destroy(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + /* Wait for socket readers to complete */ synchronize_net(); - rhashtable_destroy(&tipc_sk_rht); + rhashtable_destroy(&tn->sk_rht); } /** @@ -2730,10 +2737,12 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) struct rhash_head *pos; u32 prev_portid = cb->args[0]; u32 portid = prev_portid; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); int i; rcu_read_lock(); - tbl = rht_dereference_rcu((&tipc_sk_rht)->tbl, &tipc_sk_rht); + tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); for (i = 0; i < tbl->size; i++) { rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2839,6 +2848,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 tsk_portid = cb->args[0]; u32 last_publ = cb->args[1]; u32 done = cb->args[2]; + struct net *net = sock_net(skb->sk); struct tipc_sock *tsk; if (!tsk_portid) { @@ -2864,7 +2874,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (done) return 0; - tsk = tipc_sk_lookup(tsk_portid); + tsk = tipc_sk_lookup(net, tsk_portid); if (!tsk) return -EINVAL; diff --git a/net/tipc/socket.h b/net/tipc/socket.h index eb15c3107920..c15c4e121fe3 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -50,11 +50,11 @@ void tipc_sock_release_local(struct socket *sock); int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, int flags); int tipc_sk_rcv(struct net *net, struct sk_buff *buf); -struct sk_buff *tipc_sk_socks_show(void); +struct sk_buff *tipc_sk_socks_show(struct net *net); void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf); -void tipc_sk_reinit(void); -int tipc_sk_rht_init(void); -void tipc_sk_rht_destroy(void); +void tipc_sk_reinit(struct net *net); +int tipc_sk_rht_init(struct net *net); +void tipc_sk_rht_destroy(struct net *net); int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); -- cgit v1.2.3 From 4ac1c8d0ee9faf3a4be185cc4db1381fa0d81280 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:09 +0800 Subject: tipc: name tipc name table support net namespace The TIPC name table is used to store the mapping between TIPC service names and socket port IDs. When tipc supports namespaces, it allows users to publish service names that are owned by a certain namespace. Therefore, every namespace must have its own private name table, to prevent service names published in one namespace from being contaminated by service names published in another namespace. To achieve this, the name table global variable (i.e., nametbl) and its lock must be moved into the tipc_net structure, and a namespace parameter must be added to the functions that need to access the name table now defined there. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S.
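The mechanical part of this conversion, adding a struct net * parameter and resolving the per-namespace state before taking the (now per-namespace) lock, follows the same recipe sketched after the previous patch. A condensed, hypothetical before/after of one such function is shown below; the example_* names are made up and only mirror the shape of the change, not the real TIPC API.

/* Builds on the previous sketch: example_net grows a per-namespace name
 * table and the lock that used to be a global DEFINE_SPINLOCK().
 */
#include <linux/rhashtable.h>
#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

extern int example_net_id;		/* from the previous sketch */

struct example_name_table {
	u32 local_publ_count;
};

struct example_net {
	struct rhashtable sk_rht;		/* as before */
	spinlock_t nametbl_lock;		/* was: a file-scope spinlock */
	struct example_name_table *nametbl;	/* was: a file-scope pointer */
};

/*
 * Before:  void example_publ_bump(void)
 *          { spin_lock_bh(&nametbl_lock); nametbl->local_publ_count++; ... }
 *
 * After: the caller passes its namespace, which selects the table and lock.
 */
static void example_publ_bump(struct net *net)
{
	struct example_net *en = net_generic(net, example_net_id);

	spin_lock_bh(&en->nametbl_lock);
	en->nametbl->local_publ_count++;
	spin_unlock_bh(&en->nametbl_lock);
}

In the real patch the corresponding pieces are tn->nametbl_lock, tn->nametbl and helpers such as tipc_nametbl_insert_publ()/tipc_nametbl_remove_publ(), which now all take struct net * as their first argument, as the hunks below show.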
Miller --- net/tipc/config.c | 3 +- net/tipc/core.c | 19 +++++--- net/tipc/core.h | 4 ++ net/tipc/msg.c | 4 +- net/tipc/msg.h | 2 +- net/tipc/name_distr.c | 37 +++++++++------ net/tipc/name_distr.h | 4 +- net/tipc/name_table.c | 128 ++++++++++++++++++++++++++++---------------------- net/tipc/name_table.h | 25 +++++----- net/tipc/net.c | 2 +- net/tipc/server.c | 3 +- net/tipc/server.h | 10 ++-- net/tipc/socket.c | 11 ++--- net/tipc/subscr.c | 16 ++++--- net/tipc/subscr.h | 2 + 15 files changed, 154 insertions(+), 116 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 20b1c5812f00..974723a1e32e 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -248,7 +248,8 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, req_tlv_space); break; case TIPC_CMD_SHOW_NAME_TABLE: - rep_tlv_buf = tipc_nametbl_get(req_tlv_area, req_tlv_space); + rep_tlv_buf = tipc_nametbl_get(net, req_tlv_area, + req_tlv_space); break; case TIPC_CMD_GET_BEARER_NAMES: rep_tlv_buf = tipc_bearer_get_names(net); diff --git a/net/tipc/core.c b/net/tipc/core.c index 23ff3caa1ce6..63cde8148aaf 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -62,12 +62,24 @@ static int __net_init tipc_init_net(struct net *net) spin_lock_init(&tn->node_list_lock); err = tipc_sk_rht_init(net); + if (err) + goto out_sk_rht; + + err = tipc_nametbl_init(net); + if (err) + goto out_nametbl; + return 0; + +out_nametbl: + tipc_sk_rht_destroy(net); +out_sk_rht: return err; } static void __net_exit tipc_exit_net(struct net *net) { tipc_net_stop(net); + tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); } @@ -98,10 +110,6 @@ static int __init tipc_init(void) if (err) goto out_pernet; - err = tipc_nametbl_init(); - if (err) - goto out_nametbl; - err = tipc_netlink_start(); if (err) goto out_netlink; @@ -133,8 +141,6 @@ out_sysctl: out_socket: tipc_netlink_stop(); out_netlink: - tipc_nametbl_stop(); -out_nametbl: unregister_pernet_subsys(&tipc_net_ops); out_pernet: pr_err("Unable to start in single node mode\n"); @@ -147,7 +153,6 @@ static void __exit tipc_exit(void) tipc_bearer_cleanup(); tipc_netlink_stop(); tipc_subscr_stop(); - tipc_nametbl_stop(); tipc_socket_stop(); tipc_unregister_sysctl(); diff --git a/net/tipc/core.h b/net/tipc/core.h index 1a7f81643668..893992944570 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -105,6 +105,10 @@ struct tipc_net { /* Socket hash table */ struct rhashtable sk_rht; + + /* Name table */ + spinlock_t nametbl_lock; + struct name_table *nametbl; }; #ifdef CONFIG_SYSCTL diff --git a/net/tipc/msg.c b/net/tipc/msg.c index a38f6a680df1..642fb137463c 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -426,7 +426,7 @@ exit: * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error * code if message to be rejected */ -int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) +int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode) { struct tipc_msg *msg = buf_msg(buf); u32 dport; @@ -441,7 +441,7 @@ int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) return -TIPC_ERR_NO_NAME; *dnode = addr_domain(msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(msg_nametype(msg), + dport = tipc_nametbl_translate(net, msg_nametype(msg), msg_nameinst(msg), dnode); if (!dport) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 75f324287244..69f37e652a8e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -749,7 +749,7 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_reverse(struct 
sk_buff *buf, u32 *dnode, int err); -int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); +int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode); void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index d40df588263e..ba421321d15d 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -109,13 +109,14 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) /** * tipc_named_publish - tell other nodes about a new publication by this node */ -struct sk_buff *tipc_named_publish(struct publication *publ) +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf; struct distr_item *item; list_add_tail_rcu(&publ->local_list, - &tipc_nametbl->publ_list[publ->scope]); + &tn->nametbl->publ_list[publ->scope]); if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -206,15 +207,16 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, */ void tipc_named_node_up(struct net *net, u32 dnode) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff_head head; __skb_queue_head_init(&head); rcu_read_lock(); named_distribute(net, &head, dnode, - &tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); + &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]); named_distribute(net, &head, dnode, - &tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); + &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); rcu_read_unlock(); tipc_link_xmit(net, &head, dnode, dnode); @@ -262,14 +264,15 @@ static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, */ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *p; - spin_lock_bh(&tipc_nametbl_lock); - p = tipc_nametbl_remove_publ(publ->type, publ->lower, + spin_lock_bh(&tn->nametbl_lock); + p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) tipc_publ_unsubscribe(net, p, addr); - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { pr_err("Unable to remove publication from failed node\n" @@ -302,7 +305,8 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, struct publication *publ = NULL; if (dtype == PUBLICATION) { - publ = tipc_nametbl_insert_publ(ntohl(i->type), ntohl(i->lower), + publ = tipc_nametbl_insert_publ(net, ntohl(i->type), + ntohl(i->lower), ntohl(i->upper), TIPC_CLUSTER_SCOPE, node, ntohl(i->ref), ntohl(i->key)); @@ -311,7 +315,8 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, return true; } } else if (dtype == WITHDRAWAL) { - publ = tipc_nametbl_remove_publ(ntohl(i->type), ntohl(i->lower), + publ = tipc_nametbl_remove_publ(net, ntohl(i->type), + ntohl(i->lower), node, ntohl(i->ref), ntohl(i->key)); if (publ) { @@ -376,19 +381,20 @@ void tipc_named_process_backlog(struct net *net) */ void tipc_named_rcv(struct net *net, struct sk_buff *buf) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg = buf_msg(buf); struct distr_item *item = (struct distr_item *)msg_data(msg); u32 count = msg_data_sz(msg) / ITEM_SIZE; u32 node = msg_orignode(msg); - spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); while (count--) { if (!tipc_update_nametbl(net, item, node, msg_type(msg))) 
tipc_named_add_backlog(item, msg_type(msg), node); item++; } tipc_named_process_backlog(net); - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); kfree_skb(buf); } @@ -399,17 +405,18 @@ void tipc_named_rcv(struct net *net, struct sk_buff *buf) * All name table entries published by this node are updated to reflect * the node's new network address. */ -void tipc_named_reinit(void) +void tipc_named_reinit(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; int scope; - spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) - list_for_each_entry_rcu(publ, &tipc_nametbl->publ_list[scope], + list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope], local_list) publ->node = tipc_own_addr; - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 8039d84351b3..1ed2d7e48290 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -67,12 +67,12 @@ struct distr_item { __be32 key; }; -struct sk_buff *tipc_named_publish(struct publication *publ); +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); struct sk_buff *tipc_named_withdraw(struct publication *publ); void named_cluster_distribute(struct net *net, struct sk_buff *buf); void tipc_named_node_up(struct net *net, u32 dnode); void tipc_named_rcv(struct net *net, struct sk_buff *buf); -void tipc_named_reinit(void); +void tipc_named_reinit(struct net *net); void tipc_named_process_backlog(struct net *net); void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index beed5fdda004..57e39c16a8c3 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -34,6 +34,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include #include "core.h" #include "config.h" #include "name_table.h" @@ -106,9 +107,6 @@ struct name_seq { struct rcu_head rcu; }; -struct name_table *tipc_nametbl; -DEFINE_SPINLOCK(tipc_nametbl_lock); - static int hash(int x) { return x & (TIPC_NAMETBL_SIZE - 1); @@ -448,12 +446,13 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, } } -static struct name_seq *nametbl_find_seq(u32 type) +static struct name_seq *nametbl_find_seq(struct net *net, u32 type) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *ns; - seq_head = &tipc_nametbl->seq_hlist[hash(type)]; + seq_head = &tn->nametbl->seq_hlist[hash(type)]; hlist_for_each_entry_rcu(ns, seq_head, ns_list) { if (ns->type == type) return ns; @@ -462,11 +461,13 @@ static struct name_seq *nametbl_find_seq(u32 type) return NULL; }; -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 port, u32 key) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; - struct name_seq *seq = nametbl_find_seq(type); + struct name_seq *seq = nametbl_find_seq(net, type); int index = hash(type); if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) || @@ -477,8 +478,7 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, } if (!seq) - seq = tipc_nameseq_create(type, - &tipc_nametbl->seq_hlist[index]); + seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (!seq) return NULL; @@ -489,11 +489,12 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, return publ; } -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, - u32 node, u32 ref, u32 key) +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 node, u32 ref, + u32 key) { struct publication *publ; - struct name_seq *seq = nametbl_find_seq(type); + struct name_seq *seq = nametbl_find_seq(net, type); if (!seq) return NULL; @@ -524,7 +525,8 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, * - if name translation is attempted and fails, sets 'destnode' to 0 * and returns 0 */ -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, + u32 *destnode) { struct sub_seq *sseq; struct name_info *info; @@ -537,7 +539,7 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) return 0; rcu_read_lock(); - seq = nametbl_find_seq(type); + seq = nametbl_find_seq(net, type); if (unlikely(!seq)) goto not_found; spin_lock_bh(&seq->lock); @@ -610,8 +612,8 @@ not_found: * * Returns non-zero if any off-node ports overlap */ -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports) +int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, + u32 limit, struct tipc_port_list *dports) { struct name_seq *seq; struct sub_seq *sseq; @@ -620,7 +622,7 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, int res = 0; rcu_read_lock(); - seq = nametbl_find_seq(type); + seq = nametbl_find_seq(net, type); if (!seq) goto exit; @@ -657,24 +659,25 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, { struct publication *publ; struct sk_buff *buf = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); - 
spin_lock_bh(&tipc_nametbl_lock); - if (tipc_nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { + spin_lock_bh(&tn->nametbl_lock); + if (tn->nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", TIPC_MAX_PUBLICATIONS); - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); return NULL; } - publ = tipc_nametbl_insert_publ(type, lower, upper, scope, - tipc_own_addr, port_ref, key); + publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope, + tipc_own_addr, port_ref, key); if (likely(publ)) { - tipc_nametbl->local_publ_count++; - buf = tipc_named_publish(publ); + tn->nametbl->local_publ_count++; + buf = tipc_named_publish(net, publ); /* Any pending external events? */ tipc_named_process_backlog(net); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); if (buf) named_cluster_distribute(net, buf); @@ -689,11 +692,13 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, { struct publication *publ; struct sk_buff *skb = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&tipc_nametbl_lock); - publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); + spin_lock_bh(&tn->nametbl_lock); + publ = tipc_nametbl_remove_publ(net, type, lower, tipc_own_addr, + ref, key); if (likely(publ)) { - tipc_nametbl->local_publ_count--; + tn->nametbl->local_publ_count--; skb = tipc_named_withdraw(publ); /* Any pending external events? */ tipc_named_process_backlog(net); @@ -704,7 +709,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, "(type=%u, lower=%u, ref=%u, key=%u)\n", type, lower, ref, key); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); if (skb) { named_cluster_distribute(net, skb); @@ -718,15 +723,15 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, */ void tipc_nametbl_subscribe(struct tipc_subscription *s) { + struct tipc_net *tn = net_generic(s->net, tipc_net_id); u32 type = s->seq.type; int index = hash(type); struct name_seq *seq; - spin_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(type); + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, type); if (!seq) - seq = tipc_nameseq_create(type, - &tipc_nametbl->seq_hlist[index]); + seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); tipc_nameseq_subscribe(seq, s); @@ -735,7 +740,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) pr_warn("Failed to create subscription for {%u,%u,%u}\n", s->seq.type, s->seq.lower, s->seq.upper); } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } /** @@ -743,10 +748,11 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) */ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) { + struct tipc_net *tn = net_generic(s->net, tipc_net_id); struct name_seq *seq; - spin_lock_bh(&tipc_nametbl_lock); - seq = nametbl_find_seq(s->seq.type); + spin_lock_bh(&tn->nametbl_lock); + seq = nametbl_find_seq(s->net, s->seq.type); if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); @@ -759,7 +765,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) spin_unlock_bh(&seq->lock); } } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); } /** @@ -861,9 +867,10 @@ static int nametbl_header(char *buf, int len, u32 depth) /** * nametbl_list - print specified name table contents into the given buffer */ -static 
int nametbl_list(char *buf, int len, u32 depth_info, +static int nametbl_list(struct net *net, char *buf, int len, u32 depth_info, u32 type, u32 lowbound, u32 upbound) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *seq; int all_types; @@ -883,7 +890,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, lowbound = 0; upbound = ~0; for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &tipc_nametbl->seq_hlist[i]; + seq_head = &tn->nametbl->seq_hlist[i]; hlist_for_each_entry_rcu(seq, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len - ret, depth, seq->type, @@ -899,7 +906,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, } ret += nametbl_header(buf + ret, len - ret, depth); i = hash(type); - seq_head = &tipc_nametbl->seq_hlist[i]; + seq_head = &tn->nametbl->seq_hlist[i]; hlist_for_each_entry_rcu(seq, seq_head, ns_list) { if (seq->type == type) { ret += nameseq_list(seq, buf + ret, len - ret, @@ -912,7 +919,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info, return ret; } -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) +struct sk_buff *tipc_nametbl_get(struct net *net, const void *req_tlv_area, + int req_tlv_space) { struct sk_buff *buf; struct tipc_name_table_query *argv; @@ -933,7 +941,7 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) pb_len = ULTRA_STRING_MAX_LEN; argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area); rcu_read_lock(); - str_len = nametbl_list(pb, pb_len, ntohl(argv->depth), + str_len = nametbl_list(net, pb, pb_len, ntohl(argv->depth), ntohl(argv->type), ntohl(argv->lowbound), ntohl(argv->upbound)); rcu_read_unlock(); @@ -944,8 +952,10 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) return buf; } -int tipc_nametbl_init(void) +int tipc_nametbl_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl; int i; tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC); @@ -958,6 +968,8 @@ int tipc_nametbl_init(void) INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]); + tn->nametbl = tipc_nametbl; + spin_lock_init(&tn->nametbl_lock); return 0; } @@ -966,7 +978,7 @@ int tipc_nametbl_init(void) * * tipc_nametbl_lock must be held when calling this function */ -static void tipc_purge_publications(struct name_seq *seq) +static void tipc_purge_publications(struct net *net, struct name_seq *seq) { struct publication *publ, *safe; struct sub_seq *sseq; @@ -976,8 +988,8 @@ static void tipc_purge_publications(struct name_seq *seq) sseq = seq->sseqs; info = sseq->info; list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { - tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, - publ->ref, publ->key); + tipc_nametbl_remove_publ(net, publ->type, publ->lower, + publ->node, publ->ref, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); @@ -987,25 +999,27 @@ static void tipc_purge_publications(struct name_seq *seq) kfree_rcu(seq, rcu); } -void tipc_nametbl_stop(void) +void tipc_nametbl_stop(struct net *net) { u32 i; struct name_seq *seq; struct hlist_head *seq_head; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct name_table *tipc_nametbl = tn->nametbl; /* Verify name table is empty and purge any lingering * publications, then release the name table */ 
- spin_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tn->nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&tipc_nametbl->seq_hlist[i])) continue; seq_head = &tipc_nametbl->seq_hlist[i]; hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - tipc_purge_publications(seq); + tipc_purge_publications(net, seq); } } - spin_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tn->nametbl_lock); synchronize_net(); kfree(tipc_nametbl); @@ -1109,9 +1123,10 @@ static int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq, return 0; } -static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, - u32 *last_lower, u32 *last_publ) +static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg, + u32 *last_type, u32 *last_lower, u32 *last_publ) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct hlist_head *seq_head; struct name_seq *seq = NULL; int err; @@ -1123,10 +1138,10 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, i = 0; for (; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &tipc_nametbl->seq_hlist[i]; + seq_head = &tn->nametbl->seq_hlist[i]; if (*last_type) { - seq = nametbl_find_seq(*last_type); + seq = nametbl_find_seq(net, *last_type); if (!seq) return -EPIPE; } else { @@ -1160,6 +1175,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 last_type = cb->args[0]; u32 last_lower = cb->args[1]; u32 last_publ = cb->args[2]; + struct net *net = sock_net(skb->sk); struct tipc_nl_msg msg; if (done) @@ -1170,7 +1186,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ); + err = tipc_nl_seq_list(net, &msg, &last_type, &last_lower, &last_publ); if (!err) { done = 1; } else if (err != -EMSGSIZE) { diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index efccaca7a5d5..f67b3d8d4b2f 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -95,28 +95,27 @@ struct name_table { u32 local_publ_count; }; -extern spinlock_t tipc_nametbl_lock; -extern struct name_table *tipc_nametbl; - int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); -struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); -u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports); +struct sk_buff *tipc_nametbl_get(struct net *net, const void *req_tlv_area, + int req_tlv_space); +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); +int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, + u32 limit, struct tipc_port_list *dports); struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, u32 upper, u32 scope, u32 port_ref, u32 key); int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, u32 key); -struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 ref, +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 ref, u32 key); +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, - u32 ref, u32 key); void tipc_nametbl_subscribe(struct tipc_subscription *s); void 
tipc_nametbl_unsubscribe(struct tipc_subscription *s); -int tipc_nametbl_init(void); -void tipc_nametbl_stop(void); +int tipc_nametbl_init(struct net *net); +void tipc_nametbl_stop(struct net *net); #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index 44ccf47c79a3..04445d210e45 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -116,7 +116,7 @@ int tipc_net_start(struct net *net, u32 addr) int res; tipc_own_addr = addr; - tipc_named_reinit(); + tipc_named_reinit(net); tipc_sk_reinit(net); res = tipc_bclink_init(net); if (res) diff --git a/net/tipc/server.c b/net/tipc/server.c index 869eb0905754..b5bdaf721d70 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -256,7 +256,8 @@ static int tipc_receive_from_sock(struct tipc_conn *con) goto out_close; } - s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret); + s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr, + con->usr_data, buf, ret); kmem_cache_free(s->rcvbuf_cache, buf); diff --git a/net/tipc/server.h b/net/tipc/server.h index 87bc05c70dce..9c979a01997c 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -38,6 +38,7 @@ #include #include +#include #define TIPC_SERVER_NAME_LEN 32 @@ -66,10 +67,11 @@ struct tipc_server { struct workqueue_struct *rcv_wq; struct workqueue_struct *send_wq; int max_rcvbuf_size; - void *(*tipc_conn_new) (int conid); - void (*tipc_conn_shutdown) (int conid, void *usr_data); - void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); + void *(*tipc_conn_new)(int conid); + void (*tipc_conn_shutdown)(int conid, void *usr_data); + void (*tipc_conn_recvmsg)(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len); struct sockaddr_tipc *saddr; const char name[TIPC_SERVER_NAME_LEN]; int imp; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index accb02cb3527..4670e1e46c89 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -778,11 +778,8 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) scope = TIPC_NODE_SCOPE; /* Create destination port list: */ - tipc_nametbl_mc_translate(msg_nametype(msg), - msg_namelower(msg), - msg_nameupper(msg), - scope, - &dports); + tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), + msg_nameupper(msg), scope, &dports); last = dports.count; if (!last) { kfree_skb(buf); @@ -943,7 +940,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_nametype(mhdr, type); msg_set_nameinst(mhdr, inst); msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); - dport = tipc_nametbl_translate(type, inst, &dnode); + dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(mhdr, dnode); msg_set_destport(mhdr, dport); if (unlikely(!dport && !dnode)) { @@ -1765,7 +1762,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) /* Validate destination and message */ tsk = tipc_sk_lookup(net, dport); if (unlikely(!tsk)) { - rc = tipc_msg_eval(skb, &dnode); + rc = tipc_msg_eval(net, skb, &dnode); goto exit; } sk = &tsk->sk; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index e6cb959371dc..b71dbc0ae8f9 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -50,8 +50,9 @@ struct tipc_subscriber { struct list_head subscription_list; }; -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len); +static void subscr_conn_msg_event(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len); static void 
*subscr_named_msg_event(int conid); static void subscr_conn_shutdown_event(int conid, void *usr_data); @@ -260,7 +261,7 @@ static void subscr_cancel(struct tipc_subscr *s, * * Called with subscriber lock held. */ -static int subscr_subscribe(struct tipc_subscr *s, +static int subscr_subscribe(struct net *net, struct tipc_subscr *s, struct tipc_subscriber *subscriber, struct tipc_subscription **sub_p) { struct tipc_subscription *sub; @@ -291,6 +292,7 @@ static int subscr_subscribe(struct tipc_subscr *s, } /* Initialize subscription object */ + sub->net = net; sub->seq.type = htohl(s->seq.type, swap); sub->seq.lower = htohl(s->seq.lower, swap); sub->seq.upper = htohl(s->seq.upper, swap); @@ -323,14 +325,16 @@ static void subscr_conn_shutdown_event(int conid, void *usr_data) } /* Handle one request to create a new subscription for the subscriber */ -static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, - void *usr_data, void *buf, size_t len) +static void subscr_conn_msg_event(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub = NULL; spin_lock_bh(&subscriber->lock); - if (subscr_subscribe((struct tipc_subscr *)buf, subscriber, &sub) < 0) { + if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, + &sub) < 0) { spin_unlock_bh(&subscriber->lock); subscr_terminate(subscriber); return; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 0d3958956aca..670f57096635 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -49,6 +49,7 @@ struct tipc_subscriber; * struct tipc_subscription - TIPC network topology subscription object * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription + * @net: point to network namespace * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription * @timer: timer governing subscription duration (optional) @@ -61,6 +62,7 @@ struct tipc_subscriber; struct tipc_subscription { struct tipc_subscriber *subscriber; struct tipc_name_seq seq; + struct net *net; unsigned long timeout; u32 filter; struct timer_list timer; -- cgit v1.2.3 From 347475395434abb2b61bf59c2952470f37072567 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:10 +0800 Subject: tipc: make tipc node address support net namespace If net namespace is supported in tipc, each namespace will be treated as a separate tipc node. Therefore, every namespace must own its private tipc node address. This means the "tipc_own_addr" global variable of node address must be moved to tipc_net structure to satisfy the requirement. It's turned out that users also can assign node address for every namespace. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/addr.c | 43 ++++++++++++++++++++++++++ net/tipc/addr.h | 44 ++++---------------------- net/tipc/bcast.c | 6 ++-- net/tipc/bearer.c | 13 ++++---- net/tipc/config.c | 9 +++--- net/tipc/core.c | 4 +-- net/tipc/core.h | 5 +-- net/tipc/discover.c | 8 ++--- net/tipc/link.c | 56 +++++++++++++++++++-------------- net/tipc/msg.c | 40 +++++++++++++----------- net/tipc/msg.h | 21 +++++++------ net/tipc/name_distr.c | 21 +++++++------ net/tipc/name_distr.h | 2 +- net/tipc/name_table.c | 39 +++++++++++++---------- net/tipc/net.c | 20 ++++++------ net/tipc/node.c | 23 ++++++++------ net/tipc/socket.c | 86 ++++++++++++++++++++++++++++++--------------------- 17 files changed, 246 insertions(+), 194 deletions(-) (limited to 'net') diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 9e6eeb450fe1..48fd3b5a73fb 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -36,6 +36,49 @@ #include #include "addr.h" +#include "core.h" + +/** + * in_own_cluster - test for cluster inclusion; <0.0.0> always matches + */ +int in_own_cluster(struct net *net, u32 addr) +{ + return in_own_cluster_exact(net, addr) || !addr; +} + +int in_own_cluster_exact(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return !((addr ^ tn->own_addr) >> 12); +} + +/** + * in_own_node - test for node inclusion; <0.0.0> always matches + */ +int in_own_node(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return (addr == tn->own_addr) || !addr; +} + +/** + * addr_domain - convert 2-bit scope value to equivalent message lookup domain + * + * Needed when address of a named message must be looked up a second time + * after a network hop. + */ +u32 addr_domain(struct net *net, u32 sc) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (likely(sc == TIPC_NODE_SCOPE)) + return tn->own_addr; + if (sc == TIPC_CLUSTER_SCOPE) + return tipc_cluster_mask(tn->own_addr); + return tipc_zone_mask(tn->own_addr); +} /** * tipc_addr_domain_valid - validates a network domain address diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 4e364c4f1359..c700c2d28e09 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -39,12 +39,12 @@ #include #include +#include +#include #define TIPC_ZONE_MASK 0xff000000u #define TIPC_CLUSTER_MASK 0xfffff000u -extern u32 tipc_own_addr __read_mostly; - static inline u32 tipc_zone_mask(u32 addr) { return addr & TIPC_ZONE_MASK; @@ -55,42 +55,10 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_CLUSTER_MASK; } -static inline int in_own_cluster_exact(u32 addr) -{ - return !((addr ^ tipc_own_addr) >> 12); -} - -/** - * in_own_node - test for node inclusion; <0.0.0> always matches - */ -static inline int in_own_node(u32 addr) -{ - return (addr == tipc_own_addr) || !addr; -} - -/** - * in_own_cluster - test for cluster inclusion; <0.0.0> always matches - */ -static inline int in_own_cluster(u32 addr) -{ - return in_own_cluster_exact(addr) || !addr; -} - -/** - * addr_domain - convert 2-bit scope value to equivalent message lookup domain - * - * Needed when address of a named message must be looked up a second time - * after a network hop. 
- */ -static inline u32 addr_domain(u32 sc) -{ - if (likely(sc == TIPC_NODE_SCOPE)) - return tipc_own_addr; - if (sc == TIPC_CLUSTER_SCOPE) - return tipc_cluster_mask(tipc_own_addr); - return tipc_zone_mask(tipc_own_addr); -} - +int in_own_cluster(struct net *net, u32 addr); +int in_own_cluster_exact(struct net *net, u32 addr); +int in_own_node(struct net *net, u32 addr); +u32 addr_domain(struct net *net, u32 sc); int tipc_addr_domain_valid(u32); int tipc_addr_node_valid(u32 addr); int tipc_in_scope(u32 domain, u32 addr); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index bc58097ebad2..53f8bf059fec 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -317,7 +317,7 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, + tipc_msg_init(net, msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tn->net_id); @@ -428,7 +428,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) * Unicast an ACK periodically, ensuring that * all nodes in the cluster don't ACK at the same time */ - if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { + if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], STATE_MSG, 0, 0, 0, 0, 0); tn->bcl->stats.sent_acks++; @@ -466,7 +466,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { if (msg_type(msg) != STATE_MSG) goto unlock; - if (msg_destnode(msg) == tipc_own_addr) { + if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); tipc_bclink_lock(net); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 9a0d6ed5c96c..33dc3486d16c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -278,7 +278,7 @@ int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 i; int res = -EINVAL; - if (!tipc_own_addr) { + if (!tn->own_addr) { pr_warn("Bearer <%s> rejected, not supported in standalone mode\n", name); return -ENOPROTOOPT; @@ -288,11 +288,11 @@ int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, return -EINVAL; } if (tipc_addr_domain_valid(disc_domain) && - (disc_domain != tipc_own_addr)) { - if (tipc_in_scope(disc_domain, tipc_own_addr)) { - disc_domain = tipc_own_addr & TIPC_CLUSTER_MASK; + (disc_domain != tn->own_addr)) { + if (tipc_in_scope(disc_domain, tn->own_addr)) { + disc_domain = tn->own_addr & TIPC_CLUSTER_MASK; res = 0; /* accept any node in own cluster */ - } else if (in_own_cluster_exact(disc_domain)) + } else if (in_own_cluster_exact(net, disc_domain)) res = 0; /* accept specified node in own cluster */ } if (res) { @@ -817,6 +817,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); + struct tipc_net *tn = net_generic(net, tipc_net_id); int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; @@ -824,7 +825,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) u32 prio; prio = TIPC_MEDIA_LINK_PRI; - domain = tipc_own_addr & TIPC_CLUSTER_MASK; + domain = tn->own_addr & TIPC_CLUSTER_MASK; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; diff --git a/net/tipc/config.c b/net/tipc/config.c index 
974723a1e32e..6873360cda53 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -163,18 +163,19 @@ static struct sk_buff *cfg_disable_bearer(struct net *net) static struct sk_buff *cfg_set_own_addr(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 addr; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); addr = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (addr == tipc_own_addr) + if (addr == tn->own_addr) return tipc_cfg_reply_none(); if (!tipc_addr_node_valid(addr)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (node address)"); - if (tipc_own_addr) + if (tn->own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); if (!tipc_net_start(net, addr)) @@ -196,7 +197,7 @@ static struct sk_buff *cfg_set_netid(struct net *net) if (value < 1 || value > 9999) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network id must be 1-9999)"); - if (tipc_own_addr) + if (tn->own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change network id once TIPC has joined a network)"); tn->net_id = value; @@ -218,7 +219,7 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_headroom = reply_headroom; /* Check command authorization */ - if (likely(in_own_node(orig_node))) { + if (likely(in_own_node(net, orig_node))) { /* command is permitted */ } else { rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED diff --git a/net/tipc/core.c b/net/tipc/core.c index 63cde8148aaf..7c09670120eb 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -48,7 +48,6 @@ int tipc_random __read_mostly; /* configurable TIPC parameters */ -u32 tipc_own_addr __read_mostly; int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ @@ -58,6 +57,7 @@ static int __net_init tipc_init_net(struct net *net) int err; tn->net_id = 4711; + tn->own_addr = 0; INIT_LIST_HEAD(&tn->node_list); spin_lock_init(&tn->node_list_lock); @@ -96,8 +96,6 @@ static int __init tipc_init(void) pr_info("Activated (version " TIPC_MOD_VER ")\n"); - tipc_own_addr = 0; - sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << diff --git a/net/tipc/core.h b/net/tipc/core.h index 893992944570..afabf39e801c 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -72,10 +72,6 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...); -/* - * Global configuration variables - */ -extern u32 tipc_own_addr __read_mostly; extern int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; @@ -86,6 +82,7 @@ extern int sysctl_tipc_named_timeout __read_mostly; extern int tipc_random __read_mostly; struct tipc_net { + u32 own_addr; int net_id; /* Node table and node list */ diff --git a/net/tipc/discover.c b/net/tipc/discover.c index f93dd3dd621b..786411dea61c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -86,7 +86,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, u32 dest_domain = b_ptr->domain; msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); + tipc_msg_init(net, msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tipc_random); msg_set_dest_domain(msg, dest_domain); @@ -153,12 +153,12 @@ void tipc_disc_rcv(struct net *net, 
struct sk_buff *buf, if (!tipc_addr_node_valid(onode)) return; - if (in_own_node(onode)) { + if (in_own_node(net, onode)) { if (memcmp(&maddr, &bearer->addr, sizeof(maddr))) - disc_dupl_alert(bearer, tipc_own_addr, &maddr); + disc_dupl_alert(bearer, tn->own_addr, &maddr); return; } - if (!tipc_in_scope(ddom, tipc_own_addr)) + if (!tipc_in_scope(ddom, tn->own_addr)) return; if (!tipc_in_scope(bearer->domain, onode)) return; diff --git a/net/tipc/link.c b/net/tipc/link.c index a84d5c67997e..997256769065 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -241,6 +241,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); struct tipc_link *l_ptr; struct tipc_msg *msg; char *if_name; @@ -270,8 +271,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->addr = peer; if_name = strchr(b_ptr->name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", - tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), + tipc_zone(tn->own_addr), tipc_cluster(tn->own_addr), + tipc_node(tn->own_addr), if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); /* note: peer i/f name is updated by reset/activate message */ @@ -285,7 +286,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; - tipc_msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(n_ptr->net, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, + l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); msg_set_session(msg, (tipc_random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); @@ -358,10 +360,12 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, static bool link_schedule_user(struct tipc_link *link, u32 oport, uint chain_sz, uint imp) { + struct net *net = link->owner->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf; - buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, tipc_own_addr, - tipc_own_addr, oport, 0, 0); + buf = tipc_msg_create(net, SOCK_WAKEUP, 0, INT_H_SIZE, 0, tn->own_addr, + tn->own_addr, oport, 0, 0); if (!buf) return false; TIPC_SKB_CB(buf)->chain_sz = chain_sz; @@ -753,7 +757,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, } else if (tipc_msg_bundle(outqueue, skb, mtu)) { link->stats.sent_bundled++; continue; - } else if (tipc_msg_make_bundle(outqueue, skb, mtu, + } else if (tipc_msg_make_bundle(net, outqueue, skb, mtu, link->addr)) { link->stats.sent_bundled++; link->stats.sent_bundles++; @@ -822,7 +826,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, if (link) return rc; - if (likely(in_own_node(dnode))) { + if (likely(in_own_node(net, dnode))) { /* As a node local message chain never contains more than one * buffer, we just need to dequeue one SKB buffer from the * head list. 
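Because tipc_msg_init() and tipc_msg_create() now take a struct net argument, every message header is stamped with the per-namespace node address (tn->own_addr) rather than the old tipc_own_addr global. A hypothetical caller sketch of that pattern (the function name and local variables are illustrative only; the argument order follows the new tipc_msg_create() prototype in msg.h):

	static struct sk_buff *abort_msg_sketch(struct net *net, u32 peer_node,
						u32 peer_port, u32 local_port)
	{
		struct tipc_net *tn = net_generic(net, tipc_net_id);

		/* dnode is the namespace's own address because the message is
		 * injected back into the local stack, e.g. via tipc_sk_rcv(net, buf).
		 */
		return tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE,
				       TIPC_CONN_MSG, SHORT_H_SIZE, 0,
				       tn->own_addr, peer_node,
				       local_port, peer_port, TIPC_ERR_NO_NODE);
	}
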
@@ -852,7 +856,8 @@ static void tipc_link_sync_xmit(struct tipc_link *link) return; msg = buf_msg(skb); - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, link->addr); + tipc_msg_init(link->owner->net, msg, BCAST_PROTOCOL, STATE_MSG, + INT_H_SIZE, link->addr); msg_set_last_bcast(msg, link->owner->bclink.acked); __tipc_link_xmit_skb(link, skb); } @@ -1092,6 +1097,7 @@ static int link_recv_buf_validate(struct sk_buff *buf) */ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff_head head; struct tipc_node *n_ptr; struct tipc_link *l_ptr; @@ -1125,7 +1131,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Discard unicast link messages destined for another node */ if (unlikely(!msg_short(msg) && - (msg_destnode(msg) != tipc_own_addr))) + (msg_destnode(msg) != tn->own_addr))) goto discard; /* Locate neighboring node that sent message */ @@ -1483,6 +1489,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, struct sk_buff *buf) { + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 rec_gap = 0; u32 max_pkt_info; u32 max_pkt_ack; @@ -1494,7 +1501,7 @@ static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, goto exit; if (l_ptr->net_plane != msg_net_plane(msg)) - if (tipc_own_addr > msg_prevnode(msg)) + if (tn->own_addr > msg_prevnode(msg)) l_ptr->net_plane = msg_net_plane(msg); switch (msg_type(msg)) { @@ -1662,8 +1669,8 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (!tunnel) return; - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(l_ptr->owner->net, &tunnel_hdr, CHANGEOVER_PROTOCOL, + ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); @@ -1720,8 +1727,8 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct sk_buff *skb; struct tipc_msg tunnel_hdr; - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(l_ptr->owner->net, &tunnel_hdr, CHANGEOVER_PROTOCOL, + DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); msg_set_msgcnt(&tunnel_hdr, skb_queue_len(&l_ptr->outqueue)); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); skb_queue_walk(&l_ptr->outqueue, skb) { @@ -2506,12 +2513,14 @@ msg_full: } /* Caller should hold appropriate locks to protect the link */ -static int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) +static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_LINK_GET); @@ -2525,7 +2534,7 @@ static int __tipc_nl_add_link(struct tipc_nl_msg *msg, struct tipc_link *link) if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, - tipc_cluster_mask(tipc_own_addr))) + tipc_cluster_mask(tn->own_addr))) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->max_pkt)) goto attr_msg_full; @@ -2575,9 +2584,8 @@ msg_full: } /* Caller should hold node lock */ -static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, - struct tipc_node *node, - u32 
*prev_link) +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) { u32 i; int err; @@ -2588,7 +2596,7 @@ static int __tipc_nl_add_node_links(struct tipc_nl_msg *msg, if (!node->links[i]) continue; - err = __tipc_nl_add_link(msg, node->links[i]); + err = __tipc_nl_add_link(net, msg, node->links[i]); if (err) return err; } @@ -2633,7 +2641,8 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry_continue_rcu(node, &tn->node_list, list) { tipc_node_lock(node); - err = __tipc_nl_add_node_links(&msg, node, &prev_link); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); tipc_node_unlock(node); if (err) goto out; @@ -2647,7 +2656,8 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_lock(node); - err = __tipc_nl_add_node_links(&msg, node, &prev_link); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); tipc_node_unlock(node); if (err) goto out; @@ -2700,7 +2710,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) goto err_out; } - err = __tipc_nl_add_link(&msg, link); + err = __tipc_nl_add_link(net, &msg, link); if (err) goto err_out; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 642fb137463c..18aba9e99345 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -70,25 +70,27 @@ struct sk_buff *tipc_buf_acquire(u32 size) return skb; } -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode) +void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + memset(m, 0, hsize); msg_set_version(m); msg_set_user(m, user); msg_set_hdr_sz(m, hsize); msg_set_size(m, hsize); - msg_set_prevnode(m, tipc_own_addr); + msg_set_prevnode(m, tn->own_addr); msg_set_type(m, type); if (hsize > SHORT_H_SIZE) { - msg_set_orignode(m, tipc_own_addr); + msg_set_orignode(m, tn->own_addr); msg_set_destnode(m, destnode); } } -struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, - uint data_sz, u32 dnode, u32 onode, - u32 dport, u32 oport, int errcode) +struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode) { struct tipc_msg *msg; struct sk_buff *buf; @@ -98,7 +100,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, return NULL; msg = buf_msg(buf); - tipc_msg_init(msg, user, type, hdr_sz, dnode); + tipc_msg_init(net, msg, user, type, hdr_sz, dnode); msg_set_size(msg, hdr_sz + data_sz); msg_set_prevnode(msg, onode); msg_set_origport(msg, oport); @@ -194,8 +196,8 @@ err: * * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, - int dsz, int pktmax, struct sk_buff_head *list) +int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int pktmax, struct sk_buff_head *list) { int mhsz = msg_hdr_sz(mhdr); int msz = mhsz + dsz; @@ -227,8 +229,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, } /* Prepare reusable fragment header */ - tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, msg_destnode(mhdr)); + tipc_msg_init(net, &pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE, + msg_destnode(mhdr)); msg_set_size(&pkthdr, pktmax); msg_set_fragm_no(&pkthdr, pktno); @@ -339,8 
+341,8 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) * Replaces buffer if successful * Returns true if success, otherwise false */ -bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, - u32 mtu, u32 dnode) +bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, + struct sk_buff *skb, u32 mtu, u32 dnode) { struct sk_buff *bskb; struct tipc_msg *bmsg; @@ -363,7 +365,7 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, skb_trim(bskb, INT_H_SIZE); bmsg = buf_msg(bskb); - tipc_msg_init(bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); + tipc_msg_init(net, bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); @@ -380,8 +382,10 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, * Consumes buffer if failure * Returns true if success, otherwise false */ -bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) +bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode, + int err) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg = buf_msg(buf); uint imp = msg_importance(msg); struct tipc_msg ohdr; @@ -401,7 +405,7 @@ bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); - msg_set_prevnode(msg, tipc_own_addr); + msg_set_prevnode(msg, tn->own_addr); if (!msg_short(msg)) { msg_set_orignode(msg, msg_destnode(&ohdr)); msg_set_destnode(msg, msg_orignode(&ohdr)); @@ -440,7 +444,7 @@ int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode) if (msg_reroute_cnt(msg) > 0) return -TIPC_ERR_NO_NAME; - *dnode = addr_domain(msg_lookup_scope(msg)); + *dnode = addr_domain(net, msg_lookup_scope(msg)); dport = tipc_nametbl_translate(net, msg_nametype(msg), msg_nameinst(msg), dnode); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 69f37e652a8e..526ef345b70e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -748,19 +748,20 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) } struct sk_buff *tipc_buf_acquire(u32 size); -bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); +bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode, + int err); int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode); -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, - u32 destnode); -struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, - uint data_sz, u32 dnode, u32 onode, - u32 dport, u32 oport, int errcode); +void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode); +struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); -bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, - u32 mtu, u32 dnode); -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, - int dsz, int mtu, struct sk_buff_head *list); +bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, + struct sk_buff *skb, u32 mtu, u32 dnode); +int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int mtu, struct 
sk_buff_head *list); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index ba421321d15d..7f31cd4badc4 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -68,14 +68,16 @@ static void publ_to_item(struct distr_item *i, struct publication *p) /** * named_prepare_buf - allocate & initialize a publication message */ -static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) +static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, + u32 dest) { struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); struct tipc_msg *msg; if (buf != NULL) { msg = buf_msg(buf); - tipc_msg_init(msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, dest); + tipc_msg_init(net, msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, + dest); msg_set_size(msg, INT_H_SIZE + size); } return buf; @@ -91,7 +93,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(node, &tn->node_list, list) { dnode = node->addr; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) continue; if (!tipc_node_active_links(node)) continue; @@ -121,7 +123,7 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) if (publ->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); + buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); if (!buf) { pr_warn("Publication distribution failure\n"); return NULL; @@ -135,7 +137,7 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) /** * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node */ -struct sk_buff *tipc_named_withdraw(struct publication *publ) +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) { struct sk_buff *buf; struct distr_item *item; @@ -145,7 +147,7 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) if (publ->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); + buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { pr_warn("Withdrawal distribution failure\n"); return NULL; @@ -175,7 +177,8 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, list_for_each_entry(publ, pls, local_list) { /* Prepare next buffer: */ if (!skb) { - skb = named_prepare_buf(PUBLICATION, msg_rem, dnode); + skb = named_prepare_buf(net, PUBLICATION, msg_rem, + dnode); if (!skb) { pr_warn("Bulk publication failure\n"); return; @@ -227,7 +230,7 @@ static void tipc_publ_subscribe(struct net *net, struct publication *publ, { struct tipc_node *node; - if (in_own_node(addr)) + if (in_own_node(net, addr)) return; node = tipc_node_find(net, addr); @@ -416,7 +419,7 @@ void tipc_named_reinit(struct net *net) for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope], local_list) - publ->node = tipc_own_addr; + publ->node = tn->own_addr; spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 1ed2d7e48290..5ec10b59527b 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -68,7 +68,7 @@ struct distr_item { }; struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); -struct sk_buff *tipc_named_withdraw(struct publication *publ); +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); void named_cluster_distribute(struct net *net, struct sk_buff *buf); 
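The name distributor follows the same rule: named_prepare_buf(net, ...) initializes the NAME_DISTRIBUTOR header against the caller's namespace, and cluster distribution walks that namespace's tn->node_list. A hypothetical sketch of the publish-side flow under those assumptions (helper name is illustrative; real callers such as tipc_nametbl_publish() do this under tn->nametbl_lock):

	/* Sketch only: distribute one local publication within its namespace. */
	static void publish_sketch(struct net *net, struct publication *publ)
	{
		struct sk_buff *skb = tipc_named_publish(net, publ);

		if (skb)
			named_cluster_distribute(net, skb);
	}
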
void tipc_named_node_up(struct net *net, u32 dnode); void tipc_named_rcv(struct net *net, struct sk_buff *buf); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 57e39c16a8c3..ce09b863528c 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -227,9 +227,11 @@ static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance) /** * tipc_nameseq_insert_publ */ -static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, - u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 port, u32 key) +static struct publication *tipc_nameseq_insert_publ(struct net *net, + struct name_seq *nseq, + u32 type, u32 lower, + u32 upper, u32 scope, + u32 node, u32 port, u32 key) { struct tipc_subscription *s; struct tipc_subscription *st; @@ -314,12 +316,12 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, list_add(&publ->zone_list, &info->zone_list); info->zone_list_size++; - if (in_own_cluster(node)) { + if (in_own_cluster(net, node)) { list_add(&publ->cluster_list, &info->cluster_list); info->cluster_list_size++; } - if (in_own_node(node)) { + if (in_own_node(net, node)) { list_add(&publ->node_list, &info->node_list); info->node_list_size++; } @@ -348,8 +350,10 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, * A failed withdraw request simply returns a failure indication and lets the * caller issue any error or warning messages associated with such a problem. */ -static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst, - u32 node, u32 ref, u32 key) +static struct publication *tipc_nameseq_remove_publ(struct net *net, + struct name_seq *nseq, + u32 inst, u32 node, + u32 ref, u32 key) { struct publication *publ; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); @@ -377,13 +381,13 @@ found: info->zone_list_size--; /* Remove publication from cluster scope list, if present */ - if (in_own_cluster(node)) { + if (in_own_cluster(net, node)) { list_del(&publ->cluster_list); info->cluster_list_size--; } /* Remove publication from node scope list, if present */ - if (in_own_node(node)) { + if (in_own_node(net, node)) { list_del(&publ->node_list); info->node_list_size--; } @@ -483,7 +487,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_insert_publ(seq, type, lower, upper, + publ = tipc_nameseq_insert_publ(net, seq, type, lower, upper, scope, node, port, key); spin_unlock_bh(&seq->lock); return publ; @@ -500,7 +504,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, return NULL; spin_lock_bh(&seq->lock); - publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); + publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); @@ -528,6 +532,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *destnode) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sub_seq *sseq; struct name_info *info; struct publication *publ; @@ -535,7 +540,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 ref = 0; u32 node = 0; - if (!tipc_in_scope(*destnode, tipc_own_addr)) + if (!tipc_in_scope(*destnode, tn->own_addr)) return 0; rcu_read_lock(); @@ -572,13 +577,13 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, } /* 
Round-Robin Algorithm */ - else if (*destnode == tipc_own_addr) { + else if (*destnode == tn->own_addr) { if (list_empty(&info->node_list)) goto no_match; publ = list_first_entry(&info->node_list, struct publication, node_list); list_move_tail(&publ->node_list, &info->node_list); - } else if (in_own_cluster_exact(*destnode)) { + } else if (in_own_cluster_exact(net, *destnode)) { if (list_empty(&info->cluster_list)) goto no_match; publ = list_first_entry(&info->cluster_list, struct publication, @@ -670,7 +675,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, } publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope, - tipc_own_addr, port_ref, key); + tn->own_addr, port_ref, key); if (likely(publ)) { tn->nametbl->local_publ_count++; buf = tipc_named_publish(net, publ); @@ -695,11 +700,11 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, struct tipc_net *tn = net_generic(net, tipc_net_id); spin_lock_bh(&tn->nametbl_lock); - publ = tipc_nametbl_remove_publ(net, type, lower, tipc_own_addr, + publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr, ref, key); if (likely(publ)) { tn->nametbl->local_publ_count--; - skb = tipc_named_withdraw(publ); + skb = tipc_named_withdraw(net, publ); /* Any pending external events? */ tipc_named_process_backlog(net); list_del_init(&publ->pport_list); diff --git a/net/tipc/net.c b/net/tipc/net.c index 04445d210e45..263267e0e7fe 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -115,30 +115,32 @@ int tipc_net_start(struct net *net, u32 addr) char addr_string[16]; int res; - tipc_own_addr = addr; + tn->own_addr = addr; tipc_named_reinit(net); tipc_sk_reinit(net); res = tipc_bclink_init(net); if (res) return res; - tipc_nametbl_publish(net, TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, - TIPC_ZONE_SCOPE, 0, tipc_own_addr); + tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, + TIPC_ZONE_SCOPE, 0, tn->own_addr); pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", - tipc_addr_string_fill(addr_string, tipc_own_addr), + tipc_addr_string_fill(addr_string, tn->own_addr), tn->net_id); return 0; } void tipc_net_stop(struct net *net) { - if (!tipc_own_addr) + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (!tn->own_addr) return; - tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tipc_own_addr, 0, - tipc_own_addr); + tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0, + tn->own_addr); rtnl_lock(); tipc_bearer_stop(net); tipc_bclink_stop(net); @@ -224,7 +226,7 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) u32 val; /* Can't change net id once TIPC has joined a network */ - if (tipc_own_addr) + if (tn->own_addr) return -EPERM; val = nla_get_u32(attrs[TIPC_NLA_NET_ID]); @@ -238,7 +240,7 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) u32 addr; /* Can't change net addr once TIPC has joined a network */ - if (tipc_own_addr) + if (tn->own_addr) return -EPERM; addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); diff --git a/net/tipc/node.c b/net/tipc/node.c index 3db501260de1..b1eb0927bac8 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -75,7 +75,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; - if (unlikely(!in_own_cluster_exact(addr))) + if (unlikely(!in_own_cluster_exact(net, addr))) return NULL; rcu_read_lock(); @@ -155,7 +155,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) 
struct tipc_node *node; struct tipc_sock_conn *conn; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) return 0; node = tipc_node_find(net, dnode); @@ -181,7 +181,7 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) struct tipc_node *node; struct tipc_sock_conn *conn, *safe; - if (in_own_node(dnode)) + if (in_own_node(net, dnode)) return; node = tipc_node_find(net, dnode); @@ -200,14 +200,16 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) void tipc_node_abort_sock_conns(struct net *net, struct list_head *conns) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock_conn *conn, *safe; struct sk_buff *buf; list_for_each_entry_safe(conn, safe, conns, list) { - buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tipc_own_addr, - conn->peer_node, conn->port, - conn->peer_port, TIPC_ERR_NO_NODE); + buf = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, + tn->own_addr, conn->peer_node, + conn->port, conn->peer_port, + TIPC_ERR_NO_NODE); if (likely(buf)) tipc_sk_rcv(net, buf); list_del(&conn->list); @@ -287,6 +289,7 @@ static void node_select_active_links(struct tipc_node *n_ptr) */ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); struct tipc_link **active; n_ptr->working_links--; @@ -321,7 +324,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) } /* Loopback link went down? No fragmentation needed from now on. */ - if (n_ptr->addr == tipc_own_addr) { + if (n_ptr->addr == tn->own_addr) { n_ptr->act_mtus[0] = MAX_MSG_SIZE; n_ptr->act_mtus[1] = MAX_MSG_SIZE; } @@ -483,7 +486,7 @@ struct sk_buff *tipc_node_get_links(struct net *net, const void *req_tlv_area, return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network address)"); - if (!tipc_own_addr) + if (!tn->own_addr) return tipc_cfg_reply_none(); spin_lock_bh(&tn->node_list_lock); @@ -501,7 +504,7 @@ struct sk_buff *tipc_node_get_links(struct net *net, const void *req_tlv_area, return NULL; /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr)); + link_info.dest = htonl(tipc_cluster_mask(tn->own_addr)); link_info.up = htonl(1); strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4670e1e46c89..9b8470edc783 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -251,10 +251,11 @@ static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *skb; u32 dnode; + struct net *net = sock_net(sk); while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); + if (tipc_msg_reverse(net, skb, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(net, skb, dnode, 0); } } @@ -265,6 +266,7 @@ static void tsk_rej_rx_queue(struct sock *sk) */ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { + struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); u32 peer_port = tsk_peer_port(tsk); u32 orig_node; u32 peer_node; @@ -281,10 +283,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) if (likely(orig_node == peer_node)) return true; - if (!orig_node && (peer_node == tipc_own_addr)) + if (!orig_node && (peer_node == tn->own_addr)) return true; - if (!peer_node && (orig_node == 
tipc_own_addr)) + if (!peer_node && (orig_node == tn->own_addr)) return true; return false; @@ -346,7 +348,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->max_pkt = MAX_PKT_DEFAULT; INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; - tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + tipc_msg_init(net, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); /* Finish initializing socket data structures */ @@ -471,6 +473,7 @@ static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock *tsk; struct sk_buff *skb; u32 dnode, probing_state; @@ -503,7 +506,8 @@ static int tipc_release(struct socket *sock) tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(skb, &dnode, TIPC_ERR_NO_PORT)) + if (tipc_msg_reverse(net, skb, &dnode, + TIPC_ERR_NO_PORT)) tipc_link_xmit_skb(net, skb, dnode, 0); } } @@ -514,9 +518,9 @@ static int tipc_release(struct socket *sock) sock_put(sk); tipc_sk_remove(tsk); if (tsk->connected) { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, dnode, tipc_own_addr, - tsk_peer_port(tsk), + skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tn->own_addr, tsk_peer_port(tsk), tsk->portid, TIPC_ERR_NO_PORT); if (skb) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); @@ -614,6 +618,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; struct tipc_sock *tsk = tipc_sk(sock->sk); + struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { @@ -624,7 +629,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, addr->addr.id.node = tsk_peer_node(tsk); } else { addr->addr.id.ref = tsk->portid; - addr->addr.id.node = tipc_own_addr; + addr->addr.id.node = tn->own_addr; } *uaddr_len = sizeof(*addr); @@ -741,7 +746,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &head); + rc = tipc_msg_build(net, mhdr, msg, 0, dsz, mtu, &head); if (unlikely(rc < 0)) return rc; @@ -774,7 +779,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) uint i, last, dst = 0; u32 scope = TIPC_CLUSTER_SCOPE; - if (in_own_node(msg_orignode(msg))) + if (in_own_node(net, msg_orignode(msg))) scope = TIPC_NODE_SCOPE; /* Create destination port list: */ @@ -826,7 +831,7 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) + if (!tipc_msg_reverse(sock_net(&tsk->sk), buf, dnode, TIPC_OK)) return TIPC_OK; msg_set_type(msg, CONN_PROBE_REPLY); return TIPC_FWD_MSG; @@ -959,7 +964,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); + rc = tipc_msg_build(net, mhdr, m, 0, dsz, mtu, &head); if (rc < 0) goto exit; @@ -1074,7 +1079,7 @@ next: mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head); + rc = tipc_msg_build(net, mhdr, m, sent, send, mtu, 
&head); if (unlikely(rc < 0)) goto exit; do { @@ -1246,6 +1251,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) { struct net *net = sock_net(&tsk->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); @@ -1253,8 +1259,9 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!tsk->connected) return; - skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, - tipc_own_addr, peer_port, tsk->portid, TIPC_OK); + skb = tipc_msg_create(net, CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tn->own_addr, peer_port, tsk->portid, + TIPC_OK); if (!skb) return; msg = buf_msg(skb); @@ -1726,6 +1733,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) int rc; u32 onode; struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); uint truesize = skb->truesize; rc = filter_rcv(sk, skb); @@ -1736,10 +1744,10 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } - if ((rc < 0) && !tipc_msg_reverse(skb, &onode, -rc)) + if ((rc < 0) && !tipc_msg_reverse(net, skb, &onode, -rc)) return 0; - tipc_link_xmit_skb(sock_net(sk), skb, onode, 0); + tipc_link_xmit_skb(net, skb, onode, 0); return 0; } @@ -1784,7 +1792,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) if (likely(!rc)) return 0; exit: - if ((rc < 0) && !tipc_msg_reverse(skb, &dnode, -rc)) + if ((rc < 0) && !tipc_msg_reverse(net, skb, &dnode, -rc)) return -EHOSTUNREACH; tipc_link_xmit_skb(net, skb, dnode, 0); @@ -2045,6 +2053,7 @@ static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; u32 dnode; @@ -2067,15 +2076,16 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(skb, &dnode, TIPC_CONN_SHUTDOWN)) + if (tipc_msg_reverse(net, skb, &dnode, + TIPC_CONN_SHUTDOWN)) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, tipc_own_addr, + 0, dnode, tn->own_addr, tsk_peer_port(tsk), tsk->portid, TIPC_CONN_SHUTDOWN); tipc_link_xmit_skb(net, skb, dnode, tsk->portid); @@ -2107,6 +2117,8 @@ static void tipc_sk_timeout(unsigned long data) { struct tipc_sock *tsk = (struct tipc_sock *)data; struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; u32 peer_port, peer_node; @@ -2120,13 +2132,13 @@ static void tipc_sk_timeout(unsigned long data) if (tsk->probing_state == TIPC_CONN_PROBING) { /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tipc_own_addr, - peer_node, tsk->portid, peer_port, - TIPC_ERR_NO_PORT); + skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, + tn->own_addr, peer_node, tsk->portid, + peer_port, TIPC_ERR_NO_PORT); } else { - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, - 0, peer_node, tipc_own_addr, + skb = tipc_msg_create(net, CONN_MANAGER, CONN_PROBE, INT_H_SIZE, + 0, peer_node, tn->own_addr, peer_port, tsk->portid, TIPC_OK); tsk->probing_state = 
TIPC_CONN_PROBING; if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) @@ -2198,14 +2210,16 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, static int tipc_sk_show(struct tipc_sock *tsk, char *buf, int len, int full_id) { + struct net *net = sock_net(&tsk->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); struct publication *publ; int ret; if (full_id) ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", - tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), tsk->portid); + tipc_zone(tn->own_addr), + tipc_cluster(tn->own_addr), + tipc_node(tn->own_addr), tsk->portid); else ret = tipc_snprintf(buf, len, "%-10u:", tsk->portid); @@ -2296,8 +2310,8 @@ void tipc_sk_reinit(struct net *net) rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); msg = &tsk->phdr; - msg_set_prevnode(msg, tipc_own_addr); - msg_set_orignode(msg, tipc_own_addr); + msg_set_prevnode(msg, tn->own_addr); + msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } } @@ -2691,6 +2705,8 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, int err; void *hdr; struct nlattr *attrs; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); @@ -2702,7 +2718,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, goto genlmsg_cancel; if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) goto attr_msg_cancel; - if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr)) + if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) goto attr_msg_cancel; if (tsk->connected) { -- cgit v1.2.3 From a62fbccecd62bacb4416fc427239f5b43b25d05e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:11 +0800 Subject: tipc: make subscriber server support net namespace TIPC establishes one subscriber server which allows users to subscribe their interesting name service status. After tipc supports namespace, one dedicated tipc stack instance is created for each namespace, and each instance can be deemed as one independent TIPC node. As a result, subscriber server must be built for each namespace. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/core.c | 24 ++++++------- net/tipc/core.h | 4 +++ net/tipc/server.c | 2 +- net/tipc/server.h | 4 ++- net/tipc/socket.c | 4 +-- net/tipc/socket.h | 2 +- net/tipc/subscr.c | 104 ++++++++++++++++++++++++++++++++---------------------- net/tipc/subscr.h | 7 ++-- 8 files changed, 86 insertions(+), 65 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index 7c09670120eb..4a8b7955e0e0 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -68,8 +68,14 @@ static int __net_init tipc_init_net(struct net *net) err = tipc_nametbl_init(net); if (err) goto out_nametbl; + + err = tipc_subscr_start(net); + if (err) + goto out_subscr; return 0; +out_subscr: + tipc_nametbl_stop(net); out_nametbl: tipc_sk_rht_destroy(net); out_sk_rht: @@ -78,6 +84,7 @@ out_sk_rht: static void __net_exit tipc_exit_net(struct net *net) { + tipc_subscr_stop(net); tipc_net_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); @@ -104,10 +111,6 @@ static int __init tipc_init(void) get_random_bytes(&tipc_random, sizeof(tipc_random)); - err = register_pernet_subsys(&tipc_net_ops); - if (err) - goto out_pernet; - err = tipc_netlink_start(); if (err) goto out_netlink; @@ -120,9 +123,9 @@ static int __init tipc_init(void) if (err) goto out_sysctl; - err = tipc_subscr_start(); + err = register_pernet_subsys(&tipc_net_ops); if (err) - goto out_subscr; + goto out_pernet; err = tipc_bearer_setup(); if (err) @@ -131,28 +134,25 @@ static int __init tipc_init(void) pr_info("Started in single node mode\n"); return 0; out_bearer: - tipc_subscr_stop(); -out_subscr: + unregister_pernet_subsys(&tipc_net_ops); +out_pernet: tipc_unregister_sysctl(); out_sysctl: tipc_socket_stop(); out_socket: tipc_netlink_stop(); out_netlink: - unregister_pernet_subsys(&tipc_net_ops); -out_pernet: pr_err("Unable to start in single node mode\n"); return err; } static void __exit tipc_exit(void) { - unregister_pernet_subsys(&tipc_net_ops); tipc_bearer_cleanup(); tipc_netlink_stop(); - tipc_subscr_stop(); tipc_socket_stop(); tipc_unregister_sysctl(); + unregister_pernet_subsys(&tipc_net_ops); pr_info("Deactivated\n"); } diff --git a/net/tipc/core.h b/net/tipc/core.h index afabf39e801c..639f562dddf3 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -106,6 +106,10 @@ struct tipc_net { /* Name table */ spinlock_t nametbl_lock; struct name_table *nametbl; + + /* Topology subscription server */ + struct tipc_server *topsrv; + atomic_t subscription_count; }; #ifdef CONFIG_SYSCTL diff --git a/net/tipc/server.c b/net/tipc/server.c index b5bdaf721d70..eadd4ed45905 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -309,7 +309,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = tipc_sock_create_local(s->type, &sock); + ret = tipc_sock_create_local(s->net, s->type, &sock); if (ret < 0) return NULL; ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, diff --git a/net/tipc/server.h b/net/tipc/server.h index 9c979a01997c..9015faedb1b0 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -47,6 +47,7 @@ * @conn_idr: identifier set of connection * @idr_lock: protect the connection identifier set * @idr_in_use: amount of allocated identifier entry + * @net: network namspace instance * @rcvbuf_cache: memory cache of server receive buffer * @rcv_wq: receive workqueue * @send_wq: send workqueue @@ -63,6 +64,7 @@ struct tipc_server { struct idr conn_idr; spinlock_t idr_lock; int idr_in_use; + struct net *net; struct kmem_cache *rcvbuf_cache; struct 
workqueue_struct *rcv_wq; struct workqueue_struct *send_wq; @@ -73,7 +75,7 @@ struct tipc_server { struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len); struct sockaddr_tipc *saddr; - const char name[TIPC_SERVER_NAME_LEN]; + char name[TIPC_SERVER_NAME_LEN]; int imp; int type; }; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9b8470edc783..2cec496ba691 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -388,7 +388,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, * * Returns 0 on success, errno otherwise */ -int tipc_sock_create_local(int type, struct socket **res) +int tipc_sock_create_local(struct net *net, int type, struct socket **res) { int rc; @@ -397,7 +397,7 @@ int tipc_sock_create_local(int type, struct socket **res) pr_err("Failed to create kernel socket\n"); return rc; } - tipc_sk_create(&init_net, *res, 0, 1); + tipc_sk_create(net, *res, 0, 1); return 0; } diff --git a/net/tipc/socket.h b/net/tipc/socket.h index c15c4e121fe3..f56c3fded51f 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -45,7 +45,7 @@ int tipc_socket_init(void); void tipc_socket_stop(void); -int tipc_sock_create_local(int type, struct socket **res); +int tipc_sock_create_local(struct net *net, int type, struct socket **res); void tipc_sock_release_local(struct socket *sock); int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, int flags); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index b71dbc0ae8f9..72c339e432aa 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -50,34 +50,6 @@ struct tipc_subscriber { struct list_head subscription_list; }; -static void subscr_conn_msg_event(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len); -static void *subscr_named_msg_event(int conid); -static void subscr_conn_shutdown_event(int conid, void *usr_data); - -static atomic_t subscription_count = ATOMIC_INIT(0); - -static struct sockaddr_tipc topsrv_addr __read_mostly = { - .family = AF_TIPC, - .addrtype = TIPC_ADDR_NAMESEQ, - .addr.nameseq.type = TIPC_TOP_SRV, - .addr.nameseq.lower = TIPC_TOP_SRV, - .addr.nameseq.upper = TIPC_TOP_SRV, - .scope = TIPC_NODE_SCOPE -}; - -static struct tipc_server topsrv __read_mostly = { - .saddr = &topsrv_addr, - .imp = TIPC_CRITICAL_IMPORTANCE, - .type = SOCK_SEQPACKET, - .max_rcvbuf_size = sizeof(struct tipc_subscr), - .name = "topology_server", - .tipc_conn_recvmsg = subscr_conn_msg_event, - .tipc_conn_new = subscr_named_msg_event, - .tipc_conn_shutdown = subscr_conn_shutdown_event, -}; - /** * htohl - convert value to endianness used by destination * @in: value to convert @@ -94,6 +66,7 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node) { + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; struct kvec msg_sect; @@ -104,8 +77,8 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base, - msg_sect.iov_len); + tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL, + msg_sect.iov_base, msg_sect.iov_len); } /** @@ -146,6 +119,7 @@ static void subscr_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; struct tipc_subscriber *subscriber 
= sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); /* The spin lock per subscriber is used to protect its members */ spin_lock_bh(&subscriber->lock); @@ -170,7 +144,7 @@ static void subscr_timeout(unsigned long data) /* Now destroy subscription */ kfree(sub); - atomic_dec(&subscription_count); + atomic_dec(&tn->subscription_count); } /** @@ -180,10 +154,12 @@ static void subscr_timeout(unsigned long data) */ static void subscr_del(struct tipc_subscription *sub) { + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + tipc_nametbl_unsubscribe(sub); list_del(&sub->subscription_list); kfree(sub); - atomic_dec(&subscription_count); + atomic_dec(&tn->subscription_count); } /** @@ -191,9 +167,12 @@ static void subscr_del(struct tipc_subscription *sub) * * Note: Must call it in process context since it might sleep. */ -static void subscr_terminate(struct tipc_subscriber *subscriber) +static void subscr_terminate(struct tipc_subscription *sub) { - tipc_conn_terminate(&topsrv, subscriber->conid); + struct tipc_subscriber *subscriber = sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + + tipc_conn_terminate(tn->topsrv, subscriber->conid); } static void subscr_release(struct tipc_subscriber *subscriber) @@ -263,7 +242,9 @@ static void subscr_cancel(struct tipc_subscr *s, */ static int subscr_subscribe(struct net *net, struct tipc_subscr *s, struct tipc_subscriber *subscriber, - struct tipc_subscription **sub_p) { + struct tipc_subscription **sub_p) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub; int swap; @@ -278,7 +259,7 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { + if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); return -EINVAL; @@ -309,7 +290,7 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s, sub->subscriber = subscriber; sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - atomic_inc(&subscription_count); + atomic_inc(&tn->subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub); mod_timer(&sub->timer, jiffies + sub->timeout); @@ -336,7 +317,7 @@ static void subscr_conn_msg_event(struct net *net, int conid, if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub) < 0) { spin_unlock_bh(&subscriber->lock); - subscr_terminate(subscriber); + subscr_terminate(sub); return; } if (sub) @@ -344,7 +325,6 @@ static void subscr_conn_msg_event(struct net *net, int conid, spin_unlock_bh(&subscriber->lock); } - /* Handle one request to establish a new subscriber */ static void *subscr_named_msg_event(int conid) { @@ -363,12 +343,50 @@ static void *subscr_named_msg_event(int conid) return (void *)subscriber; } -int tipc_subscr_start(void) +int tipc_subscr_start(struct net *net) { - return tipc_server_start(&topsrv); + struct tipc_net *tn = net_generic(net, tipc_net_id); + const char name[] = "topology_server"; + struct tipc_server *topsrv; + struct sockaddr_tipc *saddr; + + saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC); + if (!saddr) + return -ENOMEM; + saddr->family = AF_TIPC; + saddr->addrtype = TIPC_ADDR_NAMESEQ; + saddr->addr.nameseq.type = TIPC_TOP_SRV; + saddr->addr.nameseq.lower = TIPC_TOP_SRV; + saddr->addr.nameseq.upper = TIPC_TOP_SRV; + 
saddr->scope = TIPC_NODE_SCOPE; + + topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC); + if (!topsrv) { + kfree(saddr); + return -ENOMEM; + } + topsrv->net = net; + topsrv->saddr = saddr; + topsrv->imp = TIPC_CRITICAL_IMPORTANCE; + topsrv->type = SOCK_SEQPACKET; + topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); + topsrv->tipc_conn_recvmsg = subscr_conn_msg_event; + topsrv->tipc_conn_new = subscr_named_msg_event; + topsrv->tipc_conn_shutdown = subscr_conn_shutdown_event; + + strncpy(topsrv->name, name, strlen(name) + 1); + tn->topsrv = topsrv; + atomic_set(&tn->subscription_count, 0); + + return tipc_server_start(topsrv); } -void tipc_subscr_stop(void) +void tipc_subscr_stop(struct net *net) { - tipc_server_stop(&topsrv); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_server *topsrv = tn->topsrv; + + tipc_server_stop(topsrv); + kfree(topsrv->saddr); + kfree(topsrv); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 670f57096635..33488bd9fe3c 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -74,13 +74,10 @@ struct tipc_subscription { int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper); - void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, u32 node, int must); - -int tipc_subscr_start(void); - -void tipc_subscr_stop(void); +int tipc_subscr_start(struct net *net); +void tipc_subscr_stop(struct net *net); #endif -- cgit v1.2.3 From bafa29e34185fb70496bfd604dc9e4071049f023 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:12 +0800 Subject: tipc: make tipc random value aware of net namespace After namespace is supported, each namespace should own its private random value. So the global variable representing the random value must be moved to tipc_net structure. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. 
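The mechanical change here is small; the point is that the seed is now drawn once per namespace in the pernet init hook rather than once at module load, so each namespace (i.e. each independent TIPC node on the host) gets its own node signature and link session numbers. A simplified sketch, reusing the hypothetical struct my_net from the earlier sketch (its int random field):

static int __net_init my_init_net(struct net *net)
{
	struct my_net *mn = net_generic(net, my_net_id);

	/* Seed once per namespace, not once per module load. */
	get_random_bytes(&mn->random, sizeof(mn->random));
	return 0;
}

Consumers then always reach the value through net_generic() for the namespace the message belongs to, which is what the discovery and link hunks below switch to.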
Miller --- net/tipc/core.c | 6 +----- net/tipc/core.h | 6 +----- net/tipc/discover.c | 2 +- net/tipc/link.c | 2 +- 4 files changed, 4 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index 4a8b7955e0e0..674bd2698528 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -44,9 +44,6 @@ #include -/* global variables used by multiple sub-systems within TIPC */ -int tipc_random __read_mostly; - /* configurable TIPC parameters */ int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ @@ -58,6 +55,7 @@ static int __net_init tipc_init_net(struct net *net) tn->net_id = 4711; tn->own_addr = 0; + get_random_bytes(&tn->random, sizeof(int)); INIT_LIST_HEAD(&tn->node_list); spin_lock_init(&tn->node_list_lock); @@ -109,8 +107,6 @@ static int __init tipc_init(void) TIPC_CRITICAL_IMPORTANCE; sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; - get_random_bytes(&tipc_random, sizeof(tipc_random)); - err = tipc_netlink_start(); if (err) goto out_netlink; diff --git a/net/tipc/core.h b/net/tipc/core.h index 639f562dddf3..817b2e9d4227 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -76,14 +76,10 @@ extern int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; -/* - * Other global variables - */ -extern int tipc_random __read_mostly; - struct tipc_net { u32 own_addr; int net_id; + int random; /* Node table and node list */ spinlock_t node_list_lock; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 786411dea61c..5b40cb89ff0a 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -88,7 +88,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, msg = buf_msg(buf); tipc_msg_init(net, msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); - msg_set_node_sig(msg, tipc_random); + msg_set_node_sig(msg, tn->random); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); diff --git a/net/tipc/link.c b/net/tipc/link.c index 997256769065..193bc1560677 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -289,7 +289,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, tipc_msg_init(n_ptr->net, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); - msg_set_session(msg, (tipc_random & 0xffff)); + msg_set_session(msg, (tn->random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); -- cgit v1.2.3 From d49e204161af6e47d8473423aa6856388c02f254 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:13 +0800 Subject: tipc: make netlink support net namespace Currently tipc module only allows users sitting on "init_net" namespace to configure it through netlink interface. But now almost each tipc component is able to be aware of net namespace, so it's time to open the permission for users residing in other namespaces, allowing them to configure their own tipc stack instance through netlink interface. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. 
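For generic netlink, being namespace-aware boils down to the three things visible in the hunks below: mark the family with netnsok so requests are accepted outside init_net, check capabilities against the sending socket's namespace with netlink_net_capable(), and send replies into that same namespace rather than into init_net. A hedged sketch of the shape (MY_FAMILY, MY_ATTR_MAX, MY_CMD_GET and my_doit are illustrative names, not upstream symbols):

static struct genl_family my_genl_family = {
	.name	 = "MY_FAMILY",
	.version = 1,
	.maxattr = MY_ATTR_MAX,
	.netnsok = true,			/* usable from any network namespace */
};

static int my_doit(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = genl_info_net(info);	/* requester's namespace */
	struct sk_buff *reply;
	void *hdr;

	if (!netlink_net_capable(skb, CAP_NET_ADMIN))
		return -EPERM;

	reply = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!reply)
		return -ENOMEM;
	hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
			  &my_genl_family, 0, MY_CMD_GET);
	if (!hdr) {
		nlmsg_free(reply);
		return -EMSGSIZE;
	}
	/* ... add reply attributes here ... */
	genlmsg_end(reply, hdr);

	return genlmsg_unicast(net, reply, info->snd_portid);
}

In practice genlmsg_reply(reply, info) wraps the final call, but the explicit form shows where the namespace comes from.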
Miller --- net/tipc/netlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 282b59681484..fe0f5134ce15 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -54,7 +54,8 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); u16 cmd; - if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN))) + if ((req_userhdr->cmd & 0xC000) && + (!netlink_net_capable(skb, CAP_NET_ADMIN))) cmd = TIPC_CMD_NOT_NET_ADMIN; else cmd = req_userhdr->cmd; @@ -70,7 +71,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); + genlmsg_unicast(net, rep_buf, NETLINK_CB(skb).portid); } return 0; @@ -95,6 +96,7 @@ static struct genl_family tipc_genl_family = { .version = TIPC_GENL_VERSION, .hdrsize = TIPC_GENL_HDRLEN, .maxattr = 0, + .netnsok = true, }; /* Legacy ASCII API */ @@ -114,6 +116,7 @@ struct genl_family tipc_genl_v2_family = { .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, .maxattr = TIPC_NLA_MAX, + .netnsok = true, }; static const struct genl_ops tipc_genl_v2_ops[] = { -- cgit v1.2.3 From bdced7ef7838c1c4aebe9f295e44b7f0dcae2109 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sat, 10 Jan 2015 07:31:12 -0800 Subject: bridge: support for multiple vlans and vlan ranges in setlink and dellink requests This patch changes bridge IFLA_AF_SPEC netlink attribute parser to look for more than one IFLA_BRIDGE_VLAN_INFO attribute. This allows userspace to pack more than one vlan in the setlink msg. The dumps were already sending more than one vlan info in the getlink msg. This patch also adds bridge_vlan_info flags BRIDGE_VLAN_INFO_RANGE_BEGIN and BRIDGE_VLAN_INFO_RANGE_END to indicate start and end of vlan range This patch also deletes unused ifla_br_policy. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
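On the wire this simply means the IFLA_AF_SPEC nest may now carry several IFLA_BRIDGE_VLAN_INFO attributes, and a contiguous range is expressed as a begin/end pair. A sketch of the attribute layout for vids 100-110, written with kernel-style nla helpers purely to show the structure (error handling omitted):

	struct bridge_vlan_info vinfo = { 0 };
	struct nlattr *af;

	af = nla_nest_start(skb, IFLA_AF_SPEC);

	vinfo.vid   = 100;
	vinfo.flags = BRIDGE_VLAN_INFO_RANGE_BEGIN;
	nla_put(skb, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo);

	vinfo.vid   = 110;
	vinfo.flags = BRIDGE_VLAN_INFO_RANGE_END;
	nla_put(skb, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo);

	nla_nest_end(skb, af);

With an iproute2 build that understands ranges, something like "bridge vlan add dev swp1 vid 100-110" should end up generating a request of this shape; the exact CLI support depends on the iproute2 version.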
Miller --- include/uapi/linux/if_bridge.h | 2 + net/bridge/br_netlink.c | 104 +++++++++++++++++++++++++++-------------- 2 files changed, 70 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index b03ee8f62d3c..eaaea6208b42 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -125,6 +125,8 @@ enum { #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ #define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ #define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ +#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */ +#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */ struct bridge_vlan_info { __u16 flags; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9f5eb55a4d3a..6f616a2df0b4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -218,57 +218,89 @@ out: return err; } -static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { - [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, - [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, - [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, - .len = sizeof(struct bridge_vlan_info), }, -}; +static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, + int cmd, struct bridge_vlan_info *vinfo) +{ + int err = 0; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else { + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + } + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else { + br_vlan_delete(br, vinfo->vid); + } + break; + } + + return err; +} static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, int cmd) { - struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + struct bridge_vlan_info *vinfo_start = NULL; + struct bridge_vlan_info *vinfo = NULL; + struct nlattr *attr; int err = 0; + int rem; - err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); - if (err) - return err; + nla_for_each_nested(attr, af_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) + continue; + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) + return -EINVAL; + vinfo = nla_data(attr); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vinfo_start) + return -EINVAL; + vinfo_start = vinfo; + continue; + } + + if (vinfo_start) { + struct bridge_vlan_info tmp_vinfo; + int v; - if (tb[IFLA_BRIDGE_VLAN_INFO]) { - struct bridge_vlan_info *vinfo; + if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) + return -EINVAL; - vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + if (vinfo->vid <= vinfo_start->vid) + return -EINVAL; - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) - return -EINVAL; + memcpy(&tmp_vinfo, vinfo_start, + sizeof(struct bridge_vlan_info)); - switch (cmd) { - case RTM_SETLINK: - if (p) { - err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + for (v = vinfo_start->vid; v <= vinfo->vid; v++) { + tmp_vinfo.vid = v; + err = br_vlan_info(br, p, cmd, &tmp_vinfo); if (err) break; - - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid, - vinfo->flags); - } else - err = br_vlan_add(br, vinfo->vid, vinfo->flags); - - break; - - case RTM_DELLINK: - if (p) { - 
nbp_vlan_delete(p, vinfo->vid); - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - br_vlan_delete(p->br, vinfo->vid); - } else - br_vlan_delete(br, vinfo->vid); - break; + } + vinfo_start = NULL; + } else { + err = br_vlan_info(br, p, cmd, vinfo); } + if (err) + break; } return err; -- cgit v1.2.3 From 36cd0ffbab8a65f44ae13fb200bfb5a8f9ea68de Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sat, 10 Jan 2015 07:31:14 -0800 Subject: bridge: new function to pack vlans into ranges during gets This patch adds new function to pack vlans into ranges whereever applicable using the flags BRIDGE_VLAN_INFO_RANGE_BEGIN and BRIDGE VLAN_INFO_RANGE_END Old vlan packing code is moved to a new function and continues to be called when filter_mask is RTEXT_FILTER_BRVLAN. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 145 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6f616a2df0b4..0b03879488c4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -67,6 +67,118 @@ static int br_port_fill_attrs(struct sk_buff *skb, return 0; } +static int br_fill_ifvlaninfo_range(struct sk_buff *skb, u16 vid_start, + u16 vid_end, u16 flags) +{ + struct bridge_vlan_info vinfo; + + if ((vid_end - vid_start) > 0) { + /* add range to skb */ + vinfo.vid = vid_start; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_BEGIN; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + + vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; + + vinfo.vid = vid_end; + vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_END; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } else { + vinfo.vid = vid_start; + vinfo.flags = flags; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, + const struct net_port_vlans *pv) +{ + u16 vid_range_start = 0, vid_range_end = 0; + u16 vid_range_flags; + u16 pvid, vid, flags; + int err = 0; + + /* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan + * and mark vlan info with begin and end flags + * if vlaninfo represents a range + */ + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + flags = 0; + if (vid == pvid) + flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (vid_range_start == 0) { + goto initvars; + } else if ((vid - vid_range_end) == 1 && + flags == vid_range_flags) { + vid_range_end = vid; + continue; + } else { + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } + +initvars: + vid_range_start = vid; + vid_range_end = vid; + vid_range_flags = flags; + } + + /* Call it once more to send any left over vlans */ + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + + return 0; +} + +static int br_fill_ifvlaninfo(struct sk_buff *skb, + const struct net_port_vlans *pv) +{ + struct bridge_vlan_info vinfo; + u16 pvid, vid; + + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + vinfo.vid = vid; + vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= 
BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + /* * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. @@ -121,12 +233,11 @@ static int br_fill_ifinfo(struct sk_buff *skb, } /* Check if the VID information is requested */ - if (filter_mask & RTEXT_FILTER_BRVLAN) { - struct nlattr *af; + if ((filter_mask & RTEXT_FILTER_BRVLAN) || + (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { const struct net_port_vlans *pv; - struct bridge_vlan_info vinfo; - u16 vid; - u16 pvid; + struct nlattr *af; + int err; if (port) pv = nbp_get_vlan_info(port); @@ -140,21 +251,12 @@ static int br_fill_ifinfo(struct sk_buff *skb, if (!af) goto nla_put_failure; - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - vinfo.vid = vid; - vinfo.flags = 0; - if (vid == pvid) - vinfo.flags |= BRIDGE_VLAN_INFO_PVID; - - if (test_bit(vid, pv->untagged_bitmap)) - vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; - - if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - goto nla_put_failure; - } - + if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) + err = br_fill_ifvlaninfo_compressed(skb, pv); + else + err = br_fill_ifvlaninfo(skb, pv); + if (err) + goto nla_put_failure; nla_nest_end(skb, af); } @@ -209,7 +311,8 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, int err = 0; struct net_bridge_port *port = br_port_get_rtnl(dev); - if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) && + !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) goto out; err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, -- cgit v1.2.3 From 83400b990cd78a774a932223e83a19b1f816f24e Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sun, 11 Jan 2015 14:03:35 +0100 Subject: net: xfrm: xfrm_algo: Remove unused function Remove the function aead_entries() that is not used anywhere. This was partially found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist Signed-off-by: David S. Miller --- net/xfrm/xfrm_algo.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index debe733386f8..12e82a5e4ad5 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -561,11 +561,6 @@ static struct xfrm_algo_desc calg_list[] = { }, }; -static inline int aead_entries(void) -{ - return ARRAY_SIZE(aead_list); -} - static inline int aalg_entries(void) { return ARRAY_SIZE(aalg_list); -- cgit v1.2.3 From ddcde70cbf2f837a083012965eaec6d213b87c48 Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sun, 11 Jan 2015 15:08:46 +0100 Subject: net: sched: sch_teql: Remove unused function Remove the function teql_neigh_release() that is not used anywhere. This was partially found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist Signed-off-by: David S. 
Miller --- net/sched/sch_teql.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6ada42396a24..4899d4a18aa5 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -122,13 +122,6 @@ teql_peek(struct Qdisc *sch) return NULL; } -static inline void -teql_neigh_release(struct neighbour *n) -{ - if (n) - neigh_release(n); -} - static void teql_reset(struct Qdisc *sch) { -- cgit v1.2.3 From 1059590254fa9dce9cafc4f07d1103dbec415e76 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Mon, 12 Jan 2015 11:41:28 +0530 Subject: net: allow large number of rx queues netif_alloc_rx_queues() uses kcalloc() to allocate memory for "struct netdev_queue *_rx" array. If we are doing large rx queue allocation kcalloc() might fail, so this patch does a fallback to vzalloc(). Similar implementation is done for tx queue allocation in netif_alloc_netdev_queues(). We avoid failure of high order memory allocation with the help of vzalloc(), this allows us to do large rx and tx queue allocation which in turn helps us to increase the number of queues in tun. As vmalloc() adds overhead on a critical network path, __GFP_REPEAT flag is used with kzalloc() to do this fallback only when really needed. Signed-off-by: Pankaj Gupta Reviewed-by: Michael S. Tsirkin Reviewed-by: David Gibson Signed-off-by: David S. Miller --- net/core/dev.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 683d493aa1bf..805456147c30 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6172,13 +6172,16 @@ static int netif_alloc_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; + size_t sz = count * sizeof(*rx); BUG_ON(count < 1); - rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL); - if (!rx) - return -ENOMEM; - + rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!rx) { + rx = vzalloc(sz); + if (!rx) + return -ENOMEM; + } dev->_rx = rx; for (i = 0; i < count; i++) @@ -6808,7 +6811,7 @@ void free_netdev(struct net_device *dev) netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS - kfree(dev->_rx); + kvfree(dev->_rx); #endif kfree(rcu_dereference_protected(dev->ingress_queue, 1)); -- cgit v1.2.3 From c5adde9468b0714a051eac7f9666f23eb10b61f7 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 12 Jan 2015 14:52:23 +0800 Subject: netlink: eliminate nl_sk_hash_lock As rhashtable_lookup_compare_insert() can guarantee the process of search and insertion is atomic, it's safe to eliminate the nl_sk_hash_lock. After this, object insertion or removal will be protected with per bucket lock on write side while object lookup is guarded with rcu read lock on read side. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 33 ++++++++++++++++++++------------- net/netlink/af_netlink.h | 1 - net/netlink/diag.c | 10 +++++----- 3 files changed, 25 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 298e1df7132a..01b702d63457 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -98,7 +98,7 @@ static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: * Lookup and traversal are protected with an RCU read-side lock. 
Insertion - * and removal are protected with nl_sk_hash_lock while using RCU list + * and removal are protected with per bucket lock while using RCU list * modification primitives and may run in parallel to RCU protected lookups. * Destruction of the Netlink socket may only occur *after* nl_table_lock has * been acquired * either during or after the socket has been removed from @@ -110,10 +110,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); -/* Protects netlink socket hash table mutations */ -DEFINE_MUTEX(nl_sk_hash_lock); -EXPORT_SYMBOL_GPL(nl_sk_hash_lock); - static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -998,6 +994,19 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, &netlink_compare, &arg); } +static bool __netlink_insert(struct netlink_table *table, struct sock *sk, + struct net *net) +{ + struct netlink_compare_arg arg = { + .net = net, + .portid = nlk_sk(sk)->portid, + }; + + return rhashtable_lookup_compare_insert(&table->hash, + &nlk_sk(sk)->node, + &netlink_compare, &arg); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { struct netlink_table *table = &nl_table[protocol]; @@ -1043,9 +1052,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) struct netlink_table *table = &nl_table[sk->sk_protocol]; int err = -EADDRINUSE; - mutex_lock(&nl_sk_hash_lock); - if (__netlink_lookup(table, portid, net)) - goto err; + lock_sock(sk); err = -EBUSY; if (nlk_sk(sk)->portid) @@ -1058,10 +1065,12 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) nlk_sk(sk)->portid = portid; sock_hold(sk); - rhashtable_insert(&table->hash, &nlk_sk(sk)->node); - err = 0; + if (__netlink_insert(table, sk, net)) + err = 0; + else + sock_put(sk); err: - mutex_unlock(&nl_sk_hash_lock); + release_sock(sk); return err; } @@ -1069,13 +1078,11 @@ static void netlink_remove(struct sock *sk) { struct netlink_table *table; - mutex_lock(&nl_sk_hash_lock); table = &nl_table[sk->sk_protocol]; if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } - mutex_unlock(&nl_sk_hash_lock); netlink_table_grab(); if (nlk_sk(sk)->subscriptions) { diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index fd96fa76202a..7518375782f5 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -74,6 +74,5 @@ struct netlink_table { extern struct netlink_table *nl_table; extern rwlock_t nl_table_lock; -extern struct mutex nl_sk_hash_lock; #endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c index fcca36d81a62..bb59a7ed0859 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -103,7 +103,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, { struct netlink_table *tbl = &nl_table[protocol]; struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference(ht->tbl, ht); + const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; @@ -115,7 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, for (i = 0; i < htbl->size; i++) { struct rhash_head *pos; - rht_for_each_entry(nlsk, pos, htbl, i, node) { + rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { sk = (struct sock *)nlsk; if (!net_eq(sock_net(sk), 
net)) @@ -172,7 +172,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) req = nlmsg_data(cb->nlh); - mutex_lock(&nl_sk_hash_lock); + rcu_read_lock(); read_lock(&nl_table_lock); if (req->sdiag_protocol == NDIAG_PROTO_ALL) { @@ -186,7 +186,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return -ENOENT; } @@ -194,7 +194,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } read_unlock(&nl_table_lock); - mutex_unlock(&nl_sk_hash_lock); + rcu_read_unlock(); return skb->len; } -- cgit v1.2.3 From 08abdffa1c16b9603ee9bb33f3226ff186d98fa1 Mon Sep 17 00:00:00 2001 From: Sébastien Barré Date: Mon, 12 Jan 2015 10:30:40 +0100 Subject: tcp: avoid reducing cwnd when ACK+DSACK is received MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With TLP, the peer may reply to a probe with an ACK+D-SACK, with ack value set to tlp_high_seq. In the current code, such ACK+DSACK will be missed and only at next, higher ack will the TLP episode be considered done. Since the DSACK is not present anymore, this will cost a cwnd reduction. This patch ensures that this scenario does not cause a cwnd reduction, since receiving an ACK+DSACK indicates that both the initial segment and the probe have been received by the peer. The following packetdrill test, from Neal Cardwell, validates this patch: // Establish a connection. 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +0 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 +.020 < . 1:1(0) ack 1 win 257 +0 accept(3, ..., ...) = 4 // Send 1 packet. +0 write(4, ..., 1000) = 1000 +0 > P. 1:1001(1000) ack 1 // Loss probe retransmission. // packets_out == 1 => schedule PTO in max(2*RTT, 1.5*RTT + 200ms) // In this case, this means: 1.5*RTT + 200ms = 230ms +.230 > P. 1:1001(1000) ack 1 +0 %{ assert tcpi_snd_cwnd == 10 }% // Receiver ACKs at tlp_high_seq with a DSACK, // indicating they received the original packet and probe. +.020 < . 1:1(0) ack 1001 win 257 +0 %{ assert tcpi_snd_cwnd == 10 }% // Send another packet. +0 write(4, ..., 1000) = 1000 +0 > P. 1001:2001(1000) ack 1 // Receiver ACKs above tlp_high_seq, which should end the TLP episode // if we haven't already. We should not reduce cwnd. +.020 < . 1:1(0) ack 2001 win 257 +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% Credits: -Gregory helped in finding that tcp_process_tlp_ack was where the cwnd got reduced in our MPTCP tests. -Neal wrote the packetdrill test above -Yuchung reworked the patch to make it more readable. Cc: Gregory Detal Cc: Nandita Dukkipati Tested-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Signed-off-by: Sébastien Barré Acked-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075ab4d5af5e..71fb37c70581 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3358,34 +3358,34 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } /* This routine deals with acks during a TLP episode. + * We mark the end of a TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. 
* Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { struct tcp_sock *tp = tcp_sk(sk); - bool is_tlp_dupack = (ack == tp->tlp_high_seq) && - !(flag & (FLAG_SND_UNA_ADVANCED | - FLAG_NOT_DUP | FLAG_DATA_SACKED)); - /* Mark the end of TLP episode on receiving TLP dupack or when - * ack is after tlp_high_seq. - */ - if (is_tlp_dupack) { - tp->tlp_high_seq = 0; + if (before(ack, tp->tlp_high_seq)) return; - } - if (after(ack, tp->tlp_high_seq)) { + if (flag & FLAG_DSACKING_ACK) { + /* This DSACK means original and TLP probe arrived; no loss */ + tp->tlp_high_seq = 0; + } else if (after(ack, tp->tlp_high_seq)) { + /* ACK advances: there was a loss, so reduce cwnd. Reset + * tlp_high_seq in tcp_init_cwnd_reduction() + */ + tcp_init_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_try_keep_open(sk); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } else if (!(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED))) { + /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; - /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ - if (!(flag & FLAG_DSACKING_ACK)) { - tcp_init_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - tcp_end_cwnd_reduction(sk); - tcp_try_keep_open(sk); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSSPROBERECOVERY); - } } } -- cgit v1.2.3 From a440edf1fc01d95a1552dea7ac644e92fb588f5c Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Mon, 12 Jan 2015 20:49:35 +0530 Subject: openvswitch: Remove unnecessary version.h inclusion version.h inclusion is not necessary as detected by versioncheck. Signed-off-by: Syam Sidhardhan Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/vport-geneve.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 484864dd0e68..2daf1440618b 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -9,8 +9,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include - #include #include #include -- cgit v1.2.3 From 0fe6de490320bfbf1b82a33d7ee49b62af5f29db Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Mon, 12 Jan 2015 16:25:28 -0800 Subject: bridge: fix uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/bridge/br_netlink.c: In function ‘br_fill_ifinfo’: net/bridge/br_netlink.c:146:32: warning: ‘vid_range_flags’ may be used uninitialized in this function [-Wmaybe-uninitialized] err = br_fill_ifvlaninfo_range(skb, vid_range_start, ^ net/bridge/br_netlink.c:108:6: note: ‘vid_range_flags’ was declared here u16 vid_range_flags; Reported-by: Thomas Graf Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0b03879488c4..66ece91ee165 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -105,7 +105,7 @@ static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, const struct net_port_vlans *pv) { u16 vid_range_start = 0, vid_range_end = 0; - u16 vid_range_flags; + u16 vid_range_flags = 0; u16 pvid, vid, flags; int err = 0; @@ -142,12 +142,14 @@ initvars: vid_range_flags = flags; } - /* Call it once more to send any left over vlans */ - err = br_fill_ifvlaninfo_range(skb, vid_range_start, - vid_range_end, - vid_range_flags); - if (err) - return err; + if (vid_range_start != 0) { + /* Call it once more to send any left over vlans */ + err = br_fill_ifvlaninfo_range(skb, vid_range_start, + vid_range_end, + vid_range_flags); + if (err) + return err; + } return 0; } -- cgit v1.2.3 From 3721e9c7c194f576fbd30926e98e0abb13c641b5 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 13 Jan 2015 17:07:48 +0800 Subject: tipc: remove redundant timer defined in tipc_sock struct Remove the redundant timer defined in tipc_sock structure, instead we can directly reuse the sk_timer defined in sock structure. Signed-off-by: Ying Xue Acked-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 2cec496ba691..c9c34a667921 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -69,7 +69,6 @@ * @pub_count: total # of publications port has made during its lifetime * @probing_state: * @probing_intv: - * @timer: * @port: port - interacts with 'sk' and with the rest of the TIPC stack * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request @@ -94,7 +93,6 @@ struct tipc_sock { u32 pub_count; u32 probing_state; unsigned long probing_intv; - struct timer_list timer; uint conn_timeout; atomic_t dupl_rcvcnt; bool link_cong; @@ -360,7 +358,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return -EINVAL; } msg_set_origport(msg, tsk->portid); - setup_timer(&tsk->timer, tipc_sk_timeout, (unsigned long)tsk); + setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -514,7 +512,8 @@ static int tipc_release(struct socket *sock) tipc_sk_withdraw(tsk, 0, NULL); probing_state = tsk->probing_state; - if (del_timer_sync(&tsk->timer) && probing_state != TIPC_CONN_PROBING) + if (del_timer_sync(&sk->sk_timer) && + probing_state != TIPC_CONN_PROBING) sock_put(sk); tipc_sk_remove(tsk); if (tsk->connected) { @@ -1136,7 +1135,8 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, u32 peer_node) { - struct net *net = sock_net(&tsk->sk); + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct tipc_msg *msg = &tsk->phdr; msg_set_destnode(msg, peer_node); @@ -1148,8 +1148,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; tsk->connected = 1; - if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) - sock_hold(&tsk->sk); + sk_reset_timer(sk, 
&sk->sk_timer, jiffies + tsk->probing_intv); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); } @@ -2141,8 +2140,7 @@ static void tipc_sk_timeout(unsigned long data) 0, peer_node, tn->own_addr, peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; - if (!mod_timer(&tsk->timer, jiffies + tsk->probing_intv)) - sock_hold(sk); + sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); } bh_unlock_sock(sk); if (skb) -- cgit v1.2.3 From 357c4774b5b08878d980847f496af38869e7aad0 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 13 Jan 2015 12:46:41 -0500 Subject: tipc: correctly handle releasing a not fully initialized sock Commit f2f9800d4955 "tipc: make tipc node table aware of net namespace" has added a dereference of sock->sk before making sure it's not NULL, which makes releasing a tipc socket NULL pointer dereference for sockets that are not fully initialized. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- net/tipc/socket.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c9c34a667921..720fda6cc2e6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -470,8 +470,8 @@ static void tipc_sk_callback(struct rcu_head *head) static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); + struct net *net; + struct tipc_net *tn; struct tipc_sock *tsk; struct sk_buff *skb; u32 dnode, probing_state; @@ -483,6 +483,9 @@ static int tipc_release(struct socket *sock) if (sk == NULL) return 0; + net = sock_net(sk); + tn = net_generic(net, tipc_net_id); + tsk = tipc_sk(sk); lock_sock(sk); -- cgit v1.2.3 From d8b9605d2697c48fb822c821c5751afbb4567003 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 13 Jan 2015 17:13:43 +0100 Subject: net: sched: fix skb->protocol use in case of accelerated vlan path tc code implicitly considers skb->protocol even in case of accelerated vlan paths and expects vlan protocol type here. However, on rx path, if the vlan header was already stripped, skb->protocol contains value of next header. Similar situation is on tx path. So for skbs that use skb->vlan_tci for tagging, use skb->vlan_proto instead. Reported-by: Jamal Hadi Salim Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 12 ++++++++++++ net/sched/act_csum.c | 2 +- net/sched/cls_flow.c | 8 ++++---- net/sched/em_ipset.c | 2 +- net/sched/em_meta.c | 2 +- net/sched/sch_api.c | 2 +- net/sched/sch_dsmark.c | 6 +++--- net/sched/sch_teql.c | 4 ++-- 8 files changed, 25 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 27a33833ff4a..fe6e7aac3c56 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -3,6 +3,7 @@ #include #include +#include #include struct qdisc_walker { @@ -114,6 +115,17 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res); +static inline __be16 tc_skb_protocol(const struct sk_buff *skb) +{ + /* We need to take extra care in case the skb came via + * vlan accelerated path. In that case, use skb->vlan_proto + * as the original vlan header was already stripped. 
+ */ + if (vlan_tx_tag_present(skb)) + return skb->vlan_proto; + return skb->protocol; +} + /* Calculate maximal size of packet seen by hard_start_xmit routine of this device. */ diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index edbf40dac709..4cd5cf1aedf8 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb, if (unlikely(action == TC_ACT_SHOT)) goto drop; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 15d68f24a521..461410394d08 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->dst) return ntohl(flow->dst); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) @@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys if (flow->ports) return ntohs(flow->port16[1]); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -156,7 +156,7 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 5b4a4efe468c..a3d79c8bf3b8 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct net_device *dev, *indev = NULL; int ret, network_offset; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): acpar.family = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index c8f8c399b99a..2159981b604e 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -197,7 +197,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = skb->protocol; + dst->value = tc_skb_protocol(skb); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 76f402e05bd6..243b7d169d61 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1807,7 +1807,7 @@ done: int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - __be16 protocol = skb->protocol; + __be16 protocol = tc_skb_protocol(skb); int err; for (; tp; tp = rcu_dereference_bh(tp->next)) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 227114f27f94..66700a6116aa 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, 
p); if (p->set_tc_index) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -289,7 +289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, index); - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); @@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mask[index] != 0xff || p->value[index]) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb->protocol)); + __func__, ntohs(tc_skb_protocol(skb))); break; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 4899d4a18aa5..e02687185a59 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -242,8 +242,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, - NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + haddr, NULL, skb->len); if (err < 0) err = -EINVAL; -- cgit v1.2.3 From df8a39defad46b83694ea6dd868d332976d62cc0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 13 Jan 2015 17:13:44 +0100 Subject: net: rename vlan_tx_* helpers since "tx" is misleading there The same macros are used for rx as well. So rename it. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 4 ++-- drivers/infiniband/hw/nes/nes_nic.c | 13 +++++++------ drivers/net/ethernet/3com/typhoon.c | 4 ++-- drivers/net/ethernet/alteon/acenic.c | 8 ++++---- drivers/net/ethernet/amd/amd8111e.c | 4 ++-- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 8 ++++---- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 4 ++-- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 4 ++-- drivers/net/ethernet/atheros/atlx/atl1.c | 4 ++-- drivers/net/ethernet/atheros/atlx/atl2.c | 4 ++-- drivers/net/ethernet/broadcom/bnx2.c | 4 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 4 ++-- drivers/net/ethernet/broadcom/tg3.c | 4 ++-- drivers/net/ethernet/brocade/bna/bnad.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb/sge.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb3/sge.c | 6 +++--- drivers/net/ethernet/chelsio/cxgb4/sge.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 4 ++-- drivers/net/ethernet/cisco/enic/enic_main.c | 4 ++-- drivers/net/ethernet/emulex/benet/be_main.c | 12 ++++++------ drivers/net/ethernet/freescale/gianfar.c | 4 ++-- drivers/net/ethernet/ibm/ehea/ehea_main.c | 4 ++-- drivers/net/ethernet/intel/e1000/e1000_main.c | 5 +++-- drivers/net/ethernet/intel/e1000e/netdev.c | 9 +++++---- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 4 ++-- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 4 ++-- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 4 ++-- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++-- drivers/net/ethernet/intel/igbvf/netdev.c | 5 +++-- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++-- drivers/net/ethernet/jme.c | 4 ++-- drivers/net/ethernet/marvell/sky2.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 12 ++++++------ drivers/net/ethernet/natsemi/ns83820.c | 4 ++-- drivers/net/ethernet/neterion/s2io.c | 4 ++-- 
drivers/net/ethernet/neterion/vxge/vxge-main.c | 4 ++-- drivers/net/ethernet/nvidia/forcedeth.c | 4 ++-- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 4 ++-- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 8 ++++---- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 6 +++--- drivers/net/ethernet/realtek/8139cp.c | 4 ++-- drivers/net/ethernet/realtek/r8169.c | 4 ++-- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 2 +- drivers/net/ethernet/tehuti/tehuti.c | 4 ++-- drivers/net/ethernet/via/via-rhine.c | 6 +++--- drivers/net/ethernet/via/via-velocity.c | 4 ++-- drivers/net/macvtap.c | 6 +++--- drivers/net/tun.c | 4 ++-- drivers/net/usb/r8152.c | 4 ++-- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++-- drivers/net/vxlan.c | 4 ++-- drivers/s390/net/qeth_l3_main.c | 8 ++++---- drivers/vhost/net.c | 2 +- include/linux/if_vlan.h | 16 ++++++++-------- include/net/pkt_sched.h | 2 +- include/trace/events/net.h | 8 ++++---- net/8021q/vlan_core.c | 2 +- net/bridge/br_netfilter.c | 12 ++++++------ net/bridge/br_private.h | 4 ++-- net/bridge/br_vlan.c | 4 ++-- net/bridge/netfilter/ebt_vlan.c | 4 ++-- net/bridge/netfilter/ebtables.c | 2 +- net/core/dev.c | 10 +++++----- net/core/netpoll.c | 2 +- net/core/skbuff.c | 8 ++++---- net/ipv4/geneve.c | 2 +- net/openvswitch/actions.c | 4 ++-- net/openvswitch/datapath.c | 2 +- net/openvswitch/flow.c | 4 ++-- net/openvswitch/vport-gre.c | 2 +- net/openvswitch/vport.c | 3 ++- net/packet/af_packet.c | 12 ++++++------ net/sched/em_meta.c | 2 +- net/wireless/util.c | 4 ++-- 77 files changed, 195 insertions(+), 190 deletions(-) (limited to 'net') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 58d08f8d8d80..9930ecfbb465 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -279,8 +279,8 @@ Possible BPF extensions are shown in the following table: hatype skb->dev->type rxhash skb->hash cpu raw_smp_processor_id() - vlan_tci vlan_tx_tag_get(skb) - vlan_pr vlan_tx_tag_present(skb) + vlan_tci skb_vlan_tag_get(skb) + vlan_pr skb_vlan_tag_present(skb) rand prandom_u32() These extensions can also be prefixed with '#'. diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 49eb5111d2cd..70acda91eb2a 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -373,11 +373,11 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; /* setup the VLAN tag if present */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n", - netdev->name, vlan_tx_tag_get(skb)); + netdev->name, skb_vlan_tag_get(skb)); wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; - wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb); + wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb); } else wqe_misc = 0; @@ -576,11 +576,12 @@ tso_sq_no_longer_full: wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; /* setup the VLAN tag if present */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... 
VLAN = %08X\n", - netdev->name, vlan_tx_tag_get(skb) ); + netdev->name, + skb_vlan_tag_get(skb)); wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; - wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb); + wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb); } else wqe_misc = 0; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index dede43f4ce09..8f8418d2ac4a 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -769,11 +769,11 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) first_txd->processFlags |= TYPHOON_TX_PF_IP_CHKSUM; } - if(vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { first_txd->processFlags |= TYPHOON_TX_PF_INSERT_VLAN | TYPHOON_TX_PF_VLAN_PRIORITY; first_txd->processFlags |= - cpu_to_le32(htons(vlan_tx_tag_get(skb)) << + cpu_to_le32(htons(skb_vlan_tag_get(skb)) << TYPHOON_TX_PF_VLAN_TAG_SHIFT); } diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index b68074803de3..b90a26b13fdf 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -2429,9 +2429,9 @@ restart: flagsize = (skb->len << 16) | (BD_FLG_END); if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { flagsize |= BD_FLG_VLAN_TAG; - vlan_tag = vlan_tx_tag_get(skb); + vlan_tag = skb_vlan_tag_get(skb); } desc = ap->tx_ring + idx; idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); @@ -2450,9 +2450,9 @@ restart: flagsize = (skb_headlen(skb) << 16); if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { flagsize |= BD_FLG_VLAN_TAG; - vlan_tag = vlan_tx_tag_get(skb); + vlan_tag = skb_vlan_tag_get(skb); } ace_load_tx_bd(ap, ap->tx_ring + idx, mapping, flagsize, vlan_tag); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 841e6558db68..4c2ae2221780 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1299,11 +1299,11 @@ static netdev_tx_t amd8111e_start_xmit(struct sk_buff *skb, lp->tx_ring[tx_index].tx_flags = 0; #if AMD8111E_VLAN_TAG_USED - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { lp->tx_ring[tx_index].tag_ctrl_cmd |= cpu_to_le16(TCC_VLAN_INSERT); lp->tx_ring[tx_index].tag_ctrl_info = - cpu_to_le16(vlan_tx_tag_get(skb)); + cpu_to_le16(skb_vlan_tag_get(skb)); } #endif diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 7bb5f07dbeef..2ba1dd22ad64 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1165,8 +1165,8 @@ static void xgbe_prep_tx_tstamp(struct xgbe_prv_data *pdata, static void xgbe_prep_vlan(struct sk_buff *skb, struct xgbe_packet_data *packet) { - if (vlan_tx_tag_present(skb)) - packet->vlan_ctag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) + packet->vlan_ctag = skb_vlan_tag_get(skb); } static int xgbe_prep_tso(struct sk_buff *skb, struct xgbe_packet_data *packet) @@ -1247,9 +1247,9 @@ static void xgbe_packet_info(struct xgbe_prv_data *pdata, XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, CSUM_ENABLE, 1); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { /* VLAN requires an extra descriptor if tag is different */ - if (vlan_tx_tag_get(skb) != ring->tx.cur_vlan_ctag) + if (skb_vlan_tag_get(skb) != 
ring->tx.cur_vlan_ctag) /* We can share with the TSO context descriptor */ if (!context_desc) { context_desc = 1; diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index c9946c6c119e..587f63e87588 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2235,8 +2235,8 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } - if (unlikely(vlan_tx_tag_present(skb))) { - u16 vlan = vlan_tx_tag_get(skb); + if (unlikely(skb_vlan_tag_present(skb))) { + u16 vlan = skb_vlan_tag_get(skb); __le16 tag; vlan = cpu_to_le16(vlan); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index c88abf5b6415..59a03a193e83 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1892,8 +1892,8 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, tpd = atl1e_get_tpd(adapter); - if (vlan_tx_tag_present(skb)) { - u16 vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + u16 vlan_tag = skb_vlan_tag_get(skb); u16 atl1e_vlan_tag; tpd->word3 |= 1 << TPD_INS_VL_TAG_SHIFT; diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 2c8f398aeda9..eca1d113fee1 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2415,8 +2415,8 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, (u16) atomic_read(&tpd_ring->next_to_use)); memset(ptpd, 0, sizeof(struct tx_packet_desc)); - if (vlan_tx_tag_present(skb)) { - vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + vlan_tag = skb_vlan_tag_get(skb); vlan_tag = (vlan_tag << 4) | (vlan_tag >> 13) | ((vlan_tag >> 9) & 0x8); ptpd->word3 |= 1 << TPD_INS_VL_TAG_SHIFT; diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 482a7cabb0a1..46a535318c7a 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -887,8 +887,8 @@ static netdev_tx_t atl2_xmit_frame(struct sk_buff *skb, offset = ((u32)(skb->len-copy_len + 3) & ~3); } #ifdef NETIF_F_HW_VLAN_CTAG_TX - if (vlan_tx_tag_present(skb)) { - u16 vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + u16 vlan_tag = skb_vlan_tag_get(skb); vlan_tag = (vlan_tag << 4) | (vlan_tag >> 13) | ((vlan_tag >> 9) & 0x8); diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 823d01c5684c..02bf0b86995b 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6597,9 +6597,9 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) vlan_tag_flags |= TX_BD_FLAGS_TCP_UDP_CKSUM; } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { vlan_tag_flags |= - (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16)); + (TX_BD_FLAGS_VLAN_TAG | (skb_vlan_tag_get(skb) << 16)); } if ((mss = skb_shinfo(skb)->gso_size)) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1d1147c93d59..b51a18a09d4d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3865,9 +3865,9 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) "sending pkt %u @%p next_idx %u bd %u @%p\n", pkt_prod, tx_buf, txdata->tx_pkt_prod, bd_prod, 
tx_start_bd); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_start_bd->vlan_or_ethertype = - cpu_to_le16(vlan_tx_tag_get(skb)); + cpu_to_le16(skb_vlan_tag_get(skb)); tx_start_bd->bd_flags.as_bitfield |= (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); } else { diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 356bd5b022a5..4cf43bfbc955 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8002,9 +8002,9 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) !mss && skb->len > VLAN_ETH_FRAME_LEN) base_flags |= TXD_FLAG_JMB_PKT; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { base_flags |= TXD_FLAG_VLAN; - vlan = vlan_tx_tag_get(skb); + vlan = skb_vlan_tag_get(skb); } if ((unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) && diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 323721838cf9..7714d7790089 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -2824,8 +2824,8 @@ bnad_txq_wi_prepare(struct bnad *bnad, struct bna_tcb *tcb, u32 gso_size; u16 vlan_tag = 0; - if (vlan_tx_tag_present(skb)) { - vlan_tag = (u16)vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + vlan_tag = (u16)skb_vlan_tag_get(skb); flags |= (BNA_TXQ_WI_CF_INS_PRIO | BNA_TXQ_WI_CF_INS_VLAN); } if (test_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags)) { diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index babe2a915b00..526ea74e82d9 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1860,9 +1860,9 @@ netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev) } cpl->iff = dev->if_port; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { cpl->vlan_valid = 1; - cpl->vlan = htons(vlan_tx_tag_get(skb)); + cpl->vlan = htons(skb_vlan_tag_get(skb)); st->vlan_insert++; } else cpl->vlan_valid = 0; diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 3dfcf600fcc6..d6aa602f168d 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -1148,8 +1148,8 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, cpl->len = htonl(skb->len); cntrl = V_TXPKT_INTF(pi->port_id); - if (vlan_tx_tag_present(skb)) - cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb)); + if (skb_vlan_tag_present(skb)) + cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(skb_vlan_tag_get(skb)); tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size); if (tso_info) { @@ -1282,7 +1282,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) qs->port_stats[SGE_PSTAT_TX_CSUM]++; if (skb_shinfo(skb)->gso_size) qs->port_stats[SGE_PSTAT_TSO]++; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) qs->port_stats[SGE_PSTAT_VLANINS]++; /* diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index ca42e2e9dec9..619156112b21 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1154,9 +1154,9 @@ out_free: dev_kfree_skb_any(skb); cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS; } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { q->vlan_ins++; - cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb)); + cntrl |= TXPKT_VLAN_VLD | 
TXPKT_VLAN(skb_vlan_tag_get(skb)); } cpl->ctrl0 = htonl(TXPKT_OPCODE(CPL_TX_PKT_XT) | diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 4424277a7e4d..0545f0de1c52 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1326,9 +1326,9 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) * If there's a VLAN tag present, add that to the list of things to * do in this Work Request. */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { txq->vlan_ins++; - cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb)); + cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(skb_vlan_tag_get(skb)); } /* diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 9a952df6606e..0535f6fbdc71 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -520,10 +520,10 @@ static inline void enic_queue_wq_skb(struct enic *enic, int loopback = 0; int err; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { /* VLAN tag from trunking driver */ vlan_tag_insert = 1; - vlan_tag = vlan_tx_tag_get(skb); + vlan_tag = skb_vlan_tag_get(skb); } else if (enic->loop_enable) { vlan_tag = enic->loop_tag; loopback = 1; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 37a26b0b7e33..ed46610e5453 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -694,7 +694,7 @@ static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter, u8 vlan_prio; u16 vlan_tag; - vlan_tag = vlan_tx_tag_get(skb); + vlan_tag = skb_vlan_tag_get(skb); vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; /* If vlan priority provided by OS is NOT in available bmap */ if (!(adapter->vlan_prio_bmap & (1 << vlan_prio))) @@ -745,7 +745,7 @@ static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr, SET_TX_WRB_HDR_BITS(udpcs, hdr, 1); } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { SET_TX_WRB_HDR_BITS(vlan, hdr, 1); vlan_tag = be_get_tx_vlan_tag(adapter, skb); SET_TX_WRB_HDR_BITS(vlan_tag, hdr, vlan_tag); @@ -864,7 +864,7 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, if (unlikely(!skb)) return skb; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) vlan_tag = be_get_tx_vlan_tag(adapter, skb); if (qnq_async_evt_rcvd(adapter) && adapter->pvid) { @@ -923,7 +923,7 @@ static bool be_ipv6_exthdr_check(struct sk_buff *skb) static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb) { - return vlan_tx_tag_present(skb) || adapter->pvid || adapter->qnq_vid; + return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid; } static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb) @@ -946,7 +946,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ? VLAN_ETH_HLEN : ETH_HLEN; if (skb->len <= 60 && - (lancer_chip(adapter) || vlan_tx_tag_present(skb)) && + (lancer_chip(adapter) || skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) { ip = (struct iphdr *)ip_hdr(skb); pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len)); @@ -964,7 +964,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, * Manually insert VLAN in pkt. 
*/ if (skb->ip_summed != CHECKSUM_PARTIAL && - vlan_tx_tag_present(skb)) { + skb_vlan_tag_present(skb)) { skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); if (unlikely(!skb)) goto err; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index e54b1e39f9b4..93ff846e96f1 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2170,7 +2170,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb, void inline gfar_tx_vlan(struct sk_buff *skb, struct txfcb *fcb) { fcb->flags |= TXFCB_VLN; - fcb->vlctl = vlan_tx_tag_get(skb); + fcb->vlctl = skb_vlan_tag_get(skb); } static inline struct txbd8 *skip_txbd(struct txbd8 *bdp, int stride, @@ -2230,7 +2230,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) regs = tx_queue->grp->regs; do_csum = (CHECKSUM_PARTIAL == skb->ip_summed); - do_vlan = vlan_tx_tag_present(skb); + do_vlan = skb_vlan_tag_present(skb); do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en; diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 566b17db135a..e8a1adb7a962 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2064,9 +2064,9 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) memset(swqe, 0, SWQE_HEADER_SIZE); atomic_dec(&pr->swqe_avail); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { swqe->tx_control |= EHEA_SWQE_VLAN_INSERT; - swqe->vlan_tag = vlan_tx_tag_get(skb); + swqe->vlan_tag = skb_vlan_tag_get(skb); } pr->tx_packets++; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 83140cbb5f01..9242982db3e0 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3226,9 +3226,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_flags |= E1000_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); + tx_flags |= (skb_vlan_tag_get(skb) << + E1000_TX_FLAGS_VLAN_SHIFT); } first = tx_ring->next_to_use; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 332a298e95b5..38cb586b1bf4 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5463,8 +5463,8 @@ static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct e1000_hw *hw = &adapter->hw; u16 length, offset; - if (vlan_tx_tag_present(skb) && - !((vlan_tx_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) && + if (skb_vlan_tag_present(skb) && + !((skb_vlan_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) && (adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN))) return 0; @@ -5603,9 +5603,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, if (e1000_maybe_stop_tx(tx_ring, count + 2)) return NETDEV_TX_BUSY; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_flags |= E1000_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); + tx_flags |= (skb_vlan_tag_get(skb) << + E1000_TX_FLAGS_VLAN_SHIFT); } first = tx_ring->next_to_use; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index eb088b129bc7..caa43f7c2931 100644 --- 
a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -965,8 +965,8 @@ static void fm10k_tx_map(struct fm10k_ring *tx_ring, tx_desc = FM10K_TX_DESC(tx_ring, i); /* add HW VLAN tag */ - if (vlan_tx_tag_present(skb)) - tx_desc->vlan = cpu_to_le16(vlan_tx_tag_get(skb)); + if (skb_vlan_tag_present(skb)) + tx_desc->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); else tx_desc->vlan = 0; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 8811364b91cb..945b35d31c71 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -609,7 +609,7 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) int err; if ((skb->protocol == htons(ETH_P_8021Q)) && - !vlan_tx_tag_present(skb)) { + !skb_vlan_tag_present(skb)) { /* FM10K only supports hardware tagging, any tags in frame * are considered 2nd level or "outer" tags */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 04b441460bbd..9f536dd8e1ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1772,8 +1772,8 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, u32 tx_flags = 0; /* if we have a HW VLAN tag being added, default to the HW one */ - if (vlan_tx_tag_present(skb)) { - tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; tx_flags |= I40E_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN, check the next protocol and store the tag */ } else if (protocol == htons(ETH_P_8021Q)) { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 04c7c1557a0c..82c3798fdd36 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1122,8 +1122,8 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, u32 tx_flags = 0; /* if we have a HW VLAN tag being added, default to the HW one */ - if (vlan_tx_tag_present(skb)) { - tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; tx_flags |= I40E_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN, check the next protocol and store the tag */ } else if (protocol == htons(ETH_P_8021Q)) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ff59897a9463..6c25ec314183 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5035,9 +5035,9 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, skb_tx_timestamp(skb); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_flags |= IGB_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); + tx_flags |= (skb_vlan_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); } /* record initial flags and protocol */ diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 63c807c9b21c..ad2b4897b392 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2234,9 +2234,10 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (vlan_tx_tag_present(skb)) { + if 
(skb_vlan_tag_present(skb)) { tx_flags |= IGBVF_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << IGBVF_TX_FLAGS_VLAN_SHIFT); + tx_flags |= (skb_vlan_tag_get(skb) << + IGBVF_TX_FLAGS_VLAN_SHIFT); } if (skb->protocol == htons(ETH_P_IP)) diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index aa87605b144a..11a1bdbe3fd9 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -1532,9 +1532,9 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) DESC_NEEDED))) return NETDEV_TX_BUSY; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_flags |= IXGB_TX_FLAGS_VLAN; - vlan_id = vlan_tx_tag_get(skb); + vlan_id = skb_vlan_tag_get(skb); } first = adapter->tx_ring.next_to_use; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2ed2c7de2304..7bb421bfd84e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7217,8 +7217,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, first->gso_segs = 1; /* if we have a HW VLAN tag being added default to the HW one */ - if (vlan_tx_tag_present(skb)) { - tx_flags |= vlan_tx_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT; + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT; tx_flags |= IXGBE_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN check the next protocol and store the tag */ } else if (protocol == htons(ETH_P_8021Q)) { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 62a0d8e0f17d..c9b49bfb51bb 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3452,8 +3452,8 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) first->bytecount = skb->len; first->gso_segs = 1; - if (vlan_tx_tag_present(skb)) { - tx_flags |= vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb); tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT; tx_flags |= IXGBE_TX_FLAGS_VLAN; } diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 44ce7d88f554..6e9a792097d3 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2154,9 +2154,9 @@ jme_tx_csum(struct jme_adapter *jme, struct sk_buff *skb, u8 *flags) static inline void jme_tx_vlan(struct sk_buff *skb, __le16 *vlan, u8 *flags) { - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { *flags |= TXFLAG_TAGON; - *vlan = cpu_to_le16(vlan_tx_tag_get(skb)); + *vlan = cpu_to_le16(skb_vlan_tag_get(skb)); } } diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 867a6a3ef81f..d9f4498832a1 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -1895,14 +1895,14 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb, ctrl = 0; /* Add VLAN tag, can piggyback on LRGLEN or ADDR64 */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { if (!le) { le = get_tx_le(sky2, &slot); le->addr = 0; le->opcode = OP_VLAN|HW_OWNER; } else le->opcode |= OP_VLAN; - le->length = cpu_to_be16(vlan_tx_tag_get(skb)); + le->length = cpu_to_be16(skb_vlan_tag_get(skb)); ctrl |= INS_VLAN; } @@ -2594,7 +2594,7 @@ static struct sk_buff *sky2_receive(struct net_device *dev, sky2->rx_next = (sky2->rx_next + 1) % sky2->rx_pending; 
prefetch(sky2->rx_ring + sky2->rx_next); - if (vlan_tx_tag_present(re->skb)) + if (skb_vlan_tag_present(re->skb)) count -= VLAN_HLEN; /* Account for vlan tag */ /* This chip has hardware problems that generates bogus status. diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index e3357bf523df..359bb1286eb5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -682,8 +682,8 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, if (dev->num_tc) return skb_tx_hash(dev, skb); - if (vlan_tx_tag_present(skb)) - up = vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT; + if (skb_vlan_tag_present(skb)) + up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT; return fallback(dev, skb) % rings_p_up + up * rings_p_up; } @@ -742,8 +742,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } - if (vlan_tx_tag_present(skb)) - vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) + vlan_tag = skb_vlan_tag_get(skb); netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -930,7 +930,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) real_size = (real_size / 16) & 0x3f; if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && - !vlan_tx_tag_present(skb) && send_doorbell) { + !skb_vlan_tag_present(skb) && send_doorbell) { tx_desc->ctrl.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); @@ -952,7 +952,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) } else { tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * - !!vlan_tx_tag_present(skb); + !!skb_vlan_tag_present(skb); tx_desc->ctrl.fence_size = real_size; /* Ensure new descriptor hits memory diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 2552e550a78c..eb807b0dc72a 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1122,12 +1122,12 @@ again: } #ifdef NS83820_VLAN_ACCEL_SUPPORT - if(vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { /* fetch the vlan tag info out of the * ancillary data if the vlan code * is using hw vlan acceleration */ - short tag = vlan_tx_tag_get(skb); + short tag = skb_vlan_tag_get(skb); extsts |= (EXTSTS_VPKT | htons(tag)); } #endif diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index f5e4b820128b..0529cad75b10 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -4045,8 +4045,8 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) } queue = 0; - if (vlan_tx_tag_present(skb)) - vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) + vlan_tag = skb_vlan_tag_get(skb); if (sp->config.tx_steering_type == TX_DEFAULT_STEERING) { if (skb->protocol == htons(ETH_P_IP)) { struct iphdr *ip; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index cc0485e3c621..50d5604833ed 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -890,8 +890,8 @@ vxge_xmit(struct sk_buff *skb, struct net_device *dev) dev->name, __func__, __LINE__, fifo_hw, dtr, dtr_priv); - if (vlan_tx_tag_present(skb)) { - u16 vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + u16 vlan_tag = skb_vlan_tag_get(skb); vxge_hw_fifo_txdl_vlan_set(dtr, vlan_tag); } diff --git 
a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index f39cae620f61..a41bb5e6b954 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2462,9 +2462,9 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0; /* vlan tag */ - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) start_tx->txvlan = cpu_to_le32(NV_TX3_VLAN_TAG_PRESENT | - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); else start_tx->txvlan = 0; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 613037584d08..a47fe67fdf58 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1893,9 +1893,9 @@ netxen_tso_check(struct net_device *netdev, protocol = vh->h_vlan_encapsulated_proto; flags = FLAGS_VLAN_TAGGED; - } else if (vlan_tx_tag_present(skb)) { + } else if (skb_vlan_tag_present(skb)) { flags = FLAGS_VLAN_OOB; - vid = vlan_tx_tag_get(skb); + vid = skb_vlan_tag_get(skb); netxen_set_tx_vlan_tci(first_desc, vid); vlan_oob = 1; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index d166e534925d..4d2496f28b85 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -321,8 +321,8 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, if (protocol == ETH_P_8021Q) { vh = (struct vlan_ethhdr *)skb->data; vlan_id = ntohs(vh->h_vlan_TCI); - } else if (vlan_tx_tag_present(skb)) { - vlan_id = vlan_tx_tag_get(skb); + } else if (skb_vlan_tag_present(skb)) { + vlan_id = skb_vlan_tag_get(skb); } } @@ -473,9 +473,9 @@ static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter, flags = QLCNIC_FLAGS_VLAN_TAGGED; vlan_tci = ntohs(vh->h_vlan_TCI); protocol = ntohs(vh->h_vlan_encapsulated_proto); - } else if (vlan_tx_tag_present(skb)) { + } else if (skb_vlan_tag_present(skb)) { flags = QLCNIC_FLAGS_VLAN_OOB; - vlan_tci = vlan_tx_tag_get(skb); + vlan_tci = skb_vlan_tag_get(skb); } if (unlikely(adapter->tx_pvid)) { if (vlan_tci && !(adapter->flags & QLCNIC_TAGGING_ENABLED)) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 6c904a6cad2a..dc0058f90370 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2660,11 +2660,11 @@ static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev) mac_iocb_ptr->frame_len = cpu_to_le16((u16) skb->len); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev, - "Adding a vlan tag %d.\n", vlan_tx_tag_get(skb)); + "Adding a vlan tag %d.\n", skb_vlan_tag_get(skb)); mac_iocb_ptr->flags3 |= OB_MAC_IOCB_V; - mac_iocb_ptr->vlan_tci = cpu_to_le16(vlan_tx_tag_get(skb)); + mac_iocb_ptr->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); } tso = ql_tso(skb, (struct ob_mac_tso_iocb_req *)mac_iocb_ptr); if (tso < 0) { diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 9c31e46d1eee..d79e33b3c191 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -708,8 +708,8 @@ static void cp_tx (struct cp_private *cp) static inline u32 cp_tx_vlan_tag(struct sk_buff *skb) { - return vlan_tx_tag_present(skb) ? 
- TxVlanTag | swab16(vlan_tx_tag_get(skb)) : 0x00; + return skb_vlan_tag_present(skb) ? + TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00; } static void unwind_tx_frag_mapping(struct cp_private *cp, struct sk_buff *skb, diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 3a280598a15a..cd286b0356ab 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -2073,8 +2073,8 @@ static int rtl8169_set_features(struct net_device *dev, static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb) { - return (vlan_tx_tag_present(skb)) ? - TxVlanTag | swab16(vlan_tx_tag_get(skb)) : 0x00; + return (skb_vlan_tag_present(skb)) ? + TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00; } static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index b6612d6090ac..23545e1e605a 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1272,7 +1272,7 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_is_gso(skb) && tqueue->prev_mss != cur_mss)) ctxt_desc_req = 1; - if (unlikely(vlan_tx_tag_present(skb) || + if (unlikely(skb_vlan_tag_present(skb) || ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && tqueue->hwts_tx_en))) ctxt_desc_req = 1; diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 6ab36d9ff2ab..a9cac8413e49 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1650,9 +1650,9 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, txd_mss); } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { /*Cut VLAN ID to 12 bits */ - txd_vlan_id = vlan_tx_tag_get(skb) & BITS_MASK(12); + txd_vlan_id = skb_vlan_tag_get(skb) & BITS_MASK(12); txd_vtag = 1; } diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index a191afc23b56..0ac76102b33d 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -1781,8 +1781,8 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, rp->tx_ring[entry].desc_length = cpu_to_le32(TXDESC | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN)); - if (unlikely(vlan_tx_tag_present(skb))) { - u16 vid_pcp = vlan_tx_tag_get(skb); + if (unlikely(skb_vlan_tag_present(skb))) { + u16 vid_pcp = skb_vlan_tag_get(skb); /* drop CFI/DEI bit, register needs VID and PCP */ vid_pcp = (vid_pcp & VLAN_VID_MASK) | @@ -1803,7 +1803,7 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, /* Non-x86 Todo: explicitly flush cache lines here. 
*/ - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) /* Tx queues are bits 7-0 (first Tx queue: bit 7) */ BYTE_REG_BITS_ON(1 << 7, ioaddr + TQWake); diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 282f83a63b67..c20206f83cc1 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2611,8 +2611,8 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb, td_ptr->tdesc1.cmd = TCPLS_NORMAL + (tdinfo->nskb_dma + 1) * 16; - if (vlan_tx_tag_present(skb)) { - td_ptr->tdesc1.vlan = cpu_to_le16(vlan_tx_tag_get(skb)); + if (skb_vlan_tag_present(skb)) { + td_ptr->tdesc1.vlan = cpu_to_le16(skb_vlan_tag_get(skb)); td_ptr->tdesc1.TCR |= TCR0_VETAG; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 7df221788cd4..d0ed5694dd7d 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -645,7 +645,7 @@ static void macvtap_skb_to_vnet_hdr(struct macvtap_queue *q, if (skb->ip_summed == CHECKSUM_PARTIAL) { vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) vnet_hdr->csum_start = cpu_to_macvtap16(q, skb_checksum_start_offset(skb) + VLAN_HLEN); else @@ -821,13 +821,13 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, total = vnet_hdr_len; total += skb->len; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { struct { __be16 h_vlan_proto; __be16 h_vlan_TCI; } veth; veth.h_vlan_proto = skb->vlan_proto; - veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb)); + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); total += VLAN_HLEN; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 74fdf1158448..be196e89ab6c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1260,7 +1260,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, int vlan_hlen = 0; int vnet_hdr_sz = 0; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) vlan_hlen = VLAN_HLEN; if (tun->flags & IFF_VNET_HDR) @@ -1337,7 +1337,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, } veth; veth.h_vlan_proto = skb->vlan_proto; - veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb)); + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b23426e4952c..e519e6a269b9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1421,10 +1421,10 @@ static int msdn_giant_send_check(struct sk_buff *skb) static inline void rtl_tx_vlan_tag(struct tx_desc *desc, struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { u32 opts2; - opts2 = TX_VLAN_TAG | swab16(vlan_tx_tag_get(skb)); + opts2 = TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb)); desc->opts2 |= cpu_to_le32(opts2); } } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 31439818c27e..294214c15292 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1038,9 +1038,9 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, le32_add_cpu(&tq->shared->txNumDeferred, 1); } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { gdesc->txd.ti = 1; - gdesc->txd.tci = vlan_tx_tag_get(skb); + gdesc->txd.tci = skb_vlan_tag_get(skb); } /* finally flips the GEN bit of the SOP desc. 
*/ diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3a18d8ed89ca..985359dd6033 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1561,7 +1561,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + VXLAN_HLEN + sizeof(struct ipv6hdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); @@ -1607,7 +1607,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + VXLAN_HLEN + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 625227ad16ee..dd4ab8d73d34 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2800,12 +2800,12 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, * before we're going to overwrite this location with next hop ip. * v6 uses passthrough, v4 sets the tag in the QDIO header. */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { if ((ipv == 4) || (card->info.type == QETH_CARD_TYPE_IQD)) hdr->hdr.l3.ext_flags = QETH_HDR_EXT_VLAN_FRAME; else hdr->hdr.l3.ext_flags = QETH_HDR_EXT_INCLUDE_VLAN_TAG; - hdr->hdr.l3.vlan_id = vlan_tx_tag_get(skb); + hdr->hdr.l3.vlan_id = skb_vlan_tag_get(skb); } hdr->hdr.l3.length = skb->len - sizeof(struct qeth_hdr); @@ -2986,7 +2986,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(new_skb, ETH_HLEN); } - if (ipv != 4 && vlan_tx_tag_present(new_skb)) { + if (ipv != 4 && skb_vlan_tag_present(new_skb)) { skb_push(new_skb, VLAN_HLEN); skb_copy_to_linear_data(new_skb, new_skb->data + 4, 4); skb_copy_to_linear_data_offset(new_skb, 4, @@ -2995,7 +2995,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) new_skb->data + 12, 4); tag = (u16 *)(new_skb->data + 12); *tag = __constant_htons(ETH_P_8021Q); - *(tag + 1) = htons(vlan_tx_tag_get(new_skb)); + *(tag + 1) = htons(skb_vlan_tag_get(new_skb)); } } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 14419a8ccbb6..bcaf4cabb858 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -469,7 +469,7 @@ static int peek_head_len(struct sock *sk) head = skb_peek(&sk->sk_receive_queue); if (likely(head)) { len = head->len; - if (vlan_tx_tag_present(head)) + if (skb_vlan_tag_present(head)) len += VLAN_HLEN; } diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 515a35e2a48a..bea465f24ebb 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -78,9 +78,9 @@ static inline bool is_vlan_dev(struct net_device *dev) return dev->priv_flags & IFF_802_1Q_VLAN; } -#define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) -#define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) -#define vlan_tx_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) +#define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) +#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) +#define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats @@ -376,7 +376,7 @@ static inline struct sk_buff 
*vlan_insert_tag_set_proto(struct sk_buff *skb, static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) { skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (likely(skb)) skb->vlan_tci = 0; return skb; @@ -393,7 +393,7 @@ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) */ static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) skb = __vlan_hwaccel_push_inside(skb); return skb; } @@ -442,8 +442,8 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { - if (vlan_tx_tag_present(skb)) { - *vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + *vlan_tci = skb_vlan_tag_get(skb); return 0; } else { *vlan_tci = 0; @@ -480,7 +480,7 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) { __be16 protocol = 0; - if (vlan_tx_tag_present(skb) || + if (skb_vlan_tag_present(skb) || skb->protocol != cpu_to_be16(ETH_P_8021Q)) protocol = skb->protocol; else { diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index fe6e7aac3c56..2342bf12cb78 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -121,7 +121,7 @@ static inline __be16 tc_skb_protocol(const struct sk_buff *skb) * vlan accelerated path. In that case, use skb->vlan_proto * as the original vlan header was already stripped. */ - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) return skb->vlan_proto; return skb->protocol; } diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 1de256b35807..49cc7c3de252 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -40,9 +40,9 @@ TRACE_EVENT(net_dev_start_xmit, __assign_str(name, dev->name); __entry->queue_mapping = skb->queue_mapping; __entry->skbaddr = skb; - __entry->vlan_tagged = vlan_tx_tag_present(skb); + __entry->vlan_tagged = skb_vlan_tag_present(skb); __entry->vlan_proto = ntohs(skb->vlan_proto); - __entry->vlan_tci = vlan_tx_tag_get(skb); + __entry->vlan_tci = skb_vlan_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; __entry->len = skb->len; @@ -174,9 +174,9 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, #endif __entry->queue_mapping = skb->queue_mapping; __entry->skbaddr = skb; - __entry->vlan_tagged = vlan_tx_tag_present(skb); + __entry->vlan_tagged = skb_vlan_tag_present(skb); __entry->vlan_proto = ntohs(skb->vlan_proto); - __entry->vlan_tci = vlan_tx_tag_get(skb); + __entry->vlan_tci = skb_vlan_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; __entry->hash = skb->hash; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 90cc2bdd4064..61bf2a06e85d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = vlan_tx_tag_get_id(skb); + u16 vlan_id = skb_vlan_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index c190d22b6b3d..65728e0dc4ff 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -66,17 +66,17 @@ static int brnf_pass_vlan_indev __read_mostly = 0; #endif #define IS_IP(skb) \ - (!vlan_tx_tag_present(skb) && 
skb->protocol == htons(ETH_P_IP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) #define IS_IPV6(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) #define IS_ARP(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) static inline __be16 vlan_proto(const struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) return skb->protocol; else if (skb->protocol == htons(ETH_P_8021Q)) return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -436,11 +436,11 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct struct net_device *vlan, *br; br = bridge_parent(dev); - if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) + if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) return br; vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, - vlan_tx_tag_get(skb) & VLAN_VID_MASK); + skb_vlan_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aea3d1339b3f..d808d766334d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -628,8 +628,8 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) { int err = 0; - if (vlan_tx_tag_present(skb)) - *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + if (skb_vlan_tag_present(skb)) + *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK; else { *vid = 0; err = -EINVAL; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 97b8ddf57363..13013fe8db24 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -187,7 +187,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * sent from vlan device on the bridge device, it does not have * HW accelerated vlan tag. 
*/ - if (unlikely(!vlan_tx_tag_present(skb) && + if (unlikely(!skb_vlan_tag_present(skb) && skb->protocol == proto)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) @@ -200,7 +200,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* Protocol-mismatch, empty out vlan_tci for new tag */ skb_push(skb, ETH_HLEN); skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (unlikely(!skb)) return false; diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 8d3f8c7651f0..618568888128 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -45,8 +45,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) /* VLAN encapsulated Type/Length field, given from orig frame */ __be16 encap; - if (vlan_tx_tag_present(skb)) { - TCI = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + TCI = skb_vlan_tag_get(skb); encap = skb->protocol; } else { const struct vlan_hdr *fp; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index d9a8c05d995d..91180a7fc943 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -133,7 +133,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, __be16 ethproto; int verdict, i; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) ethproto = htons(ETH_P_8021Q); else ethproto = h->h_proto; diff --git a/net/core/dev.c b/net/core/dev.c index 805456147c30..1e325adc4367 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2578,7 +2578,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb->encapsulation) features &= dev->hw_enc_features; - if (!vlan_tx_tag_present(skb)) { + if (!skb_vlan_tag_present(skb)) { if (unlikely(protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; @@ -2659,7 +2659,7 @@ out: static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; @@ -3676,7 +3676,7 @@ ncls: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; @@ -3708,8 +3708,8 @@ ncls: } } - if (unlikely(vlan_tx_tag_present(skb))) { - if (vlan_tx_tag_get_id(skb)) + if (unlikely(skb_vlan_tag_present(skb))) { + if (skb_vlan_tag_get_id(skb)) skb->pkt_type = PACKET_OTHERHOST; /* Note: we might in the future use prio bits * and set skb->priority like in vlan_do_receive() diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e0ad5d16c9c5..c126a878c47c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -77,7 +77,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, features = netif_skb_features(skb); - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5a2a2e887a12..56db472e9b86 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4197,7 +4197,7 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb) struct vlan_hdr *vhdr; u16 vlan_tci; - if (unlikely(vlan_tx_tag_present(skb))) { + if 
(unlikely(skb_vlan_tag_present(skb))) { /* vlan_tci is already set-up so leave this for another time */ return skb; } @@ -4283,7 +4283,7 @@ int skb_vlan_pop(struct sk_buff *skb) __be16 vlan_proto; int err; - if (likely(vlan_tx_tag_present(skb))) { + if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { if (unlikely((skb->protocol != htons(ETH_P_8021Q) && @@ -4313,7 +4313,7 @@ EXPORT_SYMBOL(skb_vlan_pop); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { unsigned int offset = skb->data - skb_mac_header(skb); int err; @@ -4323,7 +4323,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) */ __skb_push(skb, offset); err = __vlan_insert_tag(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (err) return err; skb->protocol = skb->vlan_proto; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 5b52046ec7a2..23744c7a9718 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -119,7 +119,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); err = skb_cow_head(skb, min_headroom); if (unlikely(err)) { diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 770064c83711..b4cffe686126 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -212,7 +212,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) int err; err = skb_vlan_pop(skb); - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = 0; @@ -222,7 +222,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = vlan->vlan_tci; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4e9a5f035cbc..54854e3ecd83 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -419,7 +419,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (!dp_ifindex) return -ENODEV; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index da2fae0873a5..df334fe43d7f 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -70,7 +70,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); - int len = skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + int len = skb->len + (skb_vlan_tag_present(skb) ? 
VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[node]); @@ -472,7 +472,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) */ key->eth.tci = 0; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); else if (eth->h_proto == htons(ETH_P_8021Q)) if (unlikely(parse_vlan(skb, key))) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index d4168c442db5..e9aedb7c7106 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -166,7 +166,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + tunnel_hlen + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { int head_delta = SKB_DATA_ALIGN(min_headroom - skb_headroom(skb) + diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 2034c6d9cb5a..464739aac0f3 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -480,7 +480,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; - stats->rx_bytes += skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + stats->rx_bytes += skb->len + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0f02668dc219..d37075b0d6d5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -986,8 +986,8 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { - if (vlan_tx_tag_present(pkc->skb)) { - ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); + if (skb_vlan_tag_present(pkc->skb)) { + ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -2000,8 +2000,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_net = netoff; h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - if (vlan_tx_tag_present(skb)) { - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -3010,8 +3010,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - if (vlan_tx_tag_present(skb)) { - aux.tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 2159981b604e..b5294ce20cd4 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,7 +176,7 @@ META_COLLECTOR(int_vlan_tag) { unsigned short tag; - tag = vlan_tx_tag_get(skb); + tag = skb_vlan_tag_get(skb); if (!tag && __vlan_get_tag(skb, &tag)) *err = -1; else diff --git a/net/wireless/util.c b/net/wireless/util.c index d0ac795445b7..1d2fcfad06cc 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -708,8 +708,8 @@ 
unsigned int cfg80211_classify8021d(struct sk_buff *skb, if (skb->priority >= 256 && skb->priority <= 263) return skb->priority - 256; - if (vlan_tx_tag_present(skb)) { - vlan_priority = (vlan_tx_tag_get(skb) & VLAN_PRIO_MASK) + if (skb_vlan_tag_present(skb)) { + vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; if (vlan_priority > 0) return vlan_priority; -- cgit v1.2.3 From 36f260ceff9d2a3ac53882eec941a1424719f2a6 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Jan 2015 22:47:22 -0800 Subject: Bluetooth: Move Delete Stored Link Key to 4th phase of initialization This moves the execution of Delete Stored Link Key command to the hci_init4_req phase. No actual code has been changed. The command is just executed at a later stage of the initialization. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 96572a48948e..c04197347c90 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -739,29 +739,6 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_READ_STORED_LINK_KEY, sizeof(cp), &cp); } - /* Some Broadcom based Bluetooth controllers do not support the - * Delete Stored Link Key command. They are clearly indicating its - * absence in the bit mask of supported commands. - * - * Check the supported commands and only if the the command is marked - * as supported send it. If not supported assume that the controller - * does not have actual support for stored link keys which makes this - * command redundant anyway. - * - * Some controllers indicate that they support handling deleting - * stored link keys, but they don't. The quirk lets a driver - * just disable this command. - */ - if (hdev->commands[6] & 0x80 && - !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { - struct hci_cp_delete_stored_link_key cp; - - bacpy(&cp.bdaddr, BDADDR_ANY); - cp.delete_all = 0x01; - hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, - sizeof(cp), &cp); - } - if (hdev->commands[5] & 0x10) hci_setup_link_policy(req); @@ -853,6 +830,29 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; + /* Some Broadcom based Bluetooth controllers do not support the + * Delete Stored Link Key command. They are clearly indicating its + * absence in the bit mask of supported commands. + * + * Check the supported commands and only if the the command is marked + * as supported send it. If not supported assume that the controller + * does not have actual support for stored link keys which makes this + * command redundant anyway. + * + * Some controllers indicate that they support handling deleting + * stored link keys, but they don't. The quirk lets a driver + * just disable this command. 
+ */ + if (hdev->commands[6] & 0x80 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { + struct hci_cp_delete_stored_link_key cp; + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.delete_all = 0x01; + hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, + sizeof(cp), &cp); + } + /* Set event mask page 2 if the HCI command for it is supported */ if (hdev->commands[22] & 0x04) hci_set_event_mask_page_2(req); -- cgit v1.2.3 From 5ced24644b9f72c6fdd3c1ed2d0fd53a6d568b04 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Jan 2015 23:09:48 -0800 Subject: Bluetooth: Use %llu for printing duration details of selftests The duration variable for the selftests is unsigned long long and with that use %llu instead of %lld when printing the results. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/selftest.c | 2 +- net/bluetooth/smp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index 9c67315172cf..378f4064952c 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -184,7 +184,7 @@ static int __init test_ecdh(void) delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - BT_INFO("ECDH test passed in %lld usecs", duration); + BT_INFO("ECDH test passed in %llu usecs", duration); return 0; } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 96c7d51a2753..379654e70e6f 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3387,7 +3387,7 @@ static int __init run_selftests(struct crypto_blkcipher *tfm_aes, delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - BT_INFO("SMP test passed in %lld usecs", duration); + BT_INFO("SMP test passed in %llu usecs", duration); return 0; } -- cgit v1.2.3 From 3b24f4c65386dc0f2efb41027bc6e410ea2c0049 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 7 Jan 2015 15:42:39 +0200 Subject: mac80211: let flush() drop packets when possible When roaming / suspending, it makes no sense to wait until the transmit queues of the device are empty. In extreme condition they can be starved (VO saturating the air), but even in regular cases, it is pointless to delay the roaming because the low level driver is trying to send packets to an AP which is far away. We'd rather drop these packets and let TCP retransmit if needed. This will allow to speed up the roaming. For suspend, the explanation is even more trivial. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/iface.c | 2 +- net/mac80211/mlme.c | 15 ++++++++++----- net/mac80211/offchannel.c | 4 ++-- net/mac80211/pm.c | 2 +- net/mac80211/scan.c | 4 ++-- net/mac80211/tdls.c | 6 +++--- net/mac80211/tx.c | 2 +- net/mac80211/util.c | 8 ++++---- 10 files changed, 27 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 169665835b6c..9ccecb405ede 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3665,7 +3665,7 @@ static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, * queues. 
*/ synchronize_net(); - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); /* restore the normal QoS parameters * (unconditionally to avoid races) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4f45cab8b7f1..91878ef5069a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1882,10 +1882,10 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, void ieee80211_add_pending_skbs(struct ieee80211_local *local, struct sk_buff_head *skbs); void ieee80211_flush_queues(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata); + struct ieee80211_sub_if_data *sdata, bool drop); void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - unsigned int queues); + unsigned int queues, bool drop); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 417355390873..677422e11e07 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -93,7 +93,7 @@ static u32 __ieee80211_idle_on(struct ieee80211_local *local) if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); local->hw.conf.flags |= IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8354bedc858a..c0711082a2b8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1604,7 +1604,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) } else { ieee80211_send_nullfunc(local, sdata, 1); /* Flush to get the tx status of nullfunc frame */ - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); } } @@ -2011,18 +2011,23 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* disable per-vif ps */ ieee80211_recalc_ps_vif(sdata); - /* flush out any pending frame (e.g. DELBA) before deauth/disassoc */ + /* + * drop any frame before deauth/disassoc, this can be data or + * management frame. Since we are disconnecting, we should not + * insist sending these frames which can take time and delay + * the disconnection and possible the roaming. 
+ */ if (tx) - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, true); /* deauthenticate/disassociate now */ if (tx || frame_buf) ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype, reason, tx, frame_buf); - /* flush out frame */ + /* flush out frame - make sure the deauth was actually sent */ if (tx) - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); /* clear bssid only after building the needed mgmt frames */ memset(ifmgd->bssid, 0, ETH_ALEN); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index ff20b2ebdb30..683f0e3cb124 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -121,7 +121,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, false); - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { @@ -398,7 +398,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) ieee80211_roc_notify_destroy(roc, !roc->abort); if (started && !on_channel) { - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); local->tmp_channel = NULL; ieee80211_hw_config(local, 0); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 4c5192e0d66c..8c8c67819072 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -41,7 +41,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* flush out all packets */ synchronize_net(); - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, true); local->quiescing = true; /* make quiescing visible to timers everywhere */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index ae842678b629..844fb5f7910e 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -416,7 +416,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, ieee80211_offchannel_stop_vifs(local); /* ensure nullfunc is transmitted before leaving operating channel */ - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); ieee80211_configure_filter(local); @@ -805,7 +805,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local, ieee80211_offchannel_stop_vifs(local); if (local->ops->flush) { - ieee80211_flush_queues(local, NULL); + ieee80211_flush_queues(local, NULL, false); *next_delay = 0; } else *next_delay = HZ / 10; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 55ddd77b865d..159ebf1304b2 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -912,7 +912,7 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, rcu_read_unlock(); } - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, dialog_token, status_code, @@ -952,7 +952,7 @@ ieee80211_tdls_mgmt_teardown(struct wiphy *wiphy, struct net_device *dev, */ ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN); - ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, action_code, dialog_token, status_code, @@ -1098,7 +1098,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, */ tasklet_kill(&local->tx_pending_tasklet); /* flush a potentially queued teardown packet */ - 
ieee80211_flush_queues(local, sdata); + ieee80211_flush_queues(local, sdata, false); ret = sta_info_destroy_addr(sdata, peer); break; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index da7f352a2b16..02ed6f60629a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3155,7 +3155,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid) } queues = BIT(sdata->vif.hw_queue[ieee802_1d_to_ac[tid]]); - __ieee80211_flush_queues(local, sdata, queues); + __ieee80211_flush_queues(local, sdata, queues, false); sta->reserved_tid = tid; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ad8cb4fb441e..83ba6cd9cf8d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -578,7 +578,7 @@ ieee80211_get_vif_queues(struct ieee80211_local *local, void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - unsigned int queues) + unsigned int queues, bool drop) { if (!local->ops->flush) return; @@ -594,7 +594,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, IEEE80211_QUEUE_STOP_REASON_FLUSH, false); - drv_flush(local, sdata, queues, false); + drv_flush(local, sdata, queues, drop); ieee80211_wake_queues_by_reason(&local->hw, queues, IEEE80211_QUEUE_STOP_REASON_FLUSH, @@ -602,9 +602,9 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, } void ieee80211_flush_queues(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + struct ieee80211_sub_if_data *sdata, bool drop) { - __ieee80211_flush_queues(local, sdata, 0); + __ieee80211_flush_queues(local, sdata, 0, drop); } void ieee80211_stop_vif_queues(struct ieee80211_local *local, -- cgit v1.2.3 From 50075892ba30c4c19c41235b5308ee5a1e2125d7 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 7 Jan 2015 16:45:07 +0200 Subject: mac80211: add TDLS supported channels correctly The function adding the supported channels IE during a TDLS connection had several issues: 1. If the entire subband is usable, the function exited the loop without adding it 2. The function only checked chandef_usable, ignoring flags like RADAR which would prevent TDLS off-channel communication. 3. HT20 was explicitly required in the chandef, while not a requirement for TDLS off-channel.
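As a standalone illustration of the corrected grouping (this is not the mac80211 code; the function name, channel numbers and flags below are made up), contiguous allowed channels accumulate into a (first channel, count) pair, the pair is emitted whenever a disallowed channel ends the run, and the still-open run has to be emitted once more after the loop, which is the case the old code dropped:

#include <stdio.h>

/* Illustrative sketch: print (first, count) subband entries for runs of
 * allowed channels, including a final run that reaches the end of the band.
 */
static void emit_subbands(const int *chan, const int *allowed, int n)
{
	int first = 0, cnt = 0;

	for (int i = 0; i < n; i++) {
		if (allowed[i]) {
			if (!cnt)
				first = chan[i];	/* open a new run */
			cnt++;
			continue;
		}
		if (cnt) {		/* run ended: emit it */
			printf("subband: first %d, count %d\n", first, cnt);
			cnt = 0;
		}
	}
	if (cnt)	/* trailing run, previously lost when the loop ended */
		printf("subband: first %d, count %d\n", first, cnt);
}

int main(void)
{
	int chan[]    = { 36, 40, 44, 48, 52, 56 };
	int allowed[] = {  1,  1,  0,  1,  1,  1 };

	emit_subbands(chan, allowed, 6);	/* prints runs 36/2 and 48/3 */
	return 0;
}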
Signed-off-by: Arik Nemtsov Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/tdls.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 159ebf1304b2..917088dfd696 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -68,17 +68,24 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, ch = ieee80211_get_channel(sdata->local->hw.wiphy, i); if (ch) { /* we will be active on the channel */ - u32 flags = IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_IR; cfg80211_chandef_create(&chandef, ch, - NL80211_CHAN_HT20); - if (cfg80211_chandef_usable(sdata->local->hw.wiphy, - &chandef, flags)) { + NL80211_CHAN_NO_HT); + if (cfg80211_reg_can_beacon(sdata->local->hw.wiphy, + &chandef, + sdata->wdev.iftype)) { ch_cnt++; + /* + * check if the next channel is also part of + * this allowed range + */ continue; } } + /* + * we've reached the end of a range, with allowed channels + * found + */ if (ch_cnt) { u8 *pos = skb_put(skb, 2); *pos++ = ieee80211_frequency_to_channel(subband_start); @@ -89,6 +96,15 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata, } } + /* all channels in the requested range are allowed - add them here */ + if (ch_cnt) { + u8 *pos = skb_put(skb, 2); + *pos++ = ieee80211_frequency_to_channel(subband_start); + *pos++ = ch_cnt; + + subband_cnt++; + } + return subband_cnt; } -- cgit v1.2.3 From 5cbc95a749c5f3b238af0953881edede9bfe5bf6 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 7 Jan 2015 17:50:09 +0200 Subject: mac80211: remove local->radar_detect_enabled local->radar_detect_enabled should tell whether radar_detect is enabled on any interface belonging to local. However, it's not getting updated correctly in many cases (actually, when testing with hwsim it's never been set, even when the dfs master is beaconing). Instead of handling all the corner cases (e.g. channel switch), simply check whether radar detection is enabled only when needed, instead of caching the result. 
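A toy userspace sketch of the failure mode being removed (the struct and values are invented for illustration, this is not mac80211 code): a flag cached at one point in time misses later state changes, while deriving the answer from the interface list at the point of use always reflects current state.

#include <stdbool.h>
#include <stdio.h>

struct vif { bool running; bool radar_required; };

/* Derive "any running vif needs radar detection" when asked, rather than
 * trusting a flag that every state-change path would have to refresh.
 */
static bool radar_detect_required(const struct vif *vifs, int n)
{
	for (int i = 0; i < n; i++)
		if (vifs[i].running && vifs[i].radar_required)
			return true;
	return false;
}

int main(void)
{
	struct vif vifs[2] = { { true, false }, { true, false } };
	bool cached = radar_detect_required(vifs, 2);	/* snapshot: false */

	vifs[1].radar_required = true;	/* e.g. an AP starts on a DFS channel */

	printf("cached=%d derived=%d\n",
	       cached, radar_detect_required(vifs, 2));	/* cached=0 derived=1 */
	return 0;
}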
Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- net/mac80211/chan.c | 5 ++--- net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/scan.c | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9ccecb405ede..fd6860d7f557 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2557,7 +2557,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, /* if there's one pending or we're scanning, queue this one */ if (!list_empty(&local->roc_list) || - local->scanning || local->radar_detect_enabled) + local->scanning || ieee80211_is_radar_required(local)) goto out_check_combine; /* if not HW assist, just queue & schedule work */ diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index da1c12c34487..92c721dbd5af 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -388,7 +388,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, return NULL; } -static bool ieee80211_is_radar_required(struct ieee80211_local *local) +bool ieee80211_is_radar_required(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; @@ -567,7 +567,7 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, bool radar_enabled; lockdep_assert_held(&local->chanctx_mtx); - /* for setting local->radar_detect_enabled */ + /* for ieee80211_is_radar_required */ lockdep_assert_held(&local->mtx); radar_enabled = ieee80211_is_radar_required(local); @@ -576,7 +576,6 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, return; chanctx->conf.radar_enabled = radar_enabled; - local->radar_detect_enabled = chanctx->conf.radar_enabled; if (!local->use_chanctx) { local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 91878ef5069a..156ea79e0157 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1168,8 +1168,6 @@ struct ieee80211_local { /* wowlan is enabled -- don't reconfig on resume */ bool wowlan; - /* DFS/radar detection is enabled */ - bool radar_detect_enabled; struct work_struct radar_detected_work; /* number of RX chains the hardware has */ @@ -1982,6 +1980,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *chanctx); void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); +bool ieee80211_is_radar_required(struct ieee80211_local *local); void ieee80211_dfs_cac_timer(unsigned long data); void ieee80211_dfs_cac_timer_work(struct work_struct *work); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 844fb5f7910e..b0320bb20ab8 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -432,7 +432,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - if (local->radar_detect_enabled) + if (ieee80211_is_radar_required(local)) return false; if (!list_empty(&local->roc_list)) -- cgit v1.2.3 From e7f2337ae70e0cdaac577c5a4823775097d0e9e7 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 7 Jan 2015 17:50:10 +0200 Subject: mac80211: consider only relevant vifs for radar_required calculation ctx->conf.radar_enabled should reflect whether radar detection is enabled for the channel context. When calculating it, make it consider only the vifs that have this context assigned (instead of all the vifs). 
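A minimal sketch of the scoping change (illustrative only; the real code walks local->interfaces under RCU and compares each vif's assigned chanctx_conf): only vifs bound to the context in question may set its radar_enabled, so a DFS requirement on one context no longer turns radar on for every other context.

#include <stdbool.h>
#include <stdio.h>

struct vif { int ctx_id; bool radar_required; };

/* Only vifs assigned to this context contribute to its radar_enabled. */
static bool ctx_radar_required(const struct vif *vifs, int n, int ctx_id)
{
	for (int i = 0; i < n; i++)
		if (vifs[i].ctx_id == ctx_id && vifs[i].radar_required)
			return true;
	return false;
}

int main(void)
{
	struct vif vifs[2] = { { 0, true }, { 1, false } };

	printf("ctx0=%d ctx1=%d\n",
	       ctx_radar_required(vifs, 2, 0),
	       ctx_radar_required(vifs, 2, 1));	/* ctx0=1 ctx1=0 */
	return 0;
}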
Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 92c721dbd5af..35b11e11e0c4 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -406,6 +406,34 @@ bool ieee80211_is_radar_required(struct ieee80211_local *local) return false; } +static bool +ieee80211_chanctx_radar_required(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) +{ + struct ieee80211_chanctx_conf *conf = &ctx->conf; + struct ieee80211_sub_if_data *sdata; + bool required = false; + + lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_held(&local->mtx); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata)) + continue; + if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf) + continue; + if (!sdata->radar_required) + continue; + + required = true; + break; + } + rcu_read_unlock(); + + return required; +} + static struct ieee80211_chanctx * ieee80211_alloc_chanctx(struct ieee80211_local *local, const struct cfg80211_chan_def *chandef, @@ -425,7 +453,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_static = 1; ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; - ctx->conf.radar_enabled = ieee80211_is_radar_required(local); + ctx->conf.radar_enabled = false; ieee80211_recalc_chanctx_min_def(local, ctx); return ctx; @@ -570,7 +598,7 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, /* for ieee80211_is_radar_required */ lockdep_assert_held(&local->mtx); - radar_enabled = ieee80211_is_radar_required(local); + radar_enabled = ieee80211_chanctx_radar_required(local, chanctx); if (radar_enabled == chanctx->conf.radar_enabled) return; -- cgit v1.2.3 From 2726f23d2d3775668f00b9a884eb88cd8812917c Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 7 Jan 2015 17:50:11 +0200 Subject: mac80211: don't defer scans in case of radar detection Radar detection can last indefinite time. There is no point in deferring a scan request in this case - simply return -EBUSY. Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index b0320bb20ab8..7807fa42ed3f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -505,7 +505,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&local->mtx); - if (local->scan_req) + if (local->scan_req || ieee80211_is_radar_required(local)) return -EBUSY; if (!ieee80211_can_scan(local, sdata)) { -- cgit v1.2.3 From 2c3e861c94a29a30c75f60f2561b4ee70b3fb3a4 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 7 Jan 2015 16:47:19 +0200 Subject: cfg80211: introduce sync regdom set API for self-managed A self-managed device will sometimes need to set its regdomain synchronously. Notably it should be set before usermode has a chance to query it. Expose a new API to accomplish this which requires the RTNL. 
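A hedged sketch of how a self-managed driver might call the new API (the wrapper function below is invented; regulatory_set_wiphy_regd_sync_rtnl(), ASSERT_RTNL() and REGULATORY_WIPHY_SELF_MANAGED are the kernel symbols used by this series, and the error handling is illustrative):

#include <linux/rtnetlink.h>
#include <net/cfg80211.h>

/* Apply a driver-built regdomain synchronously while the RTNL is already
 * held (e.g. during registration), so userspace never sees the wiphy
 * without its regulatory data. The wiphy is assumed to have
 * REGULATORY_WIPHY_SELF_MANAGED set in regulatory_flags.
 */
static int example_apply_regd(struct wiphy *wiphy,
			      struct ieee80211_regdomain *regd)
{
	int ret;

	ASSERT_RTNL();	/* the _sync_rtnl variant requires the RTNL */

	ret = regulatory_set_wiphy_regd_sync_rtnl(wiphy, regd);
	if (ret)
		return ret;

	/* unlike regulatory_set_wiphy_regd(), the hint has already been
	 * processed by the time this returns
	 */
	return 0;
}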
Signed-off-by: Arik Nemtsov Reviewed-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++++++++++++ net/wireless/reg.c | 31 +++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 197735788f18..38abc07503fd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3788,6 +3788,20 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2); int regulatory_set_wiphy_regd(struct wiphy *wiphy, struct ieee80211_regdomain *rd); +/** + * regulatory_set_wiphy_regd_sync_rtnl - set regdom for self-managed drivers + * @wiphy: the wireless device we want to process the regulatory domain on + * @rd: the regulatory domain information to use for this wiphy + * + * This functions requires the RTNL to be held and applies the new regdomain + * synchronously to this wiphy. For more details see + * regulatory_set_wiphy_regd(). + * + * Return: 0 on success. -EINVAL, -EPERM + */ +int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, + struct ieee80211_regdomain *rd); + /** * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain * @wiphy: the wireless device we want to process the regulatory domain on diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8d232b904210..f8ed79729eb0 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2897,8 +2897,8 @@ int set_regdom(const struct ieee80211_regdomain *rd) return 0; } -int regulatory_set_wiphy_regd(struct wiphy *wiphy, - struct ieee80211_regdomain *rd) +static int __regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) { const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *prev_regd; @@ -2928,12 +2928,39 @@ int regulatory_set_wiphy_regd(struct wiphy *wiphy, spin_unlock(®_requests_lock); kfree(prev_regd); + return 0; +} + +int regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + int ret = __regulatory_set_wiphy_regd(wiphy, rd); + + if (ret) + return ret; schedule_work(®_work); return 0; } EXPORT_SYMBOL(regulatory_set_wiphy_regd); +int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + int ret; + + ASSERT_RTNL(); + + ret = __regulatory_set_wiphy_regd(wiphy, rd); + if (ret) + return ret; + + /* process the request immediately */ + reg_process_self_managed_hints(); + return 0; +} +EXPORT_SYMBOL(regulatory_set_wiphy_regd_sync_rtnl); + void wiphy_regulatory_register(struct wiphy *wiphy) { struct regulatory_request *lr; -- cgit v1.2.3 From ef51fb1d1cd54ae9e0b0efd3b9bdb561fe5483a0 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 7 Jan 2015 16:47:20 +0200 Subject: cfg80211: avoid reg-hints in self-managed only systems When a system contains only self-managed regulatory devices all hints from the regulatory core are ignored. Stop hint processing early in this case. These systems usually don't have CRDA deployed, which results in endless (irrelevent) logs of the form: cfg80211: Calling CRDA to update world regulatory domain Make sure there's at least one self-managed device before discarding a hint, in order to prevent initial hints from disappearing on CRDA managed systems. 
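The subtle part is the empty-list case: a hint may only be dropped when at least one wiphy is registered and all of them are self-managed. A small standalone illustration of that predicate (names invented, not the cfg80211 code):

#include <stdbool.h>
#include <stdio.h>

/* True only if every entry is self-managed and the list is non-empty;
 * an empty list must keep hint processing alive for devices that
 * register later.
 */
static bool only_self_managed(const bool *self_managed, int n)
{
	bool found = false;

	for (int i = 0; i < n; i++) {
		if (!self_managed[i])
			return false;	/* a CRDA-managed wiphy is present */
		found = true;
	}
	return found;
}

int main(void)
{
	bool none[1]  = { false };
	bool mixed[2] = { true, false };
	bool all[2]   = { true, true };

	printf("%d %d %d\n", only_self_managed(none, 0),
	       only_self_managed(mixed, 2), only_self_managed(all, 2));
	return 0;	/* prints 0 0 1 */
}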
Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- net/wireless/reg.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f8ed79729eb0..886cc7cb5566 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2108,6 +2108,26 @@ out_free: reg_free_request(reg_request); } +static bool reg_only_self_managed_wiphys(void) +{ + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy; + bool self_managed_found = false; + + ASSERT_RTNL(); + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + wiphy = &rdev->wiphy; + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + self_managed_found = true; + else + return false; + } + + /* make sure at least one self-managed wiphy exists */ + return self_managed_found; +} + /* * Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* * Regulatory hints come on a first come first serve basis and we @@ -2139,6 +2159,11 @@ static void reg_process_pending_hints(void) spin_unlock(®_requests_lock); + if (reg_only_self_managed_wiphys()) { + reg_free_request(reg_request); + return; + } + reg_process_hint(reg_request); } -- cgit v1.2.3 From 75453ccb61120885d6715a49496c57930dbe6253 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 9 Jan 2015 14:06:37 +0200 Subject: nl80211: send netdetect configuration info in NL80211_CMD_GET_WOWLAN Send the netdetect configuration information in the response to NL8021_CMD_GET_WOWLAN commands. This includes the scan interval, SSIDs to match and frequencies to scan. Additionally, add the NL80211_WOWLAN_TRIG_NET_DETECT with NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 +++- net/wireless/nl80211.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a963d4824c51..b6c1a00bd8d2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3734,7 +3734,9 @@ struct nl80211_pattern_support { * same attributes used with @NL80211_CMD_START_SCHED_SCAN. It * specifies how the scan is performed (e.g. the interval and the * channels to scan) as well as the scan results that will - * trigger a wake (i.e. the matchsets). + * trigger a wake (i.e. the matchsets). This attribute is also + * sent in a response to @NL80211_CMD_GET_WIPHY, indicating the + * number of match sets supported by the driver (u32). * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute * containing an array with information about what triggered the * wake up. 
If no elements are present in the array, it means diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7c2ce26e22de..380784378df8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1088,6 +1088,11 @@ static int nl80211_send_wowlan(struct sk_buff *msg, return -ENOBUFS; } + if ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_NET_DETECT) && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_NET_DETECT, + rdev->wiphy.wowlan->max_nd_match_sets)) + return -ENOBUFS; + if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; @@ -8747,6 +8752,48 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, return 0; } +static int nl80211_send_wowlan_nd(struct sk_buff *msg, + struct cfg80211_sched_scan_request *req) +{ + struct nlattr *nd, *freqs, *matches, *match; + int i; + + if (!req) + return 0; + + nd = nla_nest_start(msg, NL80211_WOWLAN_TRIG_NET_DETECT); + if (!nd) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->interval)) + return -ENOBUFS; + + freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); + if (!freqs) + return -ENOBUFS; + + for (i = 0; i < req->n_channels; i++) + nla_put_u32(msg, i, req->channels[i]->center_freq); + + nla_nest_end(msg, freqs); + + if (req->n_match_sets) { + matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH); + for (i = 0; i < req->n_match_sets; i++) { + match = nla_nest_start(msg, i); + nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + req->match_sets[i].ssid.ssid_len, + req->match_sets[i].ssid.ssid); + nla_nest_end(msg, match); + } + nla_nest_end(msg, matches); + } + + nla_nest_end(msg, nd); + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -8804,6 +8851,11 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; + if (nl80211_send_wowlan_nd( + msg, + rdev->wiphy.wowlan_config->nd_config)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } -- cgit v1.2.3 From 36c269cecf05d643ddeff3a0f515e7fde5609a77 Mon Sep 17 00:00:00 2001 From: Gowtham Anandha Babu Date: Mon, 5 Jan 2015 19:32:55 +0530 Subject: Bluetooth: Remove dead code Variable 'controller' is assigned a value that is never used. Identified by cppcheck tool. 
Signed-off-by: Gowtham Anandha Babu Signed-off-by: Johan Hedberg --- net/bluetooth/cmtp/capi.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 1ca8a87a0787..75bd2c42e3e7 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -253,8 +253,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 15) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 10); - if (!info && ctrl) { int len = min_t(uint, CAPI_MANUFACTURER_LEN, skb->data[CAPI_MSG_BASELEN + 14]); @@ -270,8 +268,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 32) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); - if (!info && ctrl) { ctrl->version.majorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 16); ctrl->version.minorversion = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 20); @@ -285,8 +281,6 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s if (skb->len < CAPI_MSG_BASELEN + 17) break; - controller = CAPIMSG_U32(skb->data, CAPI_MSG_BASELEN + 12); - if (!info && ctrl) { int len = min_t(uint, CAPI_SERIAL_LEN, skb->data[CAPI_MSG_BASELEN + 16]); -- cgit v1.2.3 From a2b12f3c7ac1ea43ae646db74faf0b56c2bba563 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 12 Jan 2015 17:00:37 -0800 Subject: udp: pass udp_offload struct to UDP gro callbacks This patch introduces udp_offload_callbacks which has the same GRO functions (but not a GSO function) as offload_callbacks, except there is an argument to a udp_offload struct passed to gro_receive and gro_complete functions. This additional argument can be used to retrieve the per port structure of the encapsulation for use in gro processing (mostly by doing container_of on the structure). Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 7 +++++-- include/linux/netdevice.h | 15 +++++++++++++-- net/ipv4/fou.c | 12 ++++++++---- net/ipv4/geneve.c | 6 ++++-- net/ipv4/udp_offload.c | 7 +++++-- 5 files changed, 35 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 985359dd6033..5c56a3ff25aa 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -539,7 +539,9 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, return 1; } -static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff *skb) +static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff) { struct sk_buff *p, **pp = NULL; struct vxlanhdr *vh, *vh2; @@ -578,7 +580,8 @@ out: return pp; } -static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) +static int vxlan_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { udp_tunnel_gro_complete(skb, nhoff); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 679e6e90aa4c..47921c291dd6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1969,7 +1969,7 @@ struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb); + struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb, int nhoff); }; @@ -1979,10 +1979,21 @@ struct packet_offload { struct list_head list; }; +struct udp_offload; + +struct udp_offload_callbacks { + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff); + int (*gro_complete)(struct sk_buff *skb, + int nhoff, + struct udp_offload *uoff); +}; + struct udp_offload { __be16 port; u8 ipproto; - struct offload_callbacks callbacks; + struct udp_offload_callbacks callbacks; }; /* often modified stats are per cpu, other are shared (netdev->stats) */ diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 2197c36f722f..3bc0cf07661c 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -174,7 +174,8 @@ drop: } static struct sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload *ops; struct sk_buff **pp = NULL; @@ -195,7 +196,8 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff) +static int fou_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload *ops; u8 proto = NAPI_GRO_CB(skb)->proto; @@ -254,7 +256,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, } static struct sk_buff **gue_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload **offloads; const struct net_offload *ops; @@ -360,7 +363,8 @@ out: return pp; } -static int gue_gro_complete(struct sk_buff *skb, int nhoff) +static int gue_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload **offloads; struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 23744c7a9718..9568594ca2f1 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -147,7 +147,8 @@ static int geneve_hlen(struct genevehdr *gh) } static struct sk_buff **geneve_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { struct sk_buff *p, 
**pp = NULL; struct genevehdr *gh, *gh2; @@ -211,7 +212,8 @@ out: return pp; } -static int geneve_gro_complete(struct sk_buff *skb, int nhoff) +static int geneve_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { struct genevehdr *gh; struct packet_offload *ptype; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d3e537ef6b7f..d10f6f4ead27 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -339,7 +339,8 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb); + pp = uo_priv->offload->callbacks.gro_receive(head, skb, + uo_priv->offload); out_unlock: rcu_read_unlock(); @@ -395,7 +396,9 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) if (uo_priv != NULL) { NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr)); + err = uo_priv->offload->callbacks.gro_complete(skb, + nhoff + sizeof(struct udphdr), + uo_priv->offload); } rcu_read_unlock(); -- cgit v1.2.3 From 3f4c1d87af2a0c8a599b95bfebbc6338de343aca Mon Sep 17 00:00:00 2001 From: Fan Du Date: Wed, 14 Jan 2015 13:10:35 +0800 Subject: openvswitch: Introduce ovs_tunnel_route_lookup Introduce ovs_tunnel_route_lookup to consolidate route lookup shared by vxlan, gre, and geneve ports. Signed-off-by: Fan Du Signed-off-by: David S. Miller --- net/openvswitch/vport-geneve.c | 13 ++----------- net/openvswitch/vport-gre.c | 12 ++---------- net/openvswitch/vport-vxlan.c | 12 ++---------- net/openvswitch/vport.c | 9 +-------- net/openvswitch/vport.h | 18 ++++++++++++++++++ 5 files changed, 25 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 2daf1440618b..88a010c98c05 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -170,7 +170,7 @@ error: static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) { - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct ovs_tunnel_info *tun_info; struct net *net = ovs_dp_get_net(vport->dp); struct geneve_port *geneve_port = geneve_vport(vport); @@ -189,16 +189,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &tun_info->tunnel; - - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index e9aedb7c7106..f17ac9642f4e 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -134,7 +134,7 @@ static int gre_err(struct sk_buff *skb, u32 info, static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct flowi4 fl; struct rtable *rt; int min_headroom; @@ -148,15 +148,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, 
sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_GRE; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto err_free_skb; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index d7c46b301024..1435a053a870 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -145,7 +145,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; - struct ovs_key_ipv4_tunnel *tun_key; + const struct ovs_key_ipv4_tunnel *tun_key; struct rtable *rt; struct flowi4 fl; __be16 src_port; @@ -158,15 +158,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) } tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; - /* Route lookup */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb->mark; - fl.flowi4_proto = IPPROTO_UDP; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 464739aac0f3..ec2954ffc690 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -595,14 +595,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info, * The process may need to be changed if the corresponding process * in vports ops changed. */ - memset(&fl, 0, sizeof(fl)); - fl.daddr = tun_key->ipv4_dst; - fl.saddr = tun_key->ipv4_src; - fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos); - fl.flowi4_mark = skb_mark; - fl.flowi4_proto = ipproto; - - rt = ip_route_output_key(net, &fl); + rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 99c8e71d9e6c..f8ae295fb001 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -236,4 +236,22 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, int ovs_vport_ops_register(struct vport_ops *ops); void ovs_vport_ops_unregister(struct vport_ops *ops); +static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, + const struct ovs_key_ipv4_tunnel *key, + u32 mark, + struct flowi4 *fl, + u8 protocol) +{ + struct rtable *rt; + + memset(fl, 0, sizeof(*fl)); + fl->daddr = key->ipv4_dst; + fl->saddr = key->ipv4_src; + fl->flowi4_tos = RT_TOS(key->ipv4_tos); + fl->flowi4_mark = mark; + fl->flowi4_proto = protocol; + + rt = ip_route_output_key(net, fl); + return rt; +} #endif /* vport.h */ -- cgit v1.2.3 From 9a6b4b392de4569e83ecfd6f382f3369f250b52f Mon Sep 17 00:00:00 2001 From: zhuyj Date: Wed, 14 Jan 2015 17:23:59 +0800 Subject: ipv6:icmp:remove unnecessary brackets There are too many brackets. Maybe only one bracket is enough. Signed-off-by: Zhu Yanjun Signed-off-by: David S. 
Miller --- net/ipv6/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index d674152b6ede..a5e95199585e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -427,7 +427,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Dest addr check */ - if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) { + if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) { if (type != ICMPV6_PKT_TOOBIG && !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && -- cgit v1.2.3 From e12af489b91d47a806f4e96e4edc20df612482e7 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 14 Jan 2015 20:51:37 +0200 Subject: Bluetooth: Fix valid Identity Address check According to the Bluetooth core specification valid identity addresses are either Public Device Addresses or Static Random Addresses. IRKs received with any other type of address should be discarded since we cannot assume to know the permanent identity of the peer device. This patch fixes a missing check for the Identity Address when receiving the Identity Address Information SMP PDU. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org # 3.17+ --- net/bluetooth/smp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 379654e70e6f..04e49f8f0982 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2305,8 +2305,12 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, * implementations are not known of and in order to not over * complicate our implementation, simply pretend that we never * received an IRK for such a device. + * + * The Identity Address must also be a Static Random or Public + * Address, which hci_is_identity_address() checks for. */ - if (!bacmp(&info->bdaddr, BDADDR_ANY)) { + if (!bacmp(&info->bdaddr, BDADDR_ANY) || + !hci_is_identity_address(&info->bdaddr, info->addr_type)) { BT_ERR("Ignoring IRK with no identity address"); goto distribute; } -- cgit v1.2.3 From 7b2ed60ed43ac7348dec33e789d1481891537841 Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Thu, 8 Jan 2015 17:00:55 +0200 Subject: Bluetooth: 6lowpan: Remove PSM setting code Removing PSM setting debugfs interface as the IPSP has a well defined PSM value that should be used. The patch introduces enable flag that can be used to toggle 6lowpan on/off. Signed-off-by: Jukka Rissanen Signed-off-by: Marcel Holtmann --- net/bluetooth/6lowpan.c | 66 +++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index c989253737f0..1742b849fcff 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -31,7 +31,7 @@ #define VERSION "0.1" -static struct dentry *lowpan_psm_debugfs; +static struct dentry *lowpan_enable_debugfs; static struct dentry *lowpan_control_debugfs; #define IFACE_NAME_TEMPLATE "bt%d" @@ -55,11 +55,7 @@ struct skb_cb { static LIST_HEAD(bt_6lowpan_devices); static DEFINE_SPINLOCK(devices_lock); -/* If psm is set to 0 (default value), then 6lowpan is disabled. - * Other values are used to indicate a Protocol Service Multiplexer - * value for 6lowpan. 
- */ -static u16 psm_6lowpan; +static bool enable_6lowpan; /* We are listening incoming connections via this channel */ @@ -761,7 +757,7 @@ static bool is_bt_6lowpan(struct hci_conn *hcon) if (hcon->type != LE_LINK) return false; - if (!psm_6lowpan) + if (!enable_6lowpan) return false; return true; @@ -1085,7 +1081,7 @@ static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type) if (!pchan) return -EINVAL; - err = l2cap_chan_connect(pchan, cpu_to_le16(psm_6lowpan), 0, + err = l2cap_chan_connect(pchan, cpu_to_le16(L2CAP_PSM_IPSP), 0, addr, dst_type); BT_DBG("chan %p err %d", pchan, err); @@ -1118,7 +1114,7 @@ static struct l2cap_chan *bt_6lowpan_listen(void) struct l2cap_chan *pchan; int err; - if (psm_6lowpan == 0) + if (!enable_6lowpan) return NULL; pchan = chan_get(); @@ -1130,10 +1126,9 @@ static struct l2cap_chan *bt_6lowpan_listen(void) atomic_set(&pchan->nesting, L2CAP_NESTING_PARENT); - BT_DBG("psm 0x%04x chan %p src type %d", psm_6lowpan, pchan, - pchan->src_type); + BT_DBG("chan %p src type %d", pchan, pchan->src_type); - err = l2cap_add_psm(pchan, addr, cpu_to_le16(psm_6lowpan)); + err = l2cap_add_psm(pchan, addr, cpu_to_le16(L2CAP_PSM_IPSP)); if (err) { l2cap_chan_put(pchan); BT_ERR("psm cannot be added err %d", err); @@ -1219,22 +1214,23 @@ static void disconnect_all_peers(void) spin_unlock(&devices_lock); } -struct set_psm { +struct set_enable { struct work_struct work; - u16 psm; + bool flag; }; -static void do_psm_set(struct work_struct *work) +static void do_enable_set(struct work_struct *work) { - struct set_psm *set_psm = container_of(work, struct set_psm, work); + struct set_enable *set_enable = container_of(work, + struct set_enable, work); - if (set_psm->psm == 0 || psm_6lowpan != set_psm->psm) + if (!set_enable->flag || enable_6lowpan != set_enable->flag) /* Disconnect existing connections if 6lowpan is - * disabled (psm = 0), or if psm changes. 
+ * disabled */ disconnect_all_peers(); - psm_6lowpan = set_psm->psm; + enable_6lowpan = set_enable->flag; if (listen_chan) { l2cap_chan_close(listen_chan, 0); @@ -1243,33 +1239,33 @@ static void do_psm_set(struct work_struct *work) listen_chan = bt_6lowpan_listen(); - kfree(set_psm); + kfree(set_enable); } -static int lowpan_psm_set(void *data, u64 val) +static int lowpan_enable_set(void *data, u64 val) { - struct set_psm *set_psm; + struct set_enable *set_enable; - set_psm = kzalloc(sizeof(*set_psm), GFP_KERNEL); - if (!set_psm) + set_enable = kzalloc(sizeof(*set_enable), GFP_KERNEL); + if (!set_enable) return -ENOMEM; - set_psm->psm = val; - INIT_WORK(&set_psm->work, do_psm_set); + set_enable->flag = !!val; + INIT_WORK(&set_enable->work, do_enable_set); - schedule_work(&set_psm->work); + schedule_work(&set_enable->work); return 0; } -static int lowpan_psm_get(void *data, u64 *val) +static int lowpan_enable_get(void *data, u64 *val) { - *val = psm_6lowpan; + *val = enable_6lowpan; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(lowpan_psm_fops, lowpan_psm_get, - lowpan_psm_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(lowpan_enable_fops, lowpan_enable_get, + lowpan_enable_set, "%llu\n"); static ssize_t lowpan_control_write(struct file *fp, const char __user *user_buffer, @@ -1439,9 +1435,9 @@ static struct notifier_block bt_6lowpan_dev_notifier = { static int __init bt_6lowpan_init(void) { - lowpan_psm_debugfs = debugfs_create_file("6lowpan_psm", 0644, - bt_debugfs, NULL, - &lowpan_psm_fops); + lowpan_enable_debugfs = debugfs_create_file("6lowpan_enable", 0644, + bt_debugfs, NULL, + &lowpan_enable_fops); lowpan_control_debugfs = debugfs_create_file("6lowpan_control", 0644, bt_debugfs, NULL, &lowpan_control_fops); @@ -1451,7 +1447,7 @@ static int __init bt_6lowpan_init(void) static void __exit bt_6lowpan_exit(void) { - debugfs_remove(lowpan_psm_debugfs); + debugfs_remove(lowpan_enable_debugfs); debugfs_remove(lowpan_control_debugfs); if (listen_chan) { -- cgit v1.2.3 From 3511494ce2f3d3b77544c79b87511a4ddb61dc89 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:55 +0100 Subject: vxlan: Group Policy extension Implements supports for the Group Policy VXLAN extension [0] to provide a lightweight and simple security label mechanism across network peers based on VXLAN. The security context and associated metadata is mapped to/from skb->mark. This allows further mapping to a SELinux context using SECMARK, to implement ACLs directly with nftables, iptables, OVS, tc, etc. The group membership is defined by the lower 16 bits of skb->mark, the upper 16 bits are used for flags. SELinux allows to manage label to secure local resources. However, distributed applications require ACLs to implemented across hosts. This is typically achieved by matching on L2-L4 fields to identify the original sending host and process on the receiver. On top of that, netlabel and specifically CIPSO [1] allow to map security contexts to universal labels. However, netlabel and CIPSO are relatively complex. This patch provides a lightweight alternative for overlay network environments with a trusted underlay. No additional control protocol is required. 
Host 1: Host 2: Group A Group B Group B Group A +-----+ +-------------+ +-------+ +-----+ | lxc | | SELinux CTX | | httpd | | VM | +--+--+ +--+----------+ +---+---+ +--+--+ \---+---/ \----+---/ | | +---+---+ +---+---+ | vxlan | | vxlan | +---+---+ +---+---+ +------------------------------+ Backwards compatibility: A VXLAN-GBP socket can receive standard VXLAN frames and will assign the default group 0x0000 to such frames. A Linux VXLAN socket will drop VXLAN-GBP frames. The extension is therefore disabled by default and needs to be specifically enabled: ip link add [...] type vxlan [...] gbp In a mixed environment with VXLAN and VXLAN-GBP sockets, the GBP socket must run on a separate port number. Examples: iptables: host1# iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200 host2# iptables -I INPUT -m mark --mark 0x200 -j DROP OVS: # ovs-ofctl add-flow br0 'in_port=1,actions=load:0x200->NXM_NX_TUN_GBP_ID[],NORMAL' # ovs-ofctl add-flow br0 'in_port=2,tun_gbp_id=0x200,actions=drop' [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy [1] http://lwn.net/Articles/204905/ Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 84 ++++++++++++++++++++++++++++++++++++------- include/net/vxlan.h | 79 +++++++++++++++++++++++++++++++++++++--- include/uapi/linux/if_link.h | 1 + net/openvswitch/vport-vxlan.c | 9 +++-- 4 files changed, 152 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 99df0d76157c..6dbf8e041922 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -620,7 +620,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, continue; vh2 = (struct vxlanhdr *)(p->data + off_vx); - if (vh->vx_vni != vh2->vx_vni) { + if (vh->vx_flags != vh2->vx_flags || + vh->vx_vni != vh2->vx_vni) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -1183,6 +1184,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct vxlan_sock *vs; struct vxlanhdr *vxh; u32 flags, vni; + struct vxlan_metadata md = {0}; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1216,6 +1218,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) vni &= VXLAN_VID_MASK; } + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. + */ + if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { + struct vxlanhdr_gbp *gbp; + + gbp = (struct vxlanhdr_gbp *)vxh; + md.gbp = ntohs(gbp->policy_id); + + if (gbp->dont_learn) + md.gbp |= VXLAN_GBP_DONT_LEARN; + + if (gbp->policy_applied) + md.gbp |= VXLAN_GBP_POLICY_APPLIED; + + flags &= ~VXLAN_GBP_USED_BITS; + } + if (flags || (vni & ~VXLAN_VID_MASK)) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. 
This behavior diverges from @@ -1229,7 +1249,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto bad_flags; } - vs->rcv(vs, skb, vxh->vx_vni); + md.vni = vxh->vx_vni; + vs->rcv(vs, skb, &md); return 0; drop: @@ -1246,8 +1267,8 @@ error: return 1; } -static void vxlan_rcv(struct vxlan_sock *vs, - struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct iphdr *oip = NULL; struct ipv6hdr *oip6 = NULL; @@ -1258,7 +1279,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, int err = 0; union vxlan_addr *remote_ip; - vni = ntohl(vx_vni) >> 8; + vni = ntohl(md->vni) >> 8; /* Is this VNI defined? */ vxlan = vxlan_vs_find_vni(vs, vni); if (!vxlan) @@ -1292,6 +1313,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, goto drop; skb_reset_network_header(skb); + skb->mark = md->gbp; if (oip6) err = IP6_ECN_decapsulate(oip6, skb); @@ -1641,13 +1663,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } +static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, + struct vxlan_metadata *md) +{ + struct vxlanhdr_gbp *gbp; + + gbp = (struct vxlanhdr_gbp *)vxh; + vxh->vx_flags |= htonl(VXLAN_HF_GBP); + + if (md->gbp & VXLAN_GBP_DONT_LEARN) + gbp->dont_learn = 1; + + if (md->gbp & VXLAN_GBP_POLICY_APPLIED) + gbp->policy_applied = 1; + + gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); +} + #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, - __be16 src_port, __be16 dst_port, __be32 vni, - bool xnet) + __be16 src_port, __be16 dst_port, + struct vxlan_metadata *md, bool xnet) { struct vxlanhdr *vxh; int min_headroom; @@ -1696,7 +1735,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_vni = md->vni; if (type & SKB_GSO_TUNNEL_REMCSUM) { u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> @@ -1714,6 +1753,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, } } + if (vs->flags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vs, md); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, @@ -1728,7 +1770,8 @@ err: int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) + __be16 src_port, __be16 dst_port, + struct vxlan_metadata *md, bool xnet) { struct vxlanhdr *vxh; int min_headroom; @@ -1771,7 +1814,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_vni = md->vni; if (type & SKB_GSO_TUNNEL_REMCSUM) { u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> @@ -1789,6 +1832,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, } } + if (vs->flags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vs, md); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, @@ -1849,6 +1895,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, const struct iphdr *old_iph; struct flowi4 fl4; union vxlan_addr *dst; + struct vxlan_metadata md; __be16 src_port = 0, dst_port; u32 vni; __be16 df = 0; @@ 
-1919,11 +1966,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + md.vni = htonl(vni << 8); + md.gbp = skb->mark; err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, fl4.saddr, dst->sin.sin_addr.s_addr, - tos, ttl, df, src_port, dst_port, - htonl(vni << 8), + tos, ttl, df, src_port, dst_port, &md, !net_eq(vxlan->net, dev_net(vxlan->dev))); if (err < 0) { /* skb is already freed. */ @@ -1976,10 +2024,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ttl = ttl ? : ip6_dst_hoplimit(ndst); + md.vni = htonl(vni << 8); + md.gbp = skb->mark; err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, dev, &fl6.saddr, &fl6.daddr, 0, ttl, - src_port, dst_port, htonl(vni << 8), + src_port, dst_port, &md, !net_eq(vxlan->net, dev_net(vxlan->dev))); #endif } @@ -2382,6 +2432,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, + [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -2706,6 +2757,9 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) vxlan->flags |= VXLAN_F_REMCSUM_RX; + if (data[IFLA_VXLAN_GBP]) + vxlan->flags |= VXLAN_F_GBP; + if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, vxlan->dst_port)) { pr_info("duplicate VNI %u\n", vni); @@ -2851,6 +2905,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) goto nla_put_failure; + if (vxlan->flags & VXLAN_F_GBP && + nla_put_flag(skb, IFLA_VXLAN_GBP)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0a7443b49133..f4a3583171bd 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -11,15 +11,76 @@ #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<" +#endif + __be16 policy_id; + __be32 vx_vni; +}; + +#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF) + +/* skb->mark mapping + * + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16) +#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) +#define VXLAN_GBP_ID_MASK (0xFFFF) + +/* VXLAN protocol header: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |G|R|R|R|I|R|R|C| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * G = 1 Group Policy (VXLAN-GBP) + * I = 1 VXLAN Network Identifier (VNI) present + * C = 1 Remote checksum offload (RCO) + */ struct vxlanhdr { __be32 vx_flags; __be32 vx_vni; }; /* VXLAN header flags. 
*/ -#define VXLAN_HF_VNI 0x08000000 -#define VXLAN_HF_RCO 0x00200000 +#define VXLAN_HF_RCO BIT(24) +#define VXLAN_HF_VNI BIT(27) +#define VXLAN_HF_GBP BIT(31) /* Remote checksum offload header option */ #define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ @@ -32,8 +93,14 @@ struct vxlanhdr { #define VXLAN_VID_MASK (VXLAN_N_VID - 1) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) +struct vxlan_metadata { + __be32 vni; + u32 gbp; +}; + struct vxlan_sock; -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); +typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, + struct vxlan_metadata *md); /* per UDP socket information */ struct vxlan_sock { @@ -60,6 +127,7 @@ struct vxlan_sock { #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 #define VXLAN_F_REMCSUM_TX 0x200 #define VXLAN_F_REMCSUM_RX 0x400 +#define VXLAN_F_GBP 0x800 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, @@ -70,7 +138,8 @@ void vxlan_sock_release(struct vxlan_sock *vs); int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); + __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, + bool xnet); static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b2723f65846f..2a8380edbb7e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -372,6 +372,7 @@ enum { IFLA_VXLAN_UDP_ZERO_CSUM6_RX, IFLA_VXLAN_REMCSUM_TX, IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 1435a053a870..9919d71c52c3 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -59,7 +59,8 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) } /* Called with rcu_read_lock and BH disabled. 
*/ -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; struct vport *vport = vs->data; @@ -68,7 +69,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); - key = cpu_to_be64(ntohl(vx_vni) >> 8); + key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, key, TUNNEL_KEY, NULL, 0); @@ -146,6 +147,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; const struct ovs_key_ipv4_tunnel *tun_key; + struct vxlan_metadata md = {0}; struct rtable *rt; struct flowi4 fl; __be16 src_port; @@ -170,12 +172,13 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->ignore_df = 1; src_port = udp_flow_src_port(net, skb, 0, 0, true); + md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(tun_key->tun_id) << 8), + &md, false); if (err < 0) ip_rt_put(rt); -- cgit v1.2.3 From d91641d9b5047b0a0a4d223a0b87306e6dff8c02 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:57 +0100 Subject: openvswitch: Rename GENEVE_TUN_OPTS() to TUN_METADATA_OPTS() Also factors out Geneve validation code into a new separate function validate_and_copy_geneve_opts(). A subsequent patch will introduce VXLAN options. Rename the existing GENEVE_TUN_OPTS() to reflect its extended purpose of carrying generic tunnel metadata options. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 2 +- net/openvswitch/flow.h | 14 ++++---- net/openvswitch/flow_netlink.c | 72 +++++++++++++++++++++++------------------- 3 files changed, 47 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index df334fe43d7f..e2c348b8baca 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 8)) - 1 > sizeof(key->tun_opts)); - memcpy(GENEVE_OPTS(key, tun_info->options_len), + memcpy(TUN_METADATA_OPTS(key, tun_info->options_len), tun_info->options, tun_info->options_len); key->tun_opts_len = tun_info->options_len; } else { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a8b30f334388..d3d0a406562d 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel { struct ovs_tunnel_info { struct ovs_key_ipv4_tunnel tunnel; - const struct geneve_opt *options; + const void *options; u8 options_len; }; @@ -61,10 +61,10 @@ struct ovs_tunnel_info { * maximum size. This allows us to get the benefits of variable length * matching for small options. 
*/ -#define GENEVE_OPTS(flow_key, opt_len) \ - ((struct geneve_opt *)((flow_key)->tun_opts + \ - FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ - opt_len)) +#define TUN_METADATA_OFFSET(opt_len) \ + (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len) +#define TUN_METADATA_OPTS(flow_key, opt_len) \ + ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len))) static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be32 saddr, __be32 daddr, @@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { tun_info->tunnel.tun_id = tun_id; @@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, __be16 tp_dst, __be64 tun_id, __be16 tun_flags, - const struct geneve_opt *opts, + const void *opts, u8 opts_len) { __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d1eecf707613..2e8a9cd10802 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -432,8 +432,7 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); } - opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, - nla_len(a)); + opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), nla_len(a), is_mask); return 0; @@ -558,8 +557,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, static int __ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) @@ -600,8 +598,7 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, static int ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output, - const struct geneve_opt *tun_opts, - int swkey_tun_opts_len) + const void *tun_opts, int swkey_tun_opts_len) { struct nlattr *nla; int err; @@ -1148,10 +1145,10 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; if ((swkey->tun_key.ipv4_dst || is_mask)) { - const struct geneve_opt *opts = NULL; + const void *opts = NULL; if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) - opts = GENEVE_OPTS(output, swkey->tun_opts_len); + opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, swkey->tun_opts_len)) @@ -1540,6 +1537,34 @@ void ovs_match_init(struct sw_flow_match *match, } } +static int validate_geneve_opts(struct sw_flow_key *key) +{ + struct geneve_opt *option; + int opts_len = key->tun_opts_len; + bool crit_opt = false; + + option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); + while (opts_len > 0) { + int len; + + if (opts_len < sizeof(*option)) + return -EINVAL; + + len = sizeof(*option) + option->length * 4; + if (len > opts_len) + return -EINVAL; + + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); + + option = (struct geneve_opt *)((u8 *)option + len); + opts_len -= len; + }; + + key->tun_key.tun_flags |= crit_opt ? 
TUNNEL_CRIT_OPT : 0; + + return 0; +} + static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { @@ -1555,28 +1580,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; if (key.tun_opts_len) { - struct geneve_opt *option = GENEVE_OPTS(&key, - key.tun_opts_len); - int opts_len = key.tun_opts_len; - bool crit_opt = false; - - while (opts_len > 0) { - int len; - - if (opts_len < sizeof(*option)) - return -EINVAL; - - len = sizeof(*option) + option->length * 4; - if (len > opts_len) - return -EINVAL; - - crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); - - option = (struct geneve_opt *)((u8 *)option + len); - opts_len -= len; - }; - - key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; + err = validate_geneve_opts(&key); + if (err < 0) + return err; }; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); @@ -1597,9 +1603,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, * everything else will go away after flow setup. We can append * it to tun_info and then point there. */ - memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), - key.tun_opts_len); - tun_info->options = (struct geneve_opt *)(tun_info + 1); + memcpy((tun_info + 1), + TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); + tun_info->options = (tun_info + 1); } else { tun_info->options = NULL; } -- cgit v1.2.3 From 81bfe3c3cf153441588e50a57f53bc9816f37d37 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:58 +0100 Subject: openvswitch: Allow for any level of nesting in flow attributes nlattr_set() is currently hardcoded to two levels of nesting. This change introduces struct ovs_len_tbl to define minimal length requirements plus next level nesting tables to traverse the key attributes to arbitrary depth. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 106 ++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 2e8a9cd10802..518941c5bdf1 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -50,6 +50,13 @@ #include "flow_netlink.h" +struct ovs_len_tbl { + int len; + const struct ovs_len_tbl *next; +}; + +#define OVS_ATTR_NESTED -1 + static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) { @@ -289,29 +296,44 @@ size_t ovs_key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } +static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, + [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, + [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, +}; + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ -static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), - [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), - [OVS_KEY_ATTR_TUNNEL] = -1, - [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), +static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, + [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, + [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, + [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, + [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, + [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, + [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, + [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, + [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, + [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, + [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, + [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, + [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, + [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, + .next = ovs_tunnel_key_lens, }, + [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, }; static bool is_all_zero(const u8 *fp, size_t size) @@ -352,8 +374,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) { + expected_len = ovs_key_lens[type].len; + if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Key %d has unexpected len %d expected %d", type, nla_len(nla), expected_len); return -EINVAL; @@ -451,30 +473,16 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int type = nla_type(a); int err; - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), - [OVS_TUNNEL_KEY_ATTR_OAM] = 0, - [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, - }; - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { OVS_NLERR(log, "Tunnel 
attr %d out of range max %d", type, OVS_TUNNEL_KEY_ATTR_MAX); return -EINVAL; } - if (ovs_tunnel_key_lens[type] != nla_len(a) && - ovs_tunnel_key_lens[type] != -1) { + if (ovs_tunnel_key_lens[type].len != nla_len(a) && + ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", - type, nla_len(a), ovs_tunnel_key_lens[type]); + type, nla_len(a), ovs_tunnel_key_lens[type].len); return -EINVAL; } @@ -912,18 +920,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return 0; } -static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) +static void nlattr_set(struct nlattr *attr, u8 val, + const struct ovs_len_tbl *tbl) { struct nlattr *nla; int rem; /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { - /* We assume that ovs_key_lens[type] == -1 means that type is a - * nested attribute - */ - if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) - nlattr_set(nla, val, false); + if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) + nlattr_set(nla, val, tbl[nla_type(nla)].next); else memset(nla_data(nla), val, nla_len(nla)); } @@ -931,7 +937,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) static void mask_set_nlattr(struct nlattr *attr, u8 val) { - nlattr_set(attr, val, true); + nlattr_set(attr, val, ovs_key_lens); } /** @@ -1628,8 +1634,8 @@ static int validate_set(const struct nlattr *a, return -EINVAL; if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type] != nla_len(ovs_key) && - ovs_key_lens[key_type] != -1)) + (ovs_key_lens[key_type].len != nla_len(ovs_key) && + ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) return -EINVAL; switch (key_type) { -- cgit v1.2.3 From 1dd144cf5b4b47e12438c2c6883925ce1a9b499f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:59 +0100 Subject: openvswitch: Support VXLAN Group Policy extension Introduces support for the group policy extension to the VXLAN virtual port. The extension is disabled by default and only enabled if the user has provided the respective configuration. ovs-vsctl add-port br0 vxlan0 -- \ set Interface vxlan0 type=vxlan options:exts=gbp The configuration interface to enable the extension is based on a new attribute OVS_VXLAN_EXT_GBP nested inside OVS_TUNNEL_ATTR_EXTENSION which can carry additional extensions as needed in the future. The group policy metadata is stored as binary blob (struct ovs_vxlan_opts) internally just like Geneve options but transported as nested Netlink attributes to user space. Renames the existing TUNNEL_OPTIONS_PRESENT to TUNNEL_GENEVE_OPT with the binary value kept intact, a new flag TUNNEL_VXLAN_OPT is introduced. The attributes OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and existing OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS are implemented mutually exclusive. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/net/ip_tunnels.h | 5 +- include/uapi/linux/openvswitch.h | 11 ++++ net/openvswitch/flow_netlink.c | 114 ++++++++++++++++++++++++++++++++++----- net/openvswitch/vport-geneve.c | 15 ++++-- net/openvswitch/vport-vxlan.c | 82 +++++++++++++++++++++++++++- net/openvswitch/vport-vxlan.h | 11 ++++ 6 files changed, 218 insertions(+), 20 deletions(-) create mode 100644 net/openvswitch/vport-vxlan.h (limited to 'net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 25a59eb388a6..ce4db3cc5647 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -97,7 +97,10 @@ struct ip_tunnel { #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) #define TUNNEL_OAM __cpu_to_be16(0x0200) #define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) -#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800) +#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) +#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) + +#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) struct tnl_ptk_info { __be16 flags; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index f714e8633352..cd8d933963c2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -252,11 +252,21 @@ enum ovs_vport_attr { #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) +enum { + OVS_VXLAN_EXT_UNSPEC, + OVS_VXLAN_EXT_GBP, /* Flag or __u32 */ + __OVS_VXLAN_EXT_MAX, +}; + +#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1) + + /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels. */ enum { OVS_TUNNEL_ATTR_UNSPEC, OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */ + OVS_TUNNEL_ATTR_EXTENSION, __OVS_TUNNEL_ATTR_MAX }; @@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ + OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 518941c5bdf1..d210d1be3470 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,7 @@ #include #include "flow_netlink.h" +#include "vport-vxlan.h" struct ovs_len_tbl { int len; @@ -268,6 +269,9 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. + */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } @@ -308,6 +312,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ @@ -460,6 +465,41 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, return 0; } +static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, +}; + +static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + unsigned long opt_key_offset; + struct ovs_vxlan_opts opts; + int err; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); + if (err < 0) + return err; + + memset(&opts, 0, sizeof(opts)); + + if (tb[OVS_VXLAN_EXT_GBP]) + opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + return 0; +} + static int ipv4_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -468,6 +508,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + int opts_type = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); @@ -527,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); if (err) return err; - tun_flags |= TUNNEL_OPTIONS_PRESENT; + tun_flags |= TUNNEL_GENEVE_OPT; + opts_type = type; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; + + tun_flags |= TUNNEL_VXLAN_OPT; + opts_type = type; break; default: OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", @@ -560,6 +620,23 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } } + return opts_type; +} + +static int vxlan_opt_to_nlattr(struct sk_buff *skb, + const void *tun_opts, int swkey_tun_opts_len) +{ + const struct ovs_vxlan_opts *opts = tun_opts; + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + if (!nla) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } @@ -596,10 +673,15 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts && - nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, - swkey_tun_opts_len, tun_opts)) - return -EMSGSIZE; + if (tun_opts) { + if (output->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_VXLAN_OPT && + vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) + return -EMSGSIZE; + } return 0; } @@ -680,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log)) + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -1578,17 +1660,23 @@ static 
int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_key key; struct ovs_tunnel_info *tun_info; struct nlattr *a; - int err, start; + int err, start, opts_type; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); - if (err) - return err; + opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + if (opts_type < 0) + return opts_type; if (key.tun_opts_len) { - err = validate_geneve_opts(&key); - if (err < 0) - return err; + switch (opts_type) { + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + err = validate_geneve_opts(&key); + if (err < 0) + return err; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + break; + } }; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 88a010c98c05..7ca3d454ff3b 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) opts_len = geneveh->opt_len * 4; - flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | (geneveh->oam ? TUNNEL_OAM : 0) | (geneveh->critical ? TUNNEL_CRIT_OPT : 0); @@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 sport; struct rtable *rt; struct flowi4 fl; - u8 vni[3]; + u8 vni[3], opts_len, *opts; __be16 df; int err; @@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tunnel_id_to_vni(tun_key->tun_id, vni); skb->ignore_df = 1; + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { + opts = (u8 *)tun_info->options; + opts_len = tun_info->options_len; + } else { + opts = NULL; + opts_len = 0; + } + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, - tun_key->tun_flags, vni, - tun_info->options_len, (u8 *)tun_info->options, + tun_key->tun_flags, vni, opts_len, opts, false); if (err < 0) ip_rt_put(rt); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 9919d71c52c3..8a2d54cba9ba 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -40,6 +40,7 @@ #include "datapath.h" #include "vport.h" +#include "vport-vxlan.h" /** * struct vxlan_port - Keeps track of open UDP ports @@ -49,6 +50,7 @@ struct vxlan_port { struct vxlan_sock *vs; char name[IFNAMSIZ]; + u32 exts; /* VXLAN_F_* in */ }; static struct vport_ops ovs_vxlan_vport_ops; @@ -63,16 +65,26 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; + struct vxlan_port *vxlan_port; struct vport *vport = vs->data; struct iphdr *iph; + struct ovs_vxlan_opts opts = { + .gbp = md->gbp, + }; __be64 key; + __be16 flags; + + flags = TUNNEL_KEY; + vxlan_port = vxlan_vport(vport); + if (vxlan_port->exts & VXLAN_F_GBP) + flags |= TUNNEL_VXLAN_OPT; /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, TUNNEL_KEY, NULL, 0); + key, flags, &opts, sizeof(opts)); ovs_vport_receive(vport, skb, &tun_info); } @@ -84,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; + + if (vxlan_port->exts) { + struct nlattr *exts; + + exts = 
nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + if (!exts) + return -EMSGSIZE; + + if (vxlan_port->exts & VXLAN_F_GBP && + nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) + return -EMSGSIZE; + + nla_nest_end(skb, exts); + } + return 0; } @@ -96,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport) ovs_vport_deferred_free(vport); } +static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, +}; + +static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr) +{ + struct nlattr *exts[OVS_VXLAN_EXT_MAX+1]; + struct vxlan_port *vxlan_port; + int err; + + if (nla_len(attr) < sizeof(struct nlattr)) + return -EINVAL; + + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + if (err < 0) + return err; + + vxlan_port = vxlan_vport(vport); + + if (exts[OVS_VXLAN_EXT_GBP]) + vxlan_port->exts |= VXLAN_F_GBP; + + return 0; +} + static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -128,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); + a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); + if (a) { + err = vxlan_configure_exts(vport, a); + if (err) { + ovs_vport_free(vport); + goto error; + } + } + + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, + vxlan_port->exts); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -141,6 +203,21 @@ error: return ERR_PTR(err); } +static int vxlan_ext_gbp(struct sk_buff *skb) +{ + const struct ovs_tunnel_info *tun_info; + const struct ovs_vxlan_opts *opts; + + tun_info = OVS_CB(skb)->egress_tun_info; + opts = tun_info->options; + + if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT && + tun_info->options_len >= sizeof(*opts)) + return opts->gbp; + else + return 0; +} + static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); @@ -173,6 +250,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) src_port = udp_flow_src_port(net, skb, 0, 0, true); md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); + md.gbp = vxlan_ext_gbp(skb); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, fl.saddr, tun_key->ipv4_dst, diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h new file mode 100644 index 000000000000..4b08233e73d5 --- /dev/null +++ b/net/openvswitch/vport-vxlan.h @@ -0,0 +1,11 @@ +#ifndef VPORT_VXLAN_H +#define VPORT_VXLAN_H 1 + +#include +#include + +struct ovs_vxlan_opts { + __u32 gbp; +}; + +#endif -- cgit v1.2.3 From eeb5a067d11e69fd235a36e17ff16a3177d847a4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 14 Jan 2015 13:44:21 -0800 Subject: Bluetooth: Show device address type for L2CAP debugfs entries The device address types are BR/EDR Public, LE Public and LE Random and any of these three is valid for L2CAP connections. So show the correct type in the debugfs list.
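A minimal userspace sketch of how the updated output can be consumed, assuming the line layout produced by the seq_printf() change in the hunk below (bdaddr, address type in parentheses, then the remaining channel fields); the parser itself is illustrative and not part of the patch:

	/* Parse the leading fields of one line of the updated l2cap debugfs
	 * output, e.g. "AA:BB:CC:DD:EE:FF (0) 11:22:33:44:55:66 (1) 7 ..."
	 * (illustrative helper, not kernel code). */
	#include <stdio.h>

	int main(void)
	{
		char src[18], dst[18];
		unsigned int src_type, dst_type, state;

		if (scanf("%17s (%u) %17s (%u) %u",
			  src, &src_type, dst, &dst_type, &state) == 5)
			printf("src=%s/%u dst=%s/%u state=%u\n",
			       src, src_type, dst, dst_type, state);
		return 0;
	}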
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/l2cap_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 11029b212874..4e7477d34eb8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7527,8 +7527,8 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) read_lock(&chan_list_lock); list_for_each_entry(c, &chan_list, global_l) { - seq_printf(f, "%pMR %pMR %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", - &c->src, &c->dst, + seq_printf(f, "%pMR (%u) %pMR (%u) %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", + &c->src, c->src_type, &c->dst, c->dst_type, c->state, __le16_to_cpu(c->psm), c->scid, c->dcid, c->imtu, c->omtu, c->sec_level, c->mode); -- cgit v1.2.3 From 111e4bccd1e27f0703936aadf41f227a9226e031 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 14 Jan 2015 14:40:42 -0800 Subject: Bluetooth: Fix issue with switching BR/EDR back on when disabled For dual-mode controllers it is possible to disable BR/EDR and operate as LE single mode controllers with a static random address. If that is the case, then refuse switching BR/EDR back on after the controller has been powered. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e531da805923..cae612658ba9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4683,6 +4683,21 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_REJECTED); goto unlock; + } else { + /* When configuring a dual-mode controller to operate + * with LE only and using a static address, then switching + * BR/EDR back on is not allowed. + * + * Dual-mode controllers shall operate with the public + * address as its identity address for BR/EDR and LE. So + * reject the attempt to create an invalid configuration. + */ + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY)) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); + goto unlock; + } } if (mgmt_pending_find(MGMT_OP_SET_BREDR, hdev)) { -- cgit v1.2.3 From 157029ba3014e2025170c7bf83bbe7d4ccb16692 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 14 Jan 2015 15:43:09 -0800 Subject: Bluetooth: Fix LE SMP channel source address and source address type The source address and source address type of the LE SMP channel can either be the public address of the controller or the static random address configured by the host. Right now the public address is used for the LE SMP channel and obviously that is not correct if the controller operates with the configured static random address. 
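In rough terms, the address selection implemented by the hunk below boils down to the following standalone sketch; the boolean parameters stand in for the hdev flag and address checks and are not the kernel's own API:

	#include <stdbool.h>
	#include <stdio.h>

	enum smp_src { SRC_PUBLIC, SRC_STATIC_RANDOM };

	/* Pick the SMP listening address type: static random wins when it is
	 * forced, when no public address exists, or when BR/EDR is disabled
	 * and a static address has been configured. */
	static enum smp_src pick_src(bool force_static, bool have_public,
				     bool bredr_enabled, bool have_static)
	{
		if (force_static || !have_public ||
		    (!bredr_enabled && have_static))
			return SRC_STATIC_RANDOM;
		return SRC_PUBLIC;
	}

	int main(void)
	{
		printf("LE-only with static addr -> %s\n",
		       pick_src(false, true, false, true) == SRC_STATIC_RANDOM ?
		       "static random" : "public");
		return 0;
	}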
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/smp.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 04e49f8f0982..3e4de935f667 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2950,11 +2950,30 @@ create_chan: l2cap_chan_set_defaults(chan); - bacpy(&chan->src, &hdev->bdaddr); - if (cid == L2CAP_CID_SMP) - chan->src_type = BDADDR_LE_PUBLIC; - else + if (cid == L2CAP_CID_SMP) { + /* If usage of static address is forced or if the devices + * does not have a public address, then listen on the static + * address. + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configued, then listen on + * the static address instead. + */ + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + bacpy(&chan->src, &hdev->static_addr); + chan->src_type = BDADDR_LE_RANDOM; + } else { + bacpy(&chan->src, &hdev->bdaddr); + chan->src_type = BDADDR_LE_PUBLIC; + } + } else { + bacpy(&chan->src, &hdev->bdaddr); chan->src_type = BDADDR_BREDR; + } + chan->state = BT_LISTEN; chan->mode = L2CAP_MODE_BASIC; chan->imtu = L2CAP_DEFAULT_MTU; -- cgit v1.2.3 From 7e7ec44564cacd4a19cca9523a7be1916b108aa6 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 14 Jan 2015 15:43:10 -0800 Subject: Bluetooth: Don't register any SMP channel if LE is not supported When LE features are not supported, then do not bother registering any kind of SMP channel. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/smp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3e4de935f667..08a9314f3ca7 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3066,6 +3066,12 @@ int smp_register(struct hci_dev *hdev) BT_DBG("%s", hdev->name); + /* If the controller does not support Low Energy operation, then + * there is also no need to register any SMP channel. + */ + if (!lmp_le_capable(hdev)) + return 0; + chan = smp_add_cid(hdev, L2CAP_CID_SMP); if (IS_ERR(chan)) return PTR_ERR(chan); -- cgit v1.2.3 From 162a3bac8d007aae3d1ba9da9b0d0b9ecfef87cc Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 14 Jan 2015 15:43:11 -0800 Subject: Bluetooth: Bind the SMP channel registration to management power state When the controller gets powered on via the management interface, then register the supported SMP channels. There is no point in registering these channels earlier since it is not know what identity address the controller is going to operate with. When powering down a controller unregister all SMP channels. This is required since a powered down controller is allowed to change its identity address. In addition the SMP channels are only available when the controller is powered via the management interface. When using legacy ioctl, then Bluetooth Low Energy is not supported and registering kernel side SMP integration may actually cause confusion. 
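A standalone sketch of the resulting ordering, using stand-in types and stub functions rather than the real hci/smp interfaces: SMP channels are registered only after a successful management power-on, and dropped again on power-off:

	#include <stdbool.h>
	#include <stdio.h>

	struct dev { bool smp_registered; };

	static void smp_register_stub(struct dev *d)   { d->smp_registered = true; }
	static void smp_unregister_stub(struct dev *d) { d->smp_registered = false; }

	/* Register only once power-on via the management interface has
	 * completed and the identity address is therefore settled. */
	static void powered_complete(struct dev *d, int status)
	{
		if (!status)
			smp_register_stub(d);
	}

	/* A powered-down controller may change its identity address, so the
	 * SMP channels are unregistered again. */
	static void power_off(struct dev *d)
	{
		smp_unregister_stub(d);
	}

	int main(void)
	{
		struct dev d = { false };

		powered_complete(&d, 0);
		printf("after power on: %d\n", d.smp_registered);
		power_off(&d);
		printf("after power off: %d\n", d.smp_registered);
		return 0;
	}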
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 6 +++--- net/bluetooth/mgmt.c | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c04197347c90..34c17a0645ce 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -933,10 +933,8 @@ static int __hci_init(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) hci_debugfs_create_bredr(hdev); - if (lmp_le_capable(hdev)) { + if (lmp_le_capable(hdev)) hci_debugfs_create_le(hdev); - smp_register(hdev); - } return 0; } @@ -2133,6 +2131,8 @@ static void hci_power_off(struct work_struct *work) BT_DBG("%s", hdev->name); hci_dev_do_close(hdev); + + smp_unregister(hdev); } static void hci_discov_off(struct work_struct *work) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index cae612658ba9..f5c4d2eed9a1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6232,6 +6232,15 @@ static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) BT_DBG("status 0x%02x", status); + if (!status) { + /* Register the available SMP channels (BR/EDR and LE) only + * when successfully powering on the controller. This late + * registration is required so that LE SMP can clearly + * decide if the public address or static address is used. + */ + smp_register(hdev); + } + hci_dev_lock(hdev); mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); -- cgit v1.2.3 From f89903d53f4d39577be98940f7cfa49d66f86db5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Jan 2015 16:02:46 +0100 Subject: mac80211: remove 80+80 MHz rate reporting These rates are treated the same as 160 MHz in the spec, so it makes no sense to distinguish them. As no driver uses them yet, this is also not a problem, just remove them. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++---- net/mac80211/cfg.c | 2 -- net/mac80211/rx.c | 5 ----- net/mac80211/util.c | 2 -- 4 files changed, 2 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 123f2308958a..275ee56152ad 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -931,15 +931,13 @@ enum mac80211_rx_flags { * These flags are used with the @vht_flag member of * &struct ieee80211_rx_status. 
* @RX_VHT_FLAG_80MHZ: 80 MHz was used - * @RX_VHT_FLAG_80P80MHZ: 80+80 MHz was used * @RX_VHT_FLAG_160MHZ: 160 MHz was used * @RX_VHT_FLAG_BF: packet was beamformed */ enum mac80211_rx_vht_flags { RX_VHT_FLAG_80MHZ = BIT(0), - RX_VHT_FLAG_80P80MHZ = BIT(1), - RX_VHT_FLAG_160MHZ = BIT(2), - RX_VHT_FLAG_BF = BIT(3), + RX_VHT_FLAG_160MHZ = BIT(1), + RX_VHT_FLAG_BF = BIT(2), }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fd6860d7f557..6d5076fbf87a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -465,8 +465,6 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80P80MHZ) - rinfo->flags |= RATE_INFO_FLAGS_80P80_MHZ_WIDTH; if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3a1a3ba40bd8..3d79d498e7f6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -361,9 +361,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, u16 known = local->hw.radiotap_vht_details; rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); - /* known field - how to handle 80+80? */ - if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - known &= ~IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; put_unaligned_le16(known, pos); pos += 2; /* flags */ @@ -378,8 +375,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* bandwidth */ if (status->vht_flag & RX_VHT_FLAG_80MHZ) *pos++ = 4; - else if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - *pos++ = 0; /* marked not known above */ else if (status->vht_flag & RX_VHT_FLAG_160MHZ) *pos++ = 11; else if (status->flag & RX_FLAG_40MHZ) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 83ba6cd9cf8d..db7216124736 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2552,8 +2552,6 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; if (status->vht_flag & RX_VHT_FLAG_80MHZ) ri.flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - ri.flags |= RATE_INFO_FLAGS_80P80_MHZ_WIDTH; if (status->vht_flag & RX_VHT_FLAG_160MHZ) ri.flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; if (status->flag & RX_FLAG_SHORT_GI) -- cgit v1.2.3 From 97d910d0aaa619ca530d08e2b1125b8014ccb030 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Jan 2015 16:05:21 +0100 Subject: cfg80211: remove 80+80 MHz rate reporting These rates are treated the same as 160 MHz in the spec, so it makes no sense to distinguish them. As no driver uses them yet, this is also not a problem, just remove them. In the userspace API the field remains reserved to preserve API and ABI. 
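As a minimal sketch of the consequence for rate reporting, assuming illustrative flag values rather than the real enum rate_info_flags, 160 MHz and the former 80+80 MHz case now land in the same width bucket:

	#include <stdio.h>

	#define W_40MHZ  (1U << 2)
	#define W_80MHZ  (1U << 3)
	#define W_160MHZ (1U << 4)	/* also used where 80+80 MHz was reported */

	/* Map width flags to the VHT bitrate table index (0 = 20 MHz). */
	static int width_idx(unsigned int flags)
	{
		return (flags & W_160MHZ) ? 3 :
		       (flags & W_80MHZ)  ? 2 :
		       (flags & W_40MHZ)  ? 1 : 0;
	}

	int main(void)
	{
		printf("160 or 80+80 -> %d, 80 -> %d, 40 -> %d, 20 -> %d\n",
		       width_idx(W_160MHZ), width_idx(W_80MHZ),
		       width_idx(W_40MHZ), width_idx(0));
		return 0;
	}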
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 +++----- include/uapi/linux/nl80211.h | 3 ++- net/wireless/nl80211.c | 3 --- net/wireless/util.c | 3 +-- 4 files changed, 6 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 38abc07503fd..0322048fddab 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -875,7 +875,6 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 MHz width transmission * @RATE_INFO_FLAGS_80_MHZ_WIDTH: 80 MHz width transmission - * @RATE_INFO_FLAGS_80P80_MHZ_WIDTH: 80+80 MHz width transmission * @RATE_INFO_FLAGS_160_MHZ_WIDTH: 160 MHz width transmission * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval * @RATE_INFO_FLAGS_60G: 60GHz MCS @@ -885,10 +884,9 @@ enum rate_info_flags { RATE_INFO_FLAGS_VHT_MCS = BIT(1), RATE_INFO_FLAGS_40_MHZ_WIDTH = BIT(2), RATE_INFO_FLAGS_80_MHZ_WIDTH = BIT(3), - RATE_INFO_FLAGS_80P80_MHZ_WIDTH = BIT(4), - RATE_INFO_FLAGS_160_MHZ_WIDTH = BIT(5), - RATE_INFO_FLAGS_SHORT_GI = BIT(6), - RATE_INFO_FLAGS_60G = BIT(7), + RATE_INFO_FLAGS_160_MHZ_WIDTH = BIT(4), + RATE_INFO_FLAGS_SHORT_GI = BIT(5), + RATE_INFO_FLAGS_60G = BIT(6), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b6c1a00bd8d2..11cdb85ac646 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2278,7 +2278,8 @@ struct nl80211_sta_flag_update { * @NL80211_RATE_INFO_VHT_MCS: MCS index for VHT (u8) * @NL80211_RATE_INFO_VHT_NSS: number of streams in VHT (u8) * @NL80211_RATE_INFO_80_MHZ_WIDTH: 80 MHz VHT rate - * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: 80+80 MHz VHT rate + * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the + * same as 160 for purposes of the bitrates * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 380784378df8..8998484ea970 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3614,9 +3614,6 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH && nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH)) return false; - if (info->flags & RATE_INFO_FLAGS_80P80_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_80P80_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH && nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH)) return false; diff --git a/net/wireless/util.c b/net/wireless/util.c index d0ac795445b7..6942d48f1ac5 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1073,8 +1073,7 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) if (WARN_ON_ONCE(rate->mcs > 9)) return 0; - idx = rate->flags & (RATE_INFO_FLAGS_160_MHZ_WIDTH | - RATE_INFO_FLAGS_80P80_MHZ_WIDTH) ? 3 : + idx = rate->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH ? 3 : rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 : rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 1 : 0; -- cgit v1.2.3 From a250e048a7fb32c04366e3ed17707aa2be627f8a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 15 Jan 2015 13:06:44 +0200 Subject: Bluetooth: Add helpers for src/dst bdaddr type conversion The current bdaddr_type() usage in l2cap_core.c is a bit funny in that it's always passed a hci_conn + a hci_conn member. Because of this only the hci_conn is really needed. 
Since the second parameter is always either hcon->src_type or hcon->dst type this patch adds two helper functions for each purpose: bdaddr_src_type() and bdaddr_dst_type(). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4e7477d34eb8..b080133010e2 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -63,10 +63,10 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event); -static inline __u8 bdaddr_type(struct hci_conn *hcon, __u8 type) +static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type) { - if (hcon->type == LE_LINK) { - if (type == ADDR_LE_DEV_PUBLIC) + if (link_type == LE_LINK) { + if (bdaddr_type == ADDR_LE_DEV_PUBLIC) return BDADDR_LE_PUBLIC; else return BDADDR_LE_RANDOM; @@ -75,6 +75,16 @@ static inline __u8 bdaddr_type(struct hci_conn *hcon, __u8 type) return BDADDR_BREDR; } +static inline u8 bdaddr_src_type(struct hci_conn *hcon) +{ + return bdaddr_type(hcon->type, hcon->src_type); +} + +static inline u8 bdaddr_dst_type(struct hci_conn *hcon) +{ + return bdaddr_type(hcon->type, hcon->dst_type); +} + /* ---- L2CAP channels ---- */ static struct l2cap_chan *__l2cap_get_chan_by_dcid(struct l2cap_conn *conn, @@ -646,7 +656,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); bacpy(&chan->dst, &hcon->dst); - chan->dst_type = bdaddr_type(hcon, hcon->dst_type); + chan->dst_type = bdaddr_dst_type(hcon); l2cap_chan_unlock(chan); } @@ -3790,8 +3800,8 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); - chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); - chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->local_amp_id = amp_id; @@ -5441,8 +5451,8 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, bacpy(&chan->src, &conn->hcon->src); bacpy(&chan->dst, &conn->hcon->dst); - chan->src_type = bdaddr_type(conn->hcon, conn->hcon->src_type); - chan->dst_type = bdaddr_type(conn->hcon, conn->hcon->dst_type); + chan->src_type = bdaddr_src_type(conn->hcon); + chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; chan->omtu = mtu; @@ -6881,7 +6891,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) */ if (hcon->type == LE_LINK && hci_bdaddr_list_lookup(&hcon->hdev->blacklist, &hcon->dst, - bdaddr_type(hcon, hcon->dst_type))) { + bdaddr_dst_type(hcon))) { kfree_skb(skb); return; } @@ -7123,7 +7133,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, /* Update source addr of the socket */ bacpy(&chan->src, &hcon->src); - chan->src_type = bdaddr_type(hcon, hcon->src_type); + chan->src_type = bdaddr_src_type(hcon); __l2cap_chan_add(conn, chan); @@ -7246,7 +7256,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) if (!conn) return; - dst_type = bdaddr_type(hcon, hcon->dst_type); + dst_type = bdaddr_dst_type(hcon); /* If device is blocked, do not create channels for it */ if 
(hci_bdaddr_list_lookup(&hdev->blacklist, &hcon->dst, dst_type)) @@ -7270,7 +7280,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) if (chan) { bacpy(&chan->src, &hcon->src); bacpy(&chan->dst, &hcon->dst); - chan->src_type = bdaddr_type(hcon, hcon->src_type); + chan->src_type = bdaddr_src_type(hcon); chan->dst_type = dst_type; __l2cap_chan_add(conn, chan); -- cgit v1.2.3 From 327a71910cbecd873054c3fb764f1fc59ede5297 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 15 Jan 2015 13:06:45 +0200 Subject: Bluetooth: Fix lookup of fixed channels by local bdaddr The comparing of chan->src should always be done against the local identity address, represented by hcon->src and hcon->src_type. This patch modifies l2cap_global_fixed_chan() to take the full hci_conn so that we can easily compare against hcon->src and hcon->src_type. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b080133010e2..6ba33f9631e8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7207,8 +7207,10 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) * global list (by passing NULL as first parameter). */ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, - bdaddr_t *src, u8 link_type) + struct hci_conn *hcon) { + u8 src_type = bdaddr_src_type(hcon); + read_lock(&chan_list_lock); if (c) @@ -7221,11 +7223,9 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, continue; if (c->state != BT_LISTEN) continue; - if (bacmp(&c->src, src) && bacmp(&c->src, BDADDR_ANY)) - continue; - if (link_type == ACL_LINK && c->src_type != BDADDR_BREDR) + if (bacmp(&c->src, &hcon->src) && bacmp(&c->src, BDADDR_ANY)) continue; - if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) + if (src_type != c->src_type) continue; l2cap_chan_hold(c); @@ -7267,7 +7267,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) * we left off, because the list lock would prevent calling the * potentially sleeping l2cap_chan_lock() function. */ - pchan = l2cap_global_fixed_chan(NULL, &hdev->bdaddr, hcon->type); + pchan = l2cap_global_fixed_chan(NULL, hcon); while (pchan) { struct l2cap_chan *chan, *next; @@ -7288,8 +7288,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) l2cap_chan_unlock(pchan); next: - next = l2cap_global_fixed_chan(pchan, &hdev->bdaddr, - hcon->type); + next = l2cap_global_fixed_chan(pchan, hcon); l2cap_chan_put(pchan); pchan = next; } -- cgit v1.2.3 From 7eb35b148305c6f86a6bc5818c29f79e0d45146c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 10:35:58 +0100 Subject: socket: use iov_length() Better to use available helpers. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/socket.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index a2c33a4dc7ba..e1278d7e1d5d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -882,11 +882,7 @@ static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, unsigned long nr_segs) { struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; + size_t size = iov_length(iov, nr_segs); msg->msg_name = NULL; msg->msg_namelen = 0; @@ -921,11 +917,7 @@ static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, unsigned long nr_segs) { struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; + size_t size = iov_length(iov, nr_segs); msg->msg_name = NULL; msg->msg_namelen = 0; -- cgit v1.2.3 From 2b8df323953bfbeb8289c56f43b7fcfa4c70e90a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2015 08:04:21 -0800 Subject: Bluetooth: Add paranoid check for existing LE and BR/EDR SMP channels When the SMP channels have been already registered, then print out a clear WARN_ON message that something went wrong. Also unregister the existing channels in this case before trying to register new ones. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/smp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 08a9314f3ca7..37d9180bfe1c 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3072,6 +3072,12 @@ int smp_register(struct hci_dev *hdev) if (!lmp_le_capable(hdev)) return 0; + if (WARN_ON(hdev->smp_data)) { + chan = hdev->smp_data; + hdev->smp_data = NULL; + smp_del_chan(chan); + } + chan = smp_add_cid(hdev, L2CAP_CID_SMP); if (IS_ERR(chan)) return PTR_ERR(chan); @@ -3091,6 +3097,12 @@ int smp_register(struct hci_dev *hdev) return 0; } + if (WARN_ON(hdev->smp_bredr_data)) { + chan = hdev->smp_bredr_data; + hdev->smp_bredr_data = NULL; + smp_del_chan(chan); + } + chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); if (IS_ERR(chan)) { int err = PTR_ERR(chan); -- cgit v1.2.3 From 597561bf6a666f532fbd6216624ed47916762f8e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:02:37 -0500 Subject: svcrdma: Clean up dprintk Nit: Fix inconsistent white space in dprintk messages. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index e0110270d650..2c67de032009 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -501,8 +501,8 @@ static int rdma_read_complete(struct svc_rqst *rqstp, ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len + rqstp->rq_arg.tail[0].iov_len; - dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, " - "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", + dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, " + "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n", ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); @@ -591,8 +591,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) + rqstp->rq_arg.tail[0].iov_len; svc_rdma_put_context(ctxt, 0); out: - dprintk("svcrdma: ret = %d, rq_arg.len =%d, " - "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", + dprintk("svcrdma: ret=%d, rq_arg.len=%u, " + "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n", ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); -- cgit v1.2.3 From 83f2bedfc6435ffeaa7b466058c5d22b5e8f428b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:02:45 -0500 Subject: svcrdma: Remove unused variable Nit: remove an unused variable to squelch a compiler warning. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4e618808bc98..4ba11d0cefe1 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -687,7 +687,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, { struct rdma_cm_id *listen_id; struct svcxprt_rdma *cma_xprt; - struct svc_xprt *xprt; int ret; dprintk("svcrdma: Creating RDMA socket\n"); @@ -698,7 +697,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, cma_xprt = rdma_create_xprt(serv, 1); if (!cma_xprt) return ERR_PTR(-ENOMEM); - xprt = &cma_xprt->sc_xprt; listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, IB_QPT_RC); -- cgit v1.2.3 From 2397aa8b515f7bd77c8d5698170b6a98fdd6721c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:02:54 -0500 Subject: svcrdma: Clean up read chunk counting The byte_count argument is not used, and the function is called only from one place. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 -- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 16 ---------------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 15 ++++++++++++--- 3 files changed, 12 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 975da754c778..2280325e4c88 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -178,8 +178,6 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQ_SIZE 4096 /* svc_rdma_marshal.c */ -extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, - int *, int *); extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 65b146297f5a..b681855cf970 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -70,22 +70,6 @@ static u32 *decode_read_list(u32 *va, u32 *vaend) return (u32 *)&ch->rc_position; } -/* - * Determine number of chunks and total bytes in chunk list. The chunk - * list has already been verified to fit within the RPCRDMA header. - */ -void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, - int *ch_count, int *byte_count) -{ - /* compute the number of bytes represented by read chunks */ - *byte_count = 0; - *ch_count = 0; - for (; ch->rc_discrim != 0; ch++) { - *byte_count = *byte_count + ntohl(ch->rc_target.rs_length); - *ch_count = *ch_count + 1; - } -} - /* * Decodes a write chunk list. The expected format is as follows: * descrim : xdr_one diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 2c67de032009..b3b7bb85844d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -365,12 +365,22 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, return ret; } +static unsigned int +rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) +{ + unsigned int count; + + for (count = 0; ch->rc_discrim != xdr_zero; ch++) + count++; + return count; +} + static int rdma_read_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { - int page_no, ch_count, ret; + int page_no, ret; struct rpcrdma_read_chunk *ch; u32 page_offset, byte_count; u64 rs_offset; @@ -381,8 +391,7 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, if (!ch) return 0; - svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); - if (ch_count > RPCSVC_MAXPAGES) + if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES) return -EINVAL; /* The request is completed when the RDMA_READs complete. The -- cgit v1.2.3 From 3fe04ee9f91084e7e6e999b09b8b15bcf97375e8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:03 -0500 Subject: svcrdma: Scrub BUG_ON() and WARN_ON() call sites Current convention is to avoid using BUG_ON() in places where an oops could cause complete system failure. Replace BUG_ON() call sites in svcrdma with an assertion error message and allow execution to continue safely. Some BUG_ON() calls are removed because they have never fired in production (that we are aware of). Some WARN_ON() calls are also replaced where a back trace is not helpful; e.g., in a workqueue task. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 11 -------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 28 ++++++++++++++++----- net/sunrpc/xprtrdma/svc_rdma_transport.c | 43 ++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index b3b7bb85844d..577f8659ca30 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -95,14 +95,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, rqstp->rq_respages = &rqstp->rq_pages[sge_no]; rqstp->rq_next_page = rqstp->rq_respages + 1; - /* We should never run out of SGE because the limit is defined to - * support the max allowed RPC data length - */ - BUG_ON(bc && (sge_no == ctxt->count)); - BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len) - != byte_count); - BUG_ON(rqstp->rq_arg.len != byte_count); - /* If not all pages were used from the SGL, free the remaining ones */ bc = sge_no; while (sge_no < ctxt->count) { @@ -477,8 +469,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp, int page_no; int ret; - BUG_ON(!head); - /* Copy RPC pages */ for (page_no = 0; page_no < head->count; page_no++) { put_page(rqstp->rq_pages[page_no]); @@ -567,7 +557,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) } dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", ctxt, rdma_xprt, rqstp, ctxt->wc_status); - BUG_ON(ctxt->wc_status != IB_WC_SUCCESS); atomic_inc(&rdma_stat_recv); /* Build up the XDR from the receive buffers. */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 9f1b50689c0f..7d79897959a4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -60,8 +60,11 @@ static int map_xdr(struct svcxprt_rdma *xprt, u32 page_off; int page_no; - BUG_ON(xdr->len != - (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); + if (xdr->len != + (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { + pr_err("svcrdma: map_xdr: XDR buffer length error\n"); + return -EIO; + } /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; @@ -150,7 +153,11 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, int bc; struct svc_rdma_op_ctxt *ctxt; - BUG_ON(vec->count > RPCSVC_MAXPAGES); + if (vec->count > RPCSVC_MAXPAGES) { + pr_err("svcrdma: Too many pages (%lu)\n", vec->count); + return -EIO; + } + dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " "write_len=%d, vec->sge=%p, vec->count=%lu\n", rmr, (unsigned long long)to, xdr_off, @@ -190,7 +197,10 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_off = 0; sge_no++; xdr_sge_no++; - BUG_ON(xdr_sge_no > vec->count); + if (xdr_sge_no > vec->count) { + pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no); + goto err; + } bc -= sge_bytes; if (sge_no == xprt->sc_max_sge) break; @@ -421,7 +431,10 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } - BUG_ON(byte_count != 0); + if (byte_count != 0) { + pr_err("svcrdma: Could not map %d bytes\n", byte_count); + goto err; + } /* Save all respages in the ctxt and remove them from the * respages array. 
They are our pages until the I/O @@ -442,7 +455,10 @@ static int send_reply(struct svcxprt_rdma *rdma, } rqstp->rq_next_page = rqstp->rq_respages + 1; - BUG_ON(sge_no > rdma->sc_max_sge); + if (sge_no > rdma->sc_max_sge) { + pr_err("svcrdma: Too many sges (%d)\n", sge_no); + goto err; + } memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4ba11d0cefe1..f2e059bbab42 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -139,7 +139,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) struct svcxprt_rdma *xprt; int i; - BUG_ON(!ctxt); xprt = ctxt->xprt; if (free_pages) for (i = 0; i < ctxt->count; i++) @@ -339,12 +338,14 @@ static void process_context(struct svcxprt_rdma *xprt, switch (ctxt->wr_op) { case IB_WR_SEND: - BUG_ON(ctxt->frmr); + if (ctxt->frmr) + pr_err("svcrdma: SEND: ctxt->frmr != NULL\n"); svc_rdma_put_context(ctxt, 1); break; case IB_WR_RDMA_WRITE: - BUG_ON(ctxt->frmr); + if (ctxt->frmr) + pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n"); svc_rdma_put_context(ctxt, 0); break; @@ -353,19 +354,21 @@ static void process_context(struct svcxprt_rdma *xprt, svc_rdma_put_frmr(xprt, ctxt->frmr); if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; - BUG_ON(!read_hdr); - spin_lock_bh(&xprt->sc_rq_dto_lock); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); + if (read_hdr) { + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + } else { + pr_err("svcrdma: ctxt->read_hdr == NULL\n"); + } svc_xprt_enqueue(&xprt->sc_xprt); } svc_rdma_put_context(ctxt, 0); break; default: - BUG_ON(1); printk(KERN_ERR "svcrdma: unexpected completion type, " "opcode=%d\n", ctxt->wr_op); @@ -513,7 +516,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) buflen = 0; ctxt->direction = DMA_FROM_DEVICE; for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { - BUG_ON(sge_no >= xprt->sc_max_sge); + if (sge_no >= xprt->sc_max_sge) { + pr_err("svcrdma: Too many sges (%d)\n", sge_no); + goto err_put_ctxt; + } page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; pa = ib_dma_map_page(xprt->sc_cm_id->device, @@ -820,7 +826,7 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, if (frmr) { frmr_unmap_dma(rdma, frmr); spin_lock_bh(&rdma->sc_frmr_q_lock); - BUG_ON(!list_empty(&frmr->frmr_list)); + WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); spin_unlock_bh(&rdma->sc_frmr_q_lock); } @@ -1123,7 +1129,9 @@ static void __svc_rdma_free(struct work_struct *work) dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); /* We should only be called from kref_put */ - BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0); + if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0) + pr_err("svcrdma: sc_xprt still in use? (%d)\n", + atomic_read(&rdma->sc_xprt.xpt_ref.refcount)); /* * Destroy queued, but not processed read completions. 
Note @@ -1151,8 +1159,12 @@ static void __svc_rdma_free(struct work_struct *work) } /* Warn if we leaked a resource or under-referenced */ - WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); - WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); + if (atomic_read(&rdma->sc_ctxt_used) != 0) + pr_err("svcrdma: ctxt still in use? (%d)\n", + atomic_read(&rdma->sc_ctxt_used)); + if (atomic_read(&rdma->sc_dma_used) != 0) + pr_err("svcrdma: dma still in use? (%d)\n", + atomic_read(&rdma->sc_dma_used)); /* De-allocate fastreg mr */ rdma_dealloc_frmr_q(rdma); @@ -1252,7 +1264,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) return -ENOTCONN; - BUG_ON(wr->send_flags != IB_SEND_SIGNALED); wr_count = 1; for (n_wr = wr->next; n_wr; n_wr = n_wr->next) wr_count++; -- cgit v1.2.3 From e5523bd28101869c85856247fc120faaf72bd232 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:11 -0500 Subject: svcrdma: Find rmsgp more reliably xdr_start() can return the wrong rmsgp address if an assumption about how the xdr_buf was constructed changes. When it gets it wrong, the client receives a reply that has gibberish in the RPC/RDMA header, preventing it from matching a waiting RPC request. Instead, make (and document) just one assumption: that the RDMA header for the client's RPC call is at the start of the first page in rq_pages. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 7d79897959a4..7de33d1af9b6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -483,18 +483,6 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) { } -/* - * Return the start of an xdr buffer. - */ -static void *xdr_start(struct xdr_buf *xdr) -{ - return xdr->head[0].iov_base - - (xdr->len - - xdr->page_len - - xdr->tail[0].iov_len - - xdr->head[0].iov_len); -} - int svc_rdma_sendto(struct svc_rqst *rqstp) { struct svc_xprt *xprt = rqstp->rq_xprt; @@ -512,8 +500,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); - /* Get the RDMA request header. */ - rdma_argp = xdr_start(&rqstp->rq_arg); + /* Get the RDMA request header. The receive logic always + * places this at the start of page 0. + */ + rdma_argp = page_address(rqstp->rq_pages[0]); /* Build an req vec for the XDR */ ctxt = svc_rdma_get_context(rdma); -- cgit v1.2.3 From e54524111f51eac1900cf91aca3d38a92a6b11c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:20 -0500 Subject: svcrdma: Plant reader function in struct svcxprt_rdma The RDMA reader function doesn't change once an svcxprt_rdma is instantiated. Instead of checking sc_devcap during every incoming RPC, set the reader function once when the connection is accepted. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 10 +++++ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 71 ++++++++++++-------------------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 + 3 files changed, 39 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 2280325e4c88..f161e309f25e 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -150,6 +150,10 @@ struct svcxprt_rdma { struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; struct ib_mr *sc_phys_mr; /* MR for server memory */ + int (*sc_reader)(struct svcxprt_rdma *, + struct svc_rqst *, + struct svc_rdma_op_ctxt *, + int *, u32 *, u32, u32, u64, bool); u32 sc_dev_caps; /* distilled device caps */ u32 sc_dma_lkey; /* local dma key */ unsigned int sc_frmr_pg_list_len; @@ -195,6 +199,12 @@ extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); +extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *, + struct svc_rdma_op_ctxt *, int *, u32 *, + u32, u32, u64, bool); +extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, + struct svc_rdma_op_ctxt *, int *, u32 *, + u32, u32, u64, bool); /* svc_rdma_sendto.c */ extern int svc_rdma_sendto(struct svc_rqst *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 577f8659ca30..c3aebc1bf0a6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -117,26 +117,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) return min_t(int, sge_count, xprt->sc_max_sge); } -typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last); - /* Issue an RDMA_READ using the local lkey to map the data sink */ -static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; @@ -221,15 +211,15 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, } /* Issue an RDMA_READ using an FRMR to map the data sink */ -static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, - struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *head, - int *page_no, - u32 *page_offset, - u32 rs_handle, - u32 rs_length, - u64 rs_offset, - int last) +int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + int *page_no, + u32 *page_offset, + u32 rs_handle, + u32 rs_length, + u64 rs_offset, + bool last) { struct ib_send_wr read_wr; struct ib_send_wr inv_wr; @@ -374,9 +364,9 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, { int page_no, ret; struct rpcrdma_read_chunk *ch; - u32 page_offset, byte_count; + u32 handle, page_offset, byte_count; u64 rs_offset; - rdma_reader_fn reader; + bool last; /* If no read list is present, return 0 */ ch = svc_rdma_get_read_chunk(rmsgp); @@ -399,27 +389,20 @@ static int rdma_read_chunks(struct 
svcxprt_rdma *xprt, head->arg.len = rqstp->rq_arg.len; head->arg.buflen = rqstp->rq_arg.buflen; - /* Use FRMR if supported */ - if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) - reader = rdma_read_chunk_frmr; - else - reader = rdma_read_chunk_lcl; - page_no = 0; page_offset = 0; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; ch->rc_discrim != 0; ch++) { - + handle = be32_to_cpu(ch->rc_target.rs_handle); + byte_count = be32_to_cpu(ch->rc_target.rs_length); xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, &rs_offset); - byte_count = ntohl(ch->rc_target.rs_length); while (byte_count > 0) { - ret = reader(xprt, rqstp, head, - &page_no, &page_offset, - ntohl(ch->rc_target.rs_handle), - byte_count, rs_offset, - ((ch+1)->rc_discrim == 0) /* last */ - ); + last = (ch + 1)->rc_discrim == xdr_zero; + ret = xprt->sc_reader(xprt, rqstp, head, + &page_no, &page_offset, + handle, byte_count, + rs_offset, last); if (ret < 0) goto err; byte_count -= ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f2e059bbab42..f609c1c2d38d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -974,10 +974,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) * NB: iWARP requires remote write access for the data sink * of an RDMA_READ. IB does not. */ + newxprt->sc_reader = rdma_read_chunk_lcl; if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { newxprt->sc_frmr_pg_list_len = devattr.max_fast_reg_page_list_len; newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; + newxprt->sc_reader = rdma_read_chunk_frmr; } /* -- cgit v1.2.3 From 61edbcb7c7f4efb65df4ad793d007237f9fa311f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:28 -0500 Subject: svcrdma: rc_position sanity checking An RPC/RDMA client may send large RPC arguments via a read list. This is a list of scatter/gather elements which convey RPC call arguments too large to fit in a small RDMA SEND. Each entry in the read list has a "position" field, whose value is the byte offset in the XDR stream where the data in that entry is to be inserted. Entries which share the same "position" value make up the same RPC argument. The receiver inserts entries with the same position field value in list order into the XDR stream. Currently the Linux NFS/RDMA server cannot handle receiving read chunks in more than one position, mostly because no current client sends read lists with elements in more than one position. As a sanity check, ensure that all received chunks have the same "rc_position." Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index c3aebc1bf0a6..a67dd1a081dd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -365,6 +365,7 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, int page_no, ret; struct rpcrdma_read_chunk *ch; u32 handle, page_offset, byte_count; + u32 position; u64 rs_offset; bool last; @@ -389,10 +390,17 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, head->arg.len = rqstp->rq_arg.len; head->arg.buflen = rqstp->rq_arg.buflen; - page_no = 0; page_offset = 0; - for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - ch->rc_discrim != 0; ch++) { - handle = be32_to_cpu(ch->rc_target.rs_handle); + ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + position = be32_to_cpu(ch->rc_position); + + ret = 0; + page_no = 0; + page_offset = 0; + for (; ch->rc_discrim != xdr_zero; ch++) { + if (be32_to_cpu(ch->rc_position) != position) + goto err; + + handle = be32_to_cpu(ch->rc_target.rs_handle), byte_count = be32_to_cpu(ch->rc_target.rs_length); xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, &rs_offset); -- cgit v1.2.3 From 0b056c224bea63060ce8a981e84193c93fac6f5d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:37 -0500 Subject: svcrdma: Support RDMA_NOMSG requests Currently the Linux server can not decode RDMA_NOMSG type requests. Operations whose length exceeds the fixed size of RDMA SEND buffers, like large NFSv4 CREATE(NF4LNK) operations, must be conveyed via RDMA_NOMSG. For an RDMA_MSG type request, the client sends the RPC/RDMA, RPC headers, and some or all of the NFS arguments via RDMA SEND. For an RDMA_NOMSG type request, the client sends just the RPC/RDMA header via RDMA SEND. The request's read list contains elements for the entire RPC message, including the RPC header. NFSD expects the RPC/RMDA header and RPC header to be contiguous in page zero of the XDR buffer. Add logic in the RDMA READ path to make the read list contents land where the server prefers, when the incoming message is a type RDMA_NOMSG message. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 39 ++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f161e309f25e..c343a94bc791 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -79,6 +79,7 @@ struct svc_rdma_op_ctxt { enum ib_wr_opcode wr_op; enum ib_wc_status wc_status; u32 byte_len; + u32 position; struct svcxprt_rdma *xprt; unsigned long flags; enum dma_data_direction direction; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index a67dd1a081dd..36cf51a3eab7 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -60,6 +60,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *ctxt, u32 byte_count) { + struct rpcrdma_msg *rmsgp; struct page *page; u32 bc; int sge_no; @@ -82,7 +83,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, /* If data remains, store it in the pagelist */ rqstp->rq_arg.page_len = bc; rqstp->rq_arg.page_base = 0; - rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; + if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG) + rqstp->rq_arg.pages = &rqstp->rq_pages[0]; + else + rqstp->rq_arg.pages = &rqstp->rq_pages[1]; + sge_no = 1; while (bc && sge_no < ctxt->count) { page = ctxt->pages[sge_no]; @@ -383,7 +391,6 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, */ head->arg.head[0] = rqstp->rq_arg.head[0]; head->arg.tail[0] = rqstp->rq_arg.tail[0]; - head->arg.pages = &head->pages[head->count]; head->hdr_count = head->count; head->arg.page_base = 0; head->arg.page_len = 0; @@ -393,9 +400,17 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; position = be32_to_cpu(ch->rc_position); + /* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */ + if (position == 0) { + head->arg.pages = &head->pages[0]; + page_offset = head->byte_len; + } else { + head->arg.pages = &head->pages[head->count]; + page_offset = 0; + } + ret = 0; page_no = 0; - page_offset = 0; for (; ch->rc_discrim != xdr_zero; ch++) { if (be32_to_cpu(ch->rc_position) != position) goto err; @@ -418,7 +433,10 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, head->arg.buflen += ret; } } + ret = 1; + head->position = position; + err: /* Detach arg pages. 
svc_recv will replenish them */ for (page_no = 0; @@ -465,6 +483,21 @@ static int rdma_read_complete(struct svc_rqst *rqstp, put_page(rqstp->rq_pages[page_no]); rqstp->rq_pages[page_no] = head->pages[page_no]; } + + /* Adjustments made for RDMA_NOMSG type requests */ + if (head->position == 0) { + if (head->arg.len <= head->sge[0].length) { + head->arg.head[0].iov_len = head->arg.len - + head->byte_len; + head->arg.page_len = 0; + } else { + head->arg.head[0].iov_len = head->sge[0].length - + head->byte_len; + head->arg.page_len = head->arg.len - + head->sge[0].length; + } + } + /* Point rq_arg.pages past header */ rdma_fix_xdr_pad(&head->arg); rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; -- cgit v1.2.3 From fcbeced5b4df5e7f05ed8a18b69acfac733aab11 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:45 -0500 Subject: svcrdma: Move read list XDR round-up logic This is a pre-requisite for a subsequent patch. Read list XDR round-up needs to be done _before_ additional inline content is copied to the end of the XDR buffer's page list. Move the logic added by commit e560e3b510d2 ("svcrdma: Add zero padding if the client doesn't send it"). Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 37 ++++++++------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 36cf51a3eab7..a345cadad4dd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -434,6 +433,15 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, } } + /* Read list may need XDR round-up (see RFC 5666, s. 3.7) */ + if (page_offset & 3) { + u32 pad = 4 - (page_offset & 3); + + head->arg.page_len += pad; + head->arg.len += pad; + head->arg.buflen += pad; + } + ret = 1; head->position = position; @@ -446,32 +454,6 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, return ret; } -/* - * To avoid a separate RDMA READ just for a handful of zero bytes, - * RFC 5666 section 3.7 allows the client to omit the XDR zero pad - * in chunk lists. - */ -static void -rdma_fix_xdr_pad(struct xdr_buf *buf) -{ - unsigned int page_len = buf->page_len; - unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len; - unsigned int offset, pg_no; - char *p; - - if (size == 0) - return; - - pg_no = page_len >> PAGE_SHIFT; - offset = page_len & ~PAGE_MASK; - p = page_address(buf->pages[pg_no]); - memset(p + offset, 0, size); - - buf->page_len += size; - buf->buflen += size; - buf->len += size; -} - static int rdma_read_complete(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { @@ -499,7 +481,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp, } /* Point rq_arg.pages past header */ - rdma_fix_xdr_pad(&head->arg); rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; rqstp->rq_arg.page_len = head->arg.page_len; rqstp->rq_arg.page_base = head->arg.page_base; -- cgit v1.2.3 From a97c331f9aa9080706a7835225d9d82e832e0bb6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 13 Jan 2015 11:03:53 -0500 Subject: svcrdma: Handle additional inline content Most NFS RPCs place their large payload argument at the end of the RPC header (eg, NFSv3 WRITE). For NFSv3 WRITE and SYMLINK, RPC/RDMA sends the complete RPC header inline, and the payload argument in the read list. 
Data in the read list is the last part of the XDR stream. One important case is not like this, however. NFSv4 COMPOUND is a counted array of operations. A WRITE operation, with its large data payload, can appear in the middle of the compound's operations array. Thus NFSv4 WRITE compounds can have header content after the WRITE payload. The Linux client, for example, performs an NFSv4 WRITE like this: { PUTFH, WRITE, GETATTR } Though RFC 5667 is not precise about this, the proper way to convey this compound is to place the GETATTR inline, _after_ the front of the RPC header. The receiver inserts the read list payload into the XDR stream after the initial WRITE arguments, and before the GETATTR operation, thanks to the value of the read list "position" field. The Linux client currently sends the GETATTR at the end of the RPC/RDMA read list, which is incorrect. It will be corrected in the future. The Linux server currently rejects NFSv4 compounds with inline content after the read list. For the above NFSv4 WRITE compound, the NFS compound header indicates there are three operations, but the server finds nonsense when it looks in the XDR stream for the third operation, and the compound fails with OP_ILLEGAL. Move trailing inline content to the end of the XDR buffer's page list. This presents incoming NFSv4 WRITE compounds to NFSD in the same way the socket transport does. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index a345cadad4dd..f9f13a32ddb8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -364,6 +364,56 @@ rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch) return count; } +/* If there was additional inline content, append it to the end of arg.pages. + * Tail copy has to be done after the reader function has determined how many + * pages are needed for RDMA READ. 
+ */ +static int +rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, + u32 position, u32 byte_count, u32 page_offset, int page_no) +{ + char *srcp, *destp; + int ret; + + ret = 0; + srcp = head->arg.head[0].iov_base + position; + byte_count = head->arg.head[0].iov_len - position; + if (byte_count > PAGE_SIZE) { + dprintk("svcrdma: large tail unsupported\n"); + return 0; + } + + /* Fit as much of the tail on the current page as possible */ + if (page_offset != PAGE_SIZE) { + destp = page_address(rqstp->rq_arg.pages[page_no]); + destp += page_offset; + while (byte_count--) { + *destp++ = *srcp++; + page_offset++; + if (page_offset == PAGE_SIZE && byte_count) + goto more; + } + goto done; + } + +more: + /* Fit the rest on the next page */ + page_no++; + destp = page_address(rqstp->rq_arg.pages[page_no]); + while (byte_count--) + *destp++ = *srcp++; + + rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + +done: + byte_count = head->arg.head[0].iov_len - position; + head->arg.page_len += byte_count; + head->arg.len += byte_count; + head->arg.buflen += byte_count; + return 1; +} + static int rdma_read_chunks(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp, @@ -440,9 +490,14 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, head->arg.page_len += pad; head->arg.len += pad; head->arg.buflen += pad; + page_offset += pad; } ret = 1; + if (position && position < head->arg.head[0].iov_len) + ret = rdma_copy_tail(rqstp, head, position, + byte_count, page_offset, page_no); + head->arg.head[0].iov_len = position; head->position = position; err: -- cgit v1.2.3 From b51f3beecfbbfc946749a91fb444cb8917cf444f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Jan 2015 16:14:02 +0100 Subject: cfg80211: change bandwidth reporting to explicit field For some reason, we made the bandwidth separate flags, which is rather confusing - a single rate cannot have different bandwidths at the same time. Change this to no longer be flags but use a separate field for the bandwidth ('bw') instead. While at it, add support for 5 and 10 MHz rates - these are reported as regular legacy rates with their real bitrate, but tagged as 5/10 now to make it easier to distinguish them. In the nl80211 API, the flags are preserved, but the code now can also clearly only set a single one of the flags. 
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 3 ++- drivers/net/wireless/mwifiex/cfg80211.c | 11 ++++---- include/net/cfg80211.h | 33 ++++++++++++++++++------ include/uapi/linux/nl80211.h | 8 ++++++ net/mac80211/cfg.c | 31 +++++++++++++++-------- net/mac80211/util.c | 25 +++++++++++++------ net/wireless/nl80211.c | 40 +++++++++++++++++++++--------- net/wireless/util.c | 24 ++++++++++++++---- 8 files changed, 125 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 44dd6ef923cd..85da63a67faf 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1827,6 +1827,7 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, } sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS; + sinfo->txrate.bw = RATE_INFO_BW_20; } else if (is_rate_ht40(rate, &mcs, &sgi)) { if (sgi) { sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; @@ -1835,7 +1836,7 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, sinfo->txrate.mcs = mcs; } - sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + sinfo->txrate.bw = RATE_INFO_BW_40; sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS; } else { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 71312ff52703..1996a8b612d2 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -856,16 +856,16 @@ mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 tx_htinfo, /* HT or VHT */ switch (tx_htinfo & (BIT(3) | BIT(2))) { case 0: - /* This will be 20MHz */ + rate->bw = RATE_INFO_BW_20; break; case (BIT(2)): - rate->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + rate->bw = RATE_INFO_BW_40; break; case (BIT(3)): - rate->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; + rate->bw = RATE_INFO_BW_80; break; case (BIT(3) | BIT(2)): - rate->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + rate->bw = RATE_INFO_BW_160; break; } @@ -885,8 +885,9 @@ mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 tx_htinfo, if ((tx_htinfo & BIT(0)) && (priv->tx_rate < 16)) { rate->mcs = priv->tx_rate; rate->flags |= RATE_INFO_FLAGS_MCS; + rate->bw = RATE_INFO_BW_20; if (tx_htinfo & BIT(1)) - rate->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + rate->bw = RATE_INFO_BW_40; if (tx_htinfo & BIT(2)) rate->flags |= RATE_INFO_FLAGS_SHORT_GI; } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0322048fddab..7b44ba0a7632 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -873,20 +873,35 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * * @RATE_INFO_FLAGS_MCS: mcs field filled with HT MCS * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS - * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 MHz width transmission - * @RATE_INFO_FLAGS_80_MHZ_WIDTH: 80 MHz width transmission - * @RATE_INFO_FLAGS_160_MHZ_WIDTH: 160 MHz width transmission * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval * @RATE_INFO_FLAGS_60G: 60GHz MCS */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), RATE_INFO_FLAGS_VHT_MCS = BIT(1), - RATE_INFO_FLAGS_40_MHZ_WIDTH = BIT(2), - RATE_INFO_FLAGS_80_MHZ_WIDTH = BIT(3), - RATE_INFO_FLAGS_160_MHZ_WIDTH = BIT(4), - RATE_INFO_FLAGS_SHORT_GI = BIT(5), - RATE_INFO_FLAGS_60G = BIT(6), + RATE_INFO_FLAGS_SHORT_GI = BIT(2), + RATE_INFO_FLAGS_60G = BIT(3), +}; + +/** + * enum rate_info_bw - rate bandwidth information + * + * Used by the driver to indicate the 
rate bandwidth. + * + * @RATE_INFO_BW_5: 5 MHz bandwidth + * @RATE_INFO_BW_10: 10 MHz bandwidth + * @RATE_INFO_BW_20: 20 MHz bandwidth + * @RATE_INFO_BW_40: 40 MHz bandwidth + * @RATE_INFO_BW_80: 80 MHz bandwidth + * @RATE_INFO_BW_160: 160 MHz bandwidth + */ +enum rate_info_bw { + RATE_INFO_BW_5, + RATE_INFO_BW_10, + RATE_INFO_BW_20, + RATE_INFO_BW_40, + RATE_INFO_BW_80, + RATE_INFO_BW_160, }; /** @@ -898,12 +913,14 @@ enum rate_info_flags { * @mcs: mcs index if struct describes a 802.11n bitrate * @legacy: bitrate in 100kbit/s for 802.11abg * @nss: number of streams (VHT only) + * @bw: bandwidth (from &enum rate_info_bw) */ struct rate_info { u8 flags; u8 mcs; u16 legacy; u8 nss; + u8 bw; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 11cdb85ac646..f52797a90816 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2281,6 +2281,12 @@ struct nl80211_sta_flag_update { * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the * same as 160 for purposes of the bitrates * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate + * @NL80211_RATE_INFO_10_MHZ_WIDTH: 10 MHz width - note that this is + * a legacy rate and will be reported as the actual bitrate, i.e. + * half the base (20 MHz) rate + * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is + * a legacy rate and will be reported as the actual bitrate, i.e. + * a quarter of the base (20 MHz) rate * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ enum nl80211_rate_info { @@ -2295,6 +2301,8 @@ enum nl80211_rate_info { NL80211_RATE_INFO_80_MHZ_WIDTH, NL80211_RATE_INFO_80P80_MHZ_WIDTH, NL80211_RATE_INFO_160_MHZ_WIDTH, + NL80211_RATE_INFO_10_MHZ_WIDTH, + NL80211_RATE_INFO_5_MHZ_WIDTH, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6d5076fbf87a..ff090ef1ea2c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -428,11 +428,13 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; - if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + rinfo->bw = RATE_INFO_BW_40; + else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) + rinfo->bw = RATE_INFO_BW_80; + else if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } @@ -459,14 +461,21 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } - if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) - rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) - rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) - rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + + if (sta->last_rx_rate_flag & RX_FLAG_5MHZ) + rinfo->bw = RATE_INFO_BW_5; + else if (sta->last_rx_rate_flag & RX_FLAG_10MHZ) + rinfo->bw = RATE_INFO_BW_10; + else if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) + rinfo->bw = RATE_INFO_BW_40; + else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) + rinfo->bw = RATE_INFO_BW_80; + else 
if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index db7216124736..fbd37d43dfce 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2541,7 +2541,9 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.mcs = status->rate_idx; ri.flags |= RATE_INFO_FLAGS_MCS; if (status->flag & RX_FLAG_40MHZ) - ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + ri.bw = RATE_INFO_BW_40; + else + ri.bw = RATE_INFO_BW_20; if (status->flag & RX_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (status->flag & RX_FLAG_VHT) { @@ -2549,11 +2551,13 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.mcs = status->rate_idx; ri.nss = status->vht_nss; if (status->flag & RX_FLAG_40MHZ) - ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_80MHZ) - ri.flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_160MHZ) - ri.flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + ri.bw = RATE_INFO_BW_40; + else if (status->vht_flag & RX_VHT_FLAG_80MHZ) + ri.bw = RATE_INFO_BW_80; + else if (status->vht_flag & RX_VHT_FLAG_160MHZ) + ri.bw = RATE_INFO_BW_160; + else + ri.bw = RATE_INFO_BW_20; if (status->flag & RX_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; } else { @@ -2561,10 +2565,15 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, int shift = 0; int bitrate; - if (status->flag & RX_FLAG_10MHZ) + if (status->flag & RX_FLAG_10MHZ) { shift = 1; - if (status->flag & RX_FLAG_5MHZ) + ri.bw = RATE_INFO_BW_10; + } else if (status->flag & RX_FLAG_5MHZ) { shift = 2; + ri.bw = RATE_INFO_BW_5; + } else { + ri.bw = RATE_INFO_BW_20; + } sband = local->hw.wiphy->bands[status->band]; bitrate = sband->bitrates[status->rate_idx].bitrate; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8998484ea970..8e56eeb583aa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3578,6 +3578,7 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, struct nlattr *rate; u32 bitrate; u16 bitrate_compat; + enum nl80211_attrs rate_flg; rate = nla_nest_start(msg, attr); if (!rate) @@ -3594,12 +3595,36 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) return false; + switch (info->bw) { + case RATE_INFO_BW_5: + rate_flg = NL80211_RATE_INFO_5_MHZ_WIDTH; + break; + case RATE_INFO_BW_10: + rate_flg = NL80211_RATE_INFO_10_MHZ_WIDTH; + break; + default: + WARN_ON(1); + /* fall through */ + case RATE_INFO_BW_20: + rate_flg = 0; + break; + case RATE_INFO_BW_40: + rate_flg = NL80211_RATE_INFO_40_MHZ_WIDTH; + break; + case RATE_INFO_BW_80: + rate_flg = NL80211_RATE_INFO_80_MHZ_WIDTH; + break; + case RATE_INFO_BW_160: + rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH; + break; + } + + if (rate_flg && nla_put_flag(msg, rate_flg)) + return false; + if (info->flags & RATE_INFO_FLAGS_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) return false; - if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; @@ -3608,15 +3633,6 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, return false; if 
(nla_put_u8(msg, NL80211_RATE_INFO_VHT_NSS, info->nss)) return false; - if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; diff --git a/net/wireless/util.c b/net/wireless/util.c index 6942d48f1ac5..3535e8ade48f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1073,9 +1073,24 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) if (WARN_ON_ONCE(rate->mcs > 9)) return 0; - idx = rate->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH ? 3 : - rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 : - rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 1 : 0; + switch (rate->bw) { + case RATE_INFO_BW_160: + idx = 3; + break; + case RATE_INFO_BW_80: + idx = 2; + break; + case RATE_INFO_BW_40: + idx = 1; + break; + case RATE_INFO_BW_5: + case RATE_INFO_BW_10: + default: + WARN_ON(1); + /* fall through */ + case RATE_INFO_BW_20: + idx = 0; + } bitrate = base[idx][rate->mcs]; bitrate *= rate->nss; @@ -1106,8 +1121,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate) modulation = rate->mcs & 7; streams = (rate->mcs >> 3) + 1; - bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ? - 13500000 : 6500000; + bitrate = (rate->bw == RATE_INFO_BW_40) ? 13500000 : 6500000; if (modulation < 4) bitrate *= (modulation + 1); -- cgit v1.2.3 From 5055c371bfd53fd369b895051b541318c2bad495 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Jan 2015 15:17:06 -0800 Subject: ipv4: per cpu uncached list RAW sockets with hdrinc suffer from contention on rt_uncached_lock spinlock. One solution is to use percpu lists, since most routes are destroyed by the cpu that created them. It is unclear why we even have to put these routes in uncached_list, as all outgoing packets should be freed when a device is dismantled. Signed-off-by: Eric Dumazet Fixes: caacf05e5ad1 ("ipv4: Properly purge netdev references on uncached routes.") Signed-off-by: David S. 
Miller --- include/net/route.h | 2 ++ net/ipv4/route.c | 46 +++++++++++++++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/route.h b/include/net/route.h index b17cf28f996e..fe22d03afb6a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -46,6 +46,7 @@ struct fib_nh; struct fib_info; +struct uncached_list; struct rtable { struct dst_entry dst; @@ -64,6 +65,7 @@ struct rtable { u32 rt_pmtu; struct list_head rt_uncached; + struct uncached_list *rt_uncached_list; }; static inline bool rt_is_input_route(const struct rtable *rt) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a2155b02602..ce112d0f2698 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1325,14 +1325,22 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) return ret; } -static DEFINE_SPINLOCK(rt_uncached_lock); -static LIST_HEAD(rt_uncached_list); +struct uncached_list { + spinlock_t lock; + struct list_head head; +}; + +static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); static void rt_add_uncached_list(struct rtable *rt) { - spin_lock_bh(&rt_uncached_lock); - list_add_tail(&rt->rt_uncached, &rt_uncached_list); - spin_unlock_bh(&rt_uncached_lock); + struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); + + rt->rt_uncached_list = ul; + + spin_lock_bh(&ul->lock); + list_add_tail(&rt->rt_uncached, &ul->head); + spin_unlock_bh(&ul->lock); } static void ipv4_dst_destroy(struct dst_entry *dst) @@ -1340,27 +1348,32 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; if (!list_empty(&rt->rt_uncached)) { - spin_lock_bh(&rt_uncached_lock); + struct uncached_list *ul = rt->rt_uncached_list; + + spin_lock_bh(&ul->lock); list_del(&rt->rt_uncached); - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } void rt_flush_dev(struct net_device *dev) { - if (!list_empty(&rt_uncached_list)) { - struct net *net = dev_net(dev); - struct rtable *rt; + struct net *net = dev_net(dev); + struct rtable *rt; + int cpu; + + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); - spin_lock_bh(&rt_uncached_lock); - list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + spin_lock_bh(&ul->lock); + list_for_each_entry(rt, &ul->head, rt_uncached) { if (rt->dst.dev != dev) continue; rt->dst.dev = net->loopback_dev; dev_hold(rt->dst.dev); dev_put(dev); } - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } @@ -2717,6 +2730,7 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; int __init ip_rt_init(void) { int rc = 0; + int cpu; ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); if (!ip_idents) @@ -2724,6 +2738,12 @@ int __init ip_rt_init(void) prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); + + INIT_LIST_HEAD(&ul->head); + spin_lock_init(&ul->lock); + } #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) -- cgit v1.2.3 From 12d872511cdf71d86a2547b32c0d86133d1d0f41 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 16:29:12 +0100 Subject: bridge: use MDBA_SET_ENTRY_MAX for maxtype in nlmsg_parse() This is just a cleanup, because in the current code MDBA_SET_ENTRY_MAX == MDBA_SET_ENTRY. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/bridge/br_mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 5df05269d17a..fed61c971b17 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -276,7 +276,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL); if (err < 0) return err; -- cgit v1.2.3 From f812116b174e59a350acc8e4856213a166a91222 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 15 Jan 2015 13:18:40 -0500 Subject: ip: zero sockaddr returned on error queue The sockaddr is returned in IP(V6)_RECVERR as part of errhdr. That structure is defined and allocated on the stack as struct { struct sock_extended_err ee; struct sockaddr_in(6) offender; } errhdr; The second part is only initialized for certain SO_EE_ORIGIN values. Always initialize it completely. An MTU exceeded error on a SOCK_RAW/IPPROTO_RAW is one example that would return uninitialized bytes. Signed-off-by: Willem de Bruijn ---- Also verified that there is no padding between errhdr.ee and errhdr.offender that could leak additional kernel data. Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 8 ++------ net/ipv6/datagram.c | 10 +++------- 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8a89c738b7a3..6b85adb05003 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -461,17 +461,13 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; - sin->sin_family = AF_UNSPEC; + memset(sin, 0, sizeof(*sin)); if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { - struct inet_sock *inet = inet_sk(sk); - sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - sin->sin_port = 0; - memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - if (inet->cmsg_flags) + if (inet_sk(sk)->cmsg_flags) ip_cmsg_recv(msg, skb); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 100c589a2a6c..49f5e73db122 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -393,11 +393,10 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; - sin->sin6_family = AF_UNSPEC; + memset(sin, 0, sizeof(*sin)); + if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; - sin->sin6_flowinfo = 0; - sin->sin6_port = 0; if (np->rxopt.all) { if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) @@ -412,12 +411,9 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) ipv6_iface_scope_id(&sin->sin6_addr, IP6CB(skb)->iif); } else { - struct inet_sock *inet = inet_sk(sk); - ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin->sin6_addr); - sin->sin6_scope_id = 0; - if (inet->cmsg_flags) + if (inet_sk(sk)->cmsg_flags) ip_cmsg_recv(msg, skb); } } -- cgit v1.2.3 From ac64da0b83d82abe62f78b3d0e21cca31aea24fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Jan 2015 17:04:22 -0800 Subject: net: rps: fix cpu unplug softnet_data.input_pkt_queue is protected by a spinlock that we must hold when 
transferring packets from victim queue to an active one. This is because other cpus could still be trying to enqueue packets into victim queue. A second problem is that when we transfert the NAPI poll_list from victim to current cpu, we absolutely need to special case the percpu backlog, because we do not want to add complex locking to protect process_queue : Only owner cpu is allowed to manipulate it, unless cpu is offline. Based on initial patch from Prasad Sodagudi & Subash Abhinov Kasiviswanathan. This version is better because we do not slow down packet processing, only make migration safer. Reported-by: Prasad Sodagudi Reported-by: Subash Abhinov Kasiviswanathan Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 683d493aa1bf..171420e75b03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7072,10 +7072,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } - /* Append NAPI poll list from offline CPU. */ - if (!list_empty(&oldsd->poll_list)) { - list_splice_init(&oldsd->poll_list, &sd->poll_list); - raise_softirq_irqoff(NET_RX_SOFTIRQ); + /* Append NAPI poll list from offline CPU, with one exception : + * process_backlog() must be called by cpu owning percpu backlog. + * We properly handle process_queue & input_pkt_queue later. + */ + while (!list_empty(&oldsd->poll_list)) { + struct napi_struct *napi = list_first_entry(&oldsd->poll_list, + struct napi_struct, + poll_list); + + list_del_init(&napi->poll_list); + if (napi->poll == process_backlog) + napi->state = 0; + else + ____napi_schedule(sd, napi); } raise_softirq_irqoff(NET_TX_SOFTIRQ); @@ -7086,7 +7096,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, netif_rx_internal(skb); input_queue_head_incr(oldsd); } - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { + while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx_internal(skb); input_queue_head_incr(oldsd); } -- cgit v1.2.3 From 919d9db9521862d01712d9be25cd6ebfcc353eed Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 16 Jan 2015 17:23:48 +1100 Subject: netlink: Fix netlink_insert EADDRINUSE error The patch c5adde9468b0714a051eac7f9666f23eb10b61f7 ("netlink: eliminate nl_sk_hash_lock") introduced a bug where the EADDRINUSE error has been replaced by ENOMEM. This patch rectifies that problem. Signed-off-by: Herbert Xu Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 01b702d63457..7a94185bde6b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1050,7 +1050,7 @@ netlink_update_listeners(struct sock *sk) static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { struct netlink_table *table = &nl_table[sk->sk_protocol]; - int err = -EADDRINUSE; + int err; lock_sock(sk); @@ -1065,10 +1065,13 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) nlk_sk(sk)->portid = portid; sock_hold(sk); - if (__netlink_insert(table, sk, net)) - err = 0; - else + + err = 0; + if (!__netlink_insert(table, sk, net)) { + err = -EADDRINUSE; sock_put(sk); + } + err: release_sock(sk); return err; -- cgit v1.2.3 From 0026b6551b51a9520b912f41b8d447b89a825f5a Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sun, 4 Jan 2015 16:55:14 +0100 Subject: Bluetooth: Remove unused function Remove the function hci_conn_change_link_key() that is not used anywhere. This was partially found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_conn.c | 15 --------------- 2 files changed, 16 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1f21fe48b38e..7777124bff55 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -781,7 +781,6 @@ int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, bool initiator); -int hci_conn_change_link_key(struct hci_conn *conn); int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2e724e0b75b9..c9b8fa544785 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1084,21 +1084,6 @@ int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level) } EXPORT_SYMBOL(hci_conn_check_secure); -/* Change link key */ -int hci_conn_change_link_key(struct hci_conn *conn) -{ - BT_DBG("hcon %p", conn); - - if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { - struct hci_cp_change_conn_link_key cp; - cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, - sizeof(cp), &cp); - } - - return 0; -} - /* Switch role */ int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { -- cgit v1.2.3 From 5e06a9e8b6db441371dadc02f3e2348196f408dd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jan 2015 13:27:56 +0100 Subject: mac80211: remove doubled semicolon Signed-off-by: Johannes Berg --- net/mac80211/status.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 788707f05516..e679b7c9b160 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -730,7 +730,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_bar *bar; int rtap_len; int shift = 0; - int tid = IEEE80211_NUM_TIDS;; + int tid = IEEE80211_NUM_TIDS; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); -- cgit v1.2.3 From 
57007121228347515187049a9c65f7cbeb923dd1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jan 2015 21:05:02 +0100 Subject: cfg80211: fix checking nl80211_send_station() return value The return value from nl80211_send_station() is the length of the skb, or a negative error, so abort sending the message only when the return value was negative. This fixes the ibss_rsn wpa_supplicant test case. Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8e56eeb583aa..c5661c5ad8f3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11788,7 +11788,7 @@ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, return; if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0, - rdev, dev, mac_addr, sinfo)) { + rdev, dev, mac_addr, sinfo) < 0) { nlmsg_free(msg); return; } -- cgit v1.2.3 From 5ad6300524c0332ac67e912c20d6e5cf262ba58f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jan 2015 11:37:13 +0100 Subject: genetlink: disallow subscribing to unknown mcast groups Jeff Layton reported that he could trigger the multicast unbind warning in generic netlink using trinity. I originally thought it was a race condition between unregistering the generic netlink family and closing the socket, but there's a far simpler explanation: genetlink currently allows subscribing to groups that don't (yet) exist, and the warning is triggered when unsubscribing again while the group still doesn't exist. Originally, I had a warning in the subscribe case and accepted it out of userspace API concerns, but the warning was of course wrong and removed later. However, I now think that allowing userspace to subscribe to groups that don't exist is wrong and could possibly become a security problem: Consider a (new) genetlink family implementing a permission check in the mcast_bind() function similar to the like the audit code does today; it would be possible to bypass the permission check by guessing the ID and subscribing to the group it exists. This is only possible in case a family like that would be dynamically loaded, but it doesn't seem like a huge stretch, for example wireless may be loaded when you plug in a USB device. To avoid this reject such subscription attempts. If this ends up causing userspace issues we may need to add a workaround in af_netlink to deny such requests but not return an error. Reported-by: Jeff Layton Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2e11061ef885..c18d3f5624b2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -985,7 +985,7 @@ static struct genl_multicast_group genl_ctrl_groups[] = { static int genl_bind(struct net *net, int group) { - int i, err = 0; + int i, err = -ENOENT; down_read(&cb_lock); for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { -- cgit v1.2.3 From ee1c244219fd652964710a6cc3e4f922e86aa492 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jan 2015 11:37:14 +0100 Subject: genetlink: synchronize socket closing and family removal In addition to the problem Jeff Layton reported, I looked at the code and reproduced the same warning by subscribing and removing the genl family with a socket still open. 
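A minimal user-space sketch of that reproduction (the group number is a placeholder; any multicast group of a genl family whose module can be removed will do):

#include <sys/socket.h>
#include <unistd.h>
#include <linux/netlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

int main(void)
{
	unsigned int grp = 7;	/* placeholder: a group id of the target family */
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);

	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
	/* ... unregister the family here, e.g. rmmod its module ... */
	close(fd);		/* unbind now runs after the family is gone */
	return 0;
}
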
This is a fairly tricky race which originates in the fact that generic netlink allows the family to go away while sockets are still open - unlike regular netlink which has a module refcount for every open socket so in general this cannot be triggered. Trying to resolve this issue by the obvious locking isn't possible as it will result in deadlocks between unregistration and group unbind notification (which incidentally lockdep doesn't find due to the home grown locking in the netlink table.) To really resolve this, introduce a "closing socket" reference counter (for generic netlink only, as it's the only affected family) in the core netlink code and use that in generic netlink to wait for all the sockets that are being closed at the same time as a generic netlink family is removed. This fixes the race that when a socket is closed, it will should call the unbind, but if the family is removed at the same time the unbind will not find it, leading to the warning. The real problem though is that in this case the unbind could actually find a new family that is registered to have a multicast group with the same ID, and call its mcast_unbind() leading to confusing. Also remove the warning since it would still trigger, but is now no longer a problem. This also moves the code in af_netlink.c to before unreferencing the module to avoid having the same problem in the normal non-genl case. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/genetlink.h | 4 ++++ include/net/genetlink.h | 5 ++++- net/netlink/af_netlink.c | 24 +++++++++++++++++------- net/netlink/af_netlink.h | 1 + net/netlink/genetlink.c | 16 +++++++++------- 5 files changed, 35 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 55b685719d52..09460d6d6682 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -11,6 +11,10 @@ extern void genl_unlock(void); extern int lockdep_genl_is_held(void); #endif +/* for synchronisation between af_netlink and genetlink */ +extern atomic_t genl_sk_destructing_cnt; +extern wait_queue_head_t genl_sk_destructing_waitq; + /** * rcu_dereference_genl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 2ea2c55bdc87..6c92415311ca 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -35,7 +35,10 @@ struct genl_info; * undo operations done by pre_doit, for example release locks * @mcast_bind: a socket bound to the given multicast group (which * is given as the offset into the groups array) - * @mcast_unbind: a socket was unbound from the given multicast group + * @mcast_unbind: a socket was unbound from the given multicast group. + * Note that unbind() will not be called symmetrically if the + * generic netlink family is removed while there are still open + * sockets. 
* @attrbuf: buffer to store parsed attributes * @family_list: family list * @mcgrps: multicast groups used by this family (private) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 84ea76ca3f1f..02fdde28dada 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -1095,6 +1096,8 @@ static void netlink_remove(struct sock *sk) __sk_del_bind_node(sk); netlink_update_listeners(sk); } + if (sk->sk_protocol == NETLINK_GENERIC) + atomic_inc(&genl_sk_destructing_cnt); netlink_table_ungrab(); } @@ -1211,6 +1214,20 @@ static int netlink_release(struct socket *sock) * will be purged. */ + /* must not acquire netlink_table_lock in any way again before unbind + * and notifying genetlink is done as otherwise it might deadlock + */ + if (nlk->netlink_unbind) { + int i; + + for (i = 0; i < nlk->ngroups; i++) + if (test_bit(i, nlk->groups)) + nlk->netlink_unbind(sock_net(sk), i + 1); + } + if (sk->sk_protocol == NETLINK_GENERIC && + atomic_dec_return(&genl_sk_destructing_cnt) == 0) + wake_up(&genl_sk_destructing_waitq); + sock->sk = NULL; wake_up_interruptible_all(&nlk->wait); @@ -1246,13 +1263,6 @@ static int netlink_release(struct socket *sock) netlink_table_ungrab(); } - if (nlk->netlink_unbind) { - int i; - - for (i = 0; i < nlk->ngroups; i++) - if (test_bit(i, nlk->groups)) - nlk->netlink_unbind(sock_net(sk), i + 1); - } kfree(nlk->groups); nlk->groups = NULL; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index f123a88496f8..f1c31b39aa3e 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -2,6 +2,7 @@ #define _AF_NETLINK_H #include +#include #include #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index c18d3f5624b2..ee57459fc258 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -23,6 +23,9 @@ static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ static DECLARE_RWSEM(cb_lock); +atomic_t genl_sk_destructing_cnt = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(genl_sk_destructing_waitq); + void genl_lock(void) { mutex_lock(&genl_mutex); @@ -435,15 +438,18 @@ int genl_unregister_family(struct genl_family *family) genl_lock_all(); - genl_unregister_mc_groups(family); - list_for_each_entry(rc, genl_family_chain(family->id), family_list) { if (family->id != rc->id || strcmp(rc->name, family->name)) continue; + genl_unregister_mc_groups(family); + list_del(&rc->family_list); family->n_ops = 0; - genl_unlock_all(); + up_write(&cb_lock); + wait_event(genl_sk_destructing_waitq, + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); @@ -1014,7 +1020,6 @@ static int genl_bind(struct net *net, int group) static void genl_unbind(struct net *net, int group) { int i; - bool found = false; down_read(&cb_lock); for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { @@ -1027,14 +1032,11 @@ static void genl_unbind(struct net *net, int group) if (f->mcast_unbind) f->mcast_unbind(net, fam_grp); - found = true; break; } } } up_read(&cb_lock); - - WARN_ON(!found); } static int __net_init genl_pernet_init(struct net *net) -- cgit v1.2.3 From 02dba4388d1691a087f40fe8acd2e1ffd577a07f Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 14 Jan 2015 20:02:25 -0800 Subject: bridge: fix setlink/dellink notifications problems with bridge getlink/setlink notifications today: - bridge setlink generates 
two notifications to userspace - one from the bridge driver - one from rtnetlink.c (rtnl_bridge_notify) - dellink generates one notification from rtnetlink.c. Which means bridge setlink and dellink notifications are not consistent - Looking at the code it appears, If both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF were set, the size calculation in rtnl_bridge_notify can be wrong. Example: if you set both BRIDGE_FLAGS_MASTER and BRIDGE_FLAGS_SELF in a setlink request to rocker dev, rtnl_bridge_notify will allocate skb for one set of bridge attributes, but, both the bridge driver and rocker dev will try to add attributes resulting in twice the number of attributes being added to the skb. (rocker dev calls ndo_dflt_bridge_getlink) There are multiple options: 1) Generate one notification including all attributes from master and self: But, I don't think it will work, because both master and self may use the same attributes/policy. Cannot pack the same set of attributes in a single notification from both master and slave (duplicate attributes). 2) Generate one notification from master and the other notification from self (This seems to be ideal): For master: the master driver will send notification (bridge in this example) For self: the self driver will send notification (rocker in the above example. It can use helpers from rtnetlink.c to do so. Like the ndo_dflt_bridge_getlink api). This patch implements 2) (leaving the 'rtnl_bridge_notify' around to be used with 'self'). v1->v2 : - rtnl_bridge_notify is now called only for self, so, remove 'BRIDGE_FLAGS_SELF' check and cleanup a few things - rtnl_bridge_dellink used to always send a RTM_NEWLINK msg earlier. So, I have changed the notification from br_dellink to go as RTM_NEWLINK Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 5 +++++ net/core/rtnetlink.c | 45 +++++++++++++++++++++------------------------ 2 files changed, 26 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 66ece91ee165..163950b10d8c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -569,6 +569,11 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) err = br_afspec((struct net_bridge *)netdev_priv(dev), p, afspec, RTM_DELLINK); + if (err == 0) + /* Send RTM_NEWLINK because userspace + * expects RTM_NEWLINK for vlan dels + */ + br_ifinfo_notify(RTM_NEWLINK, p); return err; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6a6cdade1676..eadc5c0e2dfa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2880,32 +2880,24 @@ static inline size_t bridge_nlmsg_size(void) + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ } -static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +static int rtnl_bridge_notify(struct net_device *dev) { struct net *net = dev_net(dev); - struct net_device *br_dev = netdev_master_upper_dev_get(dev); struct sk_buff *skb; int err = -EOPNOTSUPP; + if (!dev->netdev_ops->ndo_bridge_getlink) + return 0; + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); if (!skb) { err = -ENOMEM; goto errout; } - if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && - br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); - if (err < 0) - goto errout; - } - - if ((flags & BRIDGE_FLAGS_SELF) && - dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); - if (err < 0) - goto errout; - } + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); + if (err < 0) + goto errout; rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; @@ -2975,16 +2967,18 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) err = -EOPNOTSUPP; else err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); - - if (!err) + if (!err) { flags &= ~BRIDGE_FLAGS_SELF; + + /* Generate event to notify upper layer of bridge + * change + */ + err = rtnl_bridge_notify(dev); + } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); - /* Generate event to notify upper layer of bridge change */ - if (!err) - err = rtnl_bridge_notify(dev, oflags); out: return err; } @@ -3049,15 +3043,18 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) else err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); - if (!err) + if (!err) { flags &= ~BRIDGE_FLAGS_SELF; + + /* Generate event to notify upper layer of bridge + * change + */ + err = rtnl_bridge_notify(dev); + } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); - /* Generate event to notify upper layer of bridge change */ - if (!err) - err = rtnl_bridge_notify(dev, oflags); out: return err; } -- cgit v1.2.3 From d23b8ad8ab23f5a18b91e2396fb63d10f66b08d6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 15 Jan 2015 09:52:39 +0100 Subject: tc: add BPF based action This action provides a possibility to exec custom BPF code. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/tc_act/tc_bpf.h | 25 +++++ include/uapi/linux/tc_act/Kbuild | 1 + include/uapi/linux/tc_act/tc_bpf.h | 31 ++++++ net/sched/Kconfig | 12 +++ net/sched/Makefile | 1 + net/sched/act_bpf.c | 205 +++++++++++++++++++++++++++++++++++++ 6 files changed, 275 insertions(+) create mode 100644 include/net/tc_act/tc_bpf.h create mode 100644 include/uapi/linux/tc_act/tc_bpf.h create mode 100644 net/sched/act_bpf.c (limited to 'net') diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h new file mode 100644 index 000000000000..86a070ffc930 --- /dev/null +++ b/include/net/tc_act/tc_bpf.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NET_TC_BPF_H +#define __NET_TC_BPF_H + +#include +#include + +struct tcf_bpf { + struct tcf_common common; + struct bpf_prog *filter; + struct sock_filter *bpf_ops; + u16 bpf_num_ops; +}; +#define to_bpf(a) \ + container_of(a->priv, struct tcf_bpf, common) + +#endif /* __NET_TC_BPF_H */ diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index b057da2b87a4..19d5219b0b99 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -8,3 +8,4 @@ header-y += tc_nat.h header-y += tc_pedit.h header-y += tc_skbedit.h header-y += tc_vlan.h +header-y += tc_bpf.h diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h new file mode 100644 index 000000000000..5288bd77e63b --- /dev/null +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_BPF_H +#define __LINUX_TC_BPF_H + +#include + +#define TCA_ACT_BPF 13 + +struct tc_act_bpf { + tc_gen; +}; + +enum { + TCA_ACT_BPF_UNSPEC, + TCA_ACT_BPF_TM, + TCA_ACT_BPF_PARMS, + TCA_ACT_BPF_OPS_LEN, + TCA_ACT_BPF_OPS, + __TCA_ACT_BPF_MAX, +}; +#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index c54c9d9d1ffb..466943551581 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -698,6 +698,18 @@ config NET_ACT_VLAN To compile this code as a module, choose M here: the module will be called act_vlan. +config NET_ACT_BPF + tristate "BPF based action" + depends on NET_CLS_ACT + ---help--- + Say Y here to execute BPF code on packets. The BPF code will decide + if the packet should be dropped or not. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_bpf. 
+ config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 679f24ae7f93..7ca2b4e76312 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o +obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c new file mode 100644 index 000000000000..1bd257e473a9 --- /dev/null +++ b/net/sched/act_bpf.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define BPF_TAB_MASK 15 + +static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_bpf *b = a->priv; + int action; + int filter_res; + + spin_lock(&b->tcf_lock); + b->tcf_tm.lastuse = jiffies; + bstats_update(&b->tcf_bstats, skb); + action = b->tcf_action; + + filter_res = BPF_PROG_RUN(b->filter, skb); + if (filter_res == 0) { + /* Return code 0 from the BPF program + * is being interpreted as a drop here. + */ + action = TC_ACT_SHOT; + b->tcf_qstats.drops++; + } + + spin_unlock(&b->tcf_lock); + return action; +} + +static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *tp = skb_tail_pointer(skb); + struct tcf_bpf *b = a->priv; + struct tc_act_bpf opt = { + .index = b->tcf_index, + .refcnt = b->tcf_refcnt - ref, + .bindcnt = b->tcf_bindcnt - bind, + .action = b->tcf_action, + }; + struct tcf_t t; + struct nlattr *nla; + + if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) + goto nla_put_failure; + + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * + sizeof(struct sock_filter)); + if (!nla) + goto nla_put_failure; + + memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); + + t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(b->tcf_tm.expires); + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, tp); + return -1; +} + +static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { + [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, + [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, + [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, + .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, +}; + +static int tcf_bpf_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + struct tcf_bpf *b; + u16 bpf_size, bpf_num_ops; + struct sock_filter *bpf_ops; + struct sock_fprog_kern tmp; + struct bpf_prog *fp; + int ret; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + if (ret < 0) + return ret; + + if 
(!tb[TCA_ACT_BPF_PARMS] || + !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS]) + return -EINVAL; + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); + + bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) + return -EINVAL; + + bpf_size = bpf_num_ops * sizeof(*bpf_ops); + bpf_ops = kzalloc(bpf_size, GFP_KERNEL); + if (!bpf_ops) + return -ENOMEM; + + memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); + + tmp.len = bpf_num_ops; + tmp.filter = bpf_ops; + + ret = bpf_prog_create(&fp, &tmp); + if (ret) + goto free_bpf_ops; + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); + if (ret) + goto destroy_fp; + + ret = ACT_P_CREATED; + } else { + if (bind) + goto destroy_fp; + tcf_hash_release(a, bind); + if (!ovr) { + ret = -EEXIST; + goto destroy_fp; + } + } + + b = to_bpf(a); + spin_lock_bh(&b->tcf_lock); + b->tcf_action = parm->action; + b->bpf_num_ops = bpf_num_ops; + b->bpf_ops = bpf_ops; + b->filter = fp; + spin_unlock_bh(&b->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(a); + return ret; + +destroy_fp: + bpf_prog_destroy(fp); +free_bpf_ops: + kfree(bpf_ops); + return ret; +} + +static void tcf_bpf_cleanup(struct tc_action *a, int bind) +{ + struct tcf_bpf *b = a->priv; + + bpf_prog_destroy(b->filter); +} + +static struct tc_action_ops act_bpf_ops = { + .kind = "bpf", + .type = TCA_ACT_BPF, + .owner = THIS_MODULE, + .act = tcf_bpf, + .dump = tcf_bpf_dump, + .cleanup = tcf_bpf_cleanup, + .init = tcf_bpf_init, +}; + +static int __init bpf_init_module(void) +{ + return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); +} + +static void __exit bpf_cleanup_module(void) +{ + tcf_unregister_action(&act_bpf_ops); +} + +module_init(bpf_init_module); +module_exit(bpf_cleanup_module); + +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("TC BPF based action"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 33e9fcc666e23d2a09d413ab89a43e0514b91d77 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 15 Jan 2015 09:52:40 +0100 Subject: tc: cls_bpf: rename bpf_len to bpf_num_ops It was suggested by DaveM to change the name as "len" might indicate unit bytes. Suggested-by: David Miller Signed-off-by: Jiri Pirko Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- net/sched/cls_bpf.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 84c8219c3e1c..1029923f9e86 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -37,7 +37,7 @@ struct cls_bpf_prog { struct tcf_result res; struct list_head link; u32 handle; - u16 bpf_len; + u16 bpf_num_ops; struct tcf_proto *tp; struct rcu_head rcu; }; @@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct tcf_exts exts; struct sock_fprog_kern tmp; struct bpf_prog *fp; - u16 bpf_size, bpf_len; + u16 bpf_size, bpf_num_ops; u32 classid; int ret; @@ -173,13 +173,13 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, return ret; classid = nla_get_u32(tb[TCA_BPF_CLASSID]); - bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]); - if (bpf_len > BPF_MAXINSNS || bpf_len == 0) { + bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) { ret = -EINVAL; goto errout; } - bpf_size = bpf_len * sizeof(*bpf_ops); + bpf_size = bpf_num_ops * sizeof(*bpf_ops); bpf_ops = kzalloc(bpf_size, GFP_KERNEL); if (bpf_ops == NULL) { ret = -ENOMEM; @@ -188,14 +188,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size); - tmp.len = bpf_len; + tmp.len = bpf_num_ops; tmp.filter = bpf_ops; ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; - prog->bpf_len = bpf_len; + prog->bpf_num_ops = bpf_num_ops; prog->bpf_ops = bpf_ops; prog->filter = fp; prog->res.classid = classid; @@ -303,10 +303,10 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; - if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len)) + if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops)) goto nla_put_failure; - nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len * + nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops * sizeof(struct sock_filter)); if (nla == NULL) goto nla_put_failure; -- cgit v1.2.3 From 2061dcd6bff8b774b4fac8b0739b6be3f87bc9f2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 15 Jan 2015 16:34:35 +0100 Subject: net: sctp: fix race for one-to-many sockets in sendmsg's auto associate I.e. one-to-many sockets in SCTP are not required to explicitly call into connect(2) or sctp_connectx(2) prior to data exchange. Instead, they can directly invoke sendmsg(2) and the SCTP stack will automatically trigger connection establishment through 4WHS via sctp_primitive_ASSOCIATE(). However, this in its current implementation is racy: INIT is being sent out immediately (as it cannot be bundled anyway) and the rest of the DATA chunks are queued up for later xmit when connection is established, meaning sendmsg(2) will return successfully. This behaviour can result in an undesired side-effect that the kernel made the application think the data has already been transmitted, although none of it has actually left the machine, worst case even after close(2)'ing the socket. Instead, when the association from client side has been shut down e.g. first gracefully through SCTP_EOF and then close(2), the client could afterwards still receive the server's INIT_ACK due to a connection with higher latency. This INIT_ACK is then considered out of the blue and hence responded with ABORT as there was no alive assoc found anymore. This can be easily reproduced f.e. 
with sctp_test application from lksctp. One way to fix this race is to wait for the handshake to actually complete. The fix defers waiting after sctp_primitive_ASSOCIATE() and sctp_primitive_SEND() succeeded, so that DATA chunks cooked up from sctp_sendmsg() have already been placed into the output queue through the side-effect interpreter, and therefore can then be bundeled together with COOKIE_ECHO control chunks. strace from example application (shortened): socket(PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP) = 3 sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")}, msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5 sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")}, msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5 sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")}, msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5 sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")}, msg_iov(1)=[{"hello", 5}], msg_controllen=0, msg_flags=0}, 0) = 5 sendmsg(3, {msg_name(28)={sa_family=AF_INET, sin_port=htons(8888), sin_addr=inet_addr("192.168.1.115")}, msg_iov(0)=[], msg_controllen=48, {cmsg_len=48, cmsg_level=0x84 /* SOL_??? */, cmsg_type=, ...}, msg_flags=0}, 0) = 0 // graceful shutdown for SOCK_SEQPACKET via SCTP_EOF close(3) = 0 tcpdump before patch (fooling the application): 22:33:36.306142 IP 192.168.1.114.41462 > 192.168.1.115.8888: sctp (1) [INIT] [init tag: 3879023686] [rwnd: 106496] [OS: 10] [MIS: 65535] [init TSN: 3139201684] 22:33:36.316619 IP 192.168.1.115.8888 > 192.168.1.114.41462: sctp (1) [INIT ACK] [init tag: 3345394793] [rwnd: 106496] [OS: 10] [MIS: 10] [init TSN: 3380109591] 22:33:36.317600 IP 192.168.1.114.41462 > 192.168.1.115.8888: sctp (1) [ABORT] tcpdump after patch: 14:28:58.884116 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [INIT] [init tag: 438593213] [rwnd: 106496] [OS: 10] [MIS: 65535] [init TSN: 3092969729] 14:28:58.888414 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [INIT ACK] [init tag: 381429855] [rwnd: 106496] [OS: 10] [MIS: 10] [init TSN: 2141904492] 14:28:58.888638 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [COOKIE ECHO] , (2) [DATA] (B)(E) [TSN: 3092969729] [...] 14:28:58.893278 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [COOKIE ACK] , (2) [SACK] [cum ack 3092969729] [a_rwnd 106491] [#gap acks 0] [#dup tsns 0] 14:28:58.893591 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [DATA] (B)(E) [TSN: 3092969730] [...] 14:28:59.096963 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [SACK] [cum ack 3092969730] [a_rwnd 106496] [#gap acks 0] [#dup tsns 0] 14:28:59.097086 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [DATA] (B)(E) [TSN: 3092969731] [...] , (2) [DATA] (B)(E) [TSN: 3092969732] [...] 14:28:59.103218 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [SACK] [cum ack 3092969732] [a_rwnd 106486] [#gap acks 0] [#dup tsns 0] 14:28:59.103330 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [SHUTDOWN] 14:28:59.107793 IP 192.168.1.115.8888 > 192.168.1.114.35846: sctp (1) [SHUTDOWN ACK] 14:28:59.107890 IP 192.168.1.114.35846 > 192.168.1.115.8888: sctp (1) [SHUTDOWN COMPLETE] Looks like this bug is from the pre-git history museum. ;) Fixes: 08707d5482df ("lksctp-2_5_31-0_5_1.patch") Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2625eccb77d5..aafe94bf292e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1603,7 +1603,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sctp_assoc_t associd = 0; sctp_cmsgs_t cmsgs = { NULL }; sctp_scope_t scope; - bool fill_sinfo_ttl = false; + bool fill_sinfo_ttl = false, wait_connect = false; struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; __u16 sinfo_flags = 0; @@ -1943,6 +1943,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, if (err < 0) goto out_free; + wait_connect = true; pr_debug("%s: we associated primitively\n", __func__); } @@ -1980,6 +1981,11 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sctp_datamsg_put(datamsg); err = msg_len; + if (unlikely(wait_connect)) { + timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT); + sctp_wait_for_connect(asoc, &timeo); + } + /* If we are already past ASSOCIATE, the lower * layers are responsible for association cleanup. */ -- cgit v1.2.3 From 66c1a12c65c5bee2427b7813218d1279e4906084 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 16 Jan 2015 14:35:09 +0100 Subject: socket: use ki_nbytes instead of iov_length() This field already contains the length of the iovec, no need to calculate it again. Suggested-by: Al Viro Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/socket.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index e1278d7e1d5d..9fcf15b61009 100644 --- a/net/socket.c +++ b/net/socket.c @@ -882,16 +882,15 @@ static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, unsigned long nr_segs) { struct socket *sock = file->private_data; - size_t size = iov_length(iov, nr_segs); msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, size); + iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, iocb->ki_nbytes); msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); + return __sock_recvmsg(iocb, sock, msg, iocb->ki_nbytes, msg->msg_flags); } static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, @@ -917,18 +916,17 @@ static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, unsigned long nr_segs) { struct socket *sock = file->private_data; - size_t size = iov_length(iov, nr_segs); msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, size); + iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes); msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; if (sock->type == SOCK_SEQPACKET) msg->msg_flags |= MSG_EOR; - return __sock_sendmsg(iocb, sock, msg, size); + return __sock_sendmsg(iocb, sock, msg, iocb->ki_nbytes); } static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, -- cgit v1.2.3 From 03bf0c281234028388108d0aee720954f5fe6924 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 15 Jan 2015 23:49:36 +0100 Subject: switchdev: introduce switchdev notifier This patch introduces new notifier for purposes of exposing events which happen on switch driver side. The consumers of the event messages are mainly involved masters, namely bridge and ovs. 
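To make the intended usage concrete, here is a minimal consumer-side sketch built only on the helpers added below (the function and variable names are placeholders; the bridge wiring in the following patch is the real in-tree user):

#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <net/switchdev.h>

static int my_switch_event(struct notifier_block *unused,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr);

	/* react to the hardware-originated event reported for dev */
	netdev_info(dev, "switchdev event %lu\n", event);
	return NOTIFY_DONE;
}

static struct notifier_block my_switch_nb = {
	.notifier_call = my_switch_event,
};

static int __init my_consumer_init(void)
{
	return register_netdev_switch_notifier(&my_switch_nb);
}
module_init(my_consumer_init);

static void __exit my_consumer_exit(void)
{
	unregister_netdev_switch_notifier(&my_switch_nb);
}
module_exit(my_consumer_exit);
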
Suggested-by: Thomas Graf Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/net/switchdev.h | 31 ++++++++++++++++++++++ net/switchdev/switchdev.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) (limited to 'net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8a6d1641fd9b..7f8d74372d87 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -11,12 +11,27 @@ #define _LINUX_SWITCHDEV_H_ #include +#include + +struct netdev_switch_notifier_info { + struct net_device *dev; +}; + +static inline struct net_device * +netdev_switch_notifier_info_to_dev(const struct netdev_switch_notifier_info *info) +{ + return info->dev; +} #ifdef CONFIG_NET_SWITCHDEV int netdev_switch_parent_id_get(struct net_device *dev, struct netdev_phys_item_id *psid); int netdev_switch_port_stp_update(struct net_device *dev, u8 state); +int register_netdev_switch_notifier(struct notifier_block *nb); +int unregister_netdev_switch_notifier(struct notifier_block *nb); +int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, + struct netdev_switch_notifier_info *info); #else @@ -32,6 +47,22 @@ static inline int netdev_switch_port_stp_update(struct net_device *dev, return -EOPNOTSUPP; } +static inline int register_netdev_switch_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_netdev_switch_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, + struct netdev_switch_notifier_info *info); +{ + return NOTIFY_DONE; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index d162b21b14bd..22e02f4edd99 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -50,3 +52,66 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) return ops->ndo_switch_port_stp_update(dev, state); } EXPORT_SYMBOL(netdev_switch_port_stp_update); + +static DEFINE_MUTEX(netdev_switch_mutex); +static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); + +/** + * register_netdev_switch_notifier - Register nofifier + * @nb: notifier_block + * + * Register switch device notifier. This should be used by code + * which needs to monitor events happening in particular device. + * Return values are same as for atomic_notifier_chain_register(). + */ +int register_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(register_netdev_switch_notifier); + +/** + * unregister_netdev_switch_notifier - Unregister nofifier + * @nb: notifier_block + * + * Unregister switch device notifier. + * Return values are same as for atomic_notifier_chain_unregister(). + */ +int unregister_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(unregister_netdev_switch_notifier); + +/** + * call_netdev_switch_notifiers - Call nofifiers + * @val: value passed unmodified to notifier function + * @dev: port device + * @info: notifier information data + * + * Call all network notifier blocks. 
This should be called by driver + * when it needs to propagate hardware event. + * Return values are same as for atomic_notifier_call_chain(). + */ +int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, + struct netdev_switch_notifier_info *info) +{ + int err; + + info->dev = dev; + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(call_netdev_switch_notifiers); -- cgit v1.2.3 From 3aeb66176ffa8fefd7a9f7d37bda1d8adcf469a1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 15 Jan 2015 23:49:37 +0100 Subject: net: replace br_fdb_external_learn_* calls with switchdev notifier events This patch benefits from newly introduced switchdev notifier and uses it to propagate fdb learn events from rocker driver to bridge. That avoids direct function calls and possible use by other listeners (ovs). Suggested-by: Thomas Graf Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 10 +++++-- include/linux/if_bridge.h | 18 ------------- include/net/switchdev.h | 11 ++++++++ net/bridge/br.c | 52 +++++++++++++++++++++++++++++++++++- net/bridge/br_fdb.c | 38 +++----------------------- net/bridge/br_private.h | 4 +++ 6 files changed, 78 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index cad8cf962cdf..964d719b150f 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3026,11 +3026,17 @@ static void rocker_port_fdb_learn_work(struct work_struct *work) container_of(work, struct rocker_fdb_learn_work, work); bool removing = (lw->flags & ROCKER_OP_FLAG_REMOVE); bool learned = (lw->flags & ROCKER_OP_FLAG_LEARNED); + struct netdev_switch_notifier_fdb_info info; + + info.addr = lw->addr; + info.vid = lw->vid; if (learned && removing) - br_fdb_external_learn_del(lw->dev, lw->addr, lw->vid); + call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_DEL, + lw->dev, &info.info); else if (learned && !removing) - br_fdb_external_learn_add(lw->dev, lw->addr, lw->vid); + call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_ADD, + lw->dev, &info.info); kfree(work); } diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 0a8ce762a47f..a57bca2ea97e 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -50,24 +50,6 @@ extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __use typedef int br_should_route_hook_t(struct sk_buff *skb); extern br_should_route_hook_t __rcu *br_should_route_hook; -#if IS_ENABLED(CONFIG_BRIDGE) -int br_fdb_external_learn_add(struct net_device *dev, - const unsigned char *addr, u16 vid); -int br_fdb_external_learn_del(struct net_device *dev, - const unsigned char *addr, u16 vid); -#else -static inline int br_fdb_external_learn_add(struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - return 0; -} -static inline int br_fdb_external_learn_del(struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - return 0; -} -#endif - #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 7f8d74372d87..201120e18e4d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -13,10 +13,21 @@ #include #include +enum 
netdev_switch_notifier_type { + NETDEV_SWITCH_FDB_ADD = 1, + NETDEV_SWITCH_FDB_DEL, +}; + struct netdev_switch_notifier_info { struct net_device *dev; }; +struct netdev_switch_notifier_fdb_info { + struct netdev_switch_notifier_info info; /* must be first */ + const unsigned char *addr; + u16 vid; +}; + static inline struct net_device * netdev_switch_notifier_info_to_dev(const struct netdev_switch_notifier_info *info) { diff --git a/net/bridge/br.c b/net/bridge/br.c index 44425aff7cba..fb57ab6b24f9 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "br_private.h" @@ -120,6 +121,48 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; +static int br_netdev_switch_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + struct netdev_switch_notifier_fdb_info *fdb_info; + int err = NOTIFY_DONE; + + rtnl_lock(); + p = br_port_get_rtnl(dev); + if (!p) + goto out; + + br = p->br; + + switch (event) { + case NETDEV_SWITCH_FDB_ADD: + fdb_info = ptr; + err = br_fdb_external_learn_add(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + case NETDEV_SWITCH_FDB_DEL: + fdb_info = ptr; + err = br_fdb_external_learn_del(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + } + +out: + rtnl_unlock(); + return err; +} + +static struct notifier_block br_netdev_switch_notifier = { + .notifier_call = br_netdev_switch_event, +}; + static void __net_exit br_net_exit(struct net *net) { struct net_device *dev; @@ -169,10 +212,14 @@ static int __init br_init(void) if (err) goto err_out3; - err = br_netlink_init(); + err = register_netdev_switch_notifier(&br_netdev_switch_notifier); if (err) goto err_out4; + err = br_netlink_init(); + if (err) + goto err_out5; + brioctl_set(br_ioctl_deviceless_stub); #if IS_ENABLED(CONFIG_ATM_LANE) @@ -185,6 +232,8 @@ static int __init br_init(void) return 0; +err_out5: + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: @@ -202,6 +251,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); unregister_pernet_subsys(&br_net_ops); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e6e0372bc3cd..03667e65cc29 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -990,26 +990,14 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } } -int br_fdb_external_learn_add(struct net_device *dev, +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1034,33 +1022,18 @@ int br_fdb_external_learn_add(struct net_device *dev, err_unlock: spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } 
-EXPORT_SYMBOL(br_fdb_external_learn_add); -int br_fdb_external_learn_del(struct net_device *dev, +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1071,9 +1044,6 @@ int br_fdb_external_learn_del(struct net_device *dev, err = -ENOENT; spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } -EXPORT_SYMBOL(br_fdb_external_learn_del); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d808d766334d..e8e3f3681680 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -402,6 +402,10 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); -- cgit v1.2.3 From d6e164e3215794f9920af69cd2c6794632773478 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 16 Jan 2015 12:30:40 +0100 Subject: tipc: fix socket list regression in new nl api Commit 07f6c4bc (tipc: convert tipc reference table to use generic rhashtable) introduced a problem with port listing in the new netlink API. It broke the resume functionality resulting in a never ending loop. This was caused by starting with the first hash table every time subsequently never returning an empty skb (terminating). This patch fixes the resume mechanism by keeping a logical reference to the last hash table along with a logical reference to the socket (port) that didn't fit in the previous message. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 720fda6cc2e6..679a22082fcb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2749,29 +2749,35 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) struct tipc_sock *tsk; const struct bucket_table *tbl; struct rhash_head *pos; - u32 prev_portid = cb->args[0]; - u32 portid = prev_portid; struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); - int i; + u32 tbl_id = cb->args[0]; + u32 prev_portid = cb->args[1]; rcu_read_lock(); tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + for (; tbl_id < tbl->size; tbl_id++) { + rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) { spin_lock_bh(&tsk->sk.sk_lock.slock); - portid = tsk->portid; + if (prev_portid && prev_portid != tsk->portid) { + spin_unlock_bh(&tsk->sk.sk_lock.slock); + continue; + } + err = __tipc_nl_add_sk(skb, cb, tsk); + if (err) { + prev_portid = tsk->portid; + spin_unlock_bh(&tsk->sk.sk_lock.slock); + goto out; + } + prev_portid = 0; spin_unlock_bh(&tsk->sk.sk_lock.slock); - if (err) - break; - - prev_portid = portid; } } +out: rcu_read_unlock(); - - cb->args[0] = prev_portid; + cb->args[0] = tbl_id; + cb->args[1] = prev_portid; return skb->len; } -- cgit v1.2.3 From 053c095a82cf773075e83d7233b5cc19a1f73ece Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jan 2015 22:09:00 +0100 Subject: netlink: make nlmsg_end() and genlmsg_end() void Contrary to common expectations for an "int" return, these functions return only a positive value -- if used correctly they cannot even return 0 because the message header will necessarily be in the skb. This makes the very common pattern of if (genlmsg_end(...) < 0) { ... } be a whole bunch of dead code. Many places also simply do return nlmsg_end(...); and the caller is expected to deal with it. This also commonly (at least for me) causes errors, because it is very common to write if (my_function(...)) /* error condition */ and if my_function() does "return nlmsg_end()" this is of course wrong. Additionally, there's not a single place in the kernel that actually needs the message length returned, and if anyone needs it later then it'll be very easy to just use skb->len there. Remove this, and make the functions void. This removes a bunch of dead code as described above. The patch adds lines because I did - return nlmsg_end(...); + nlmsg_end(...); + return 0; I could have preserved all the function's return values by returning skb->len, but instead I've audited all the places calling the affected functions and found that none cared. A few places actually compared the return value with <= 0 in dump functionality, but that could just be changed to < 0 with no change in behaviour, so I opted for the more efficient version. One instance of the error I've made numerous times now is also present in net/phonet/pn_netlink.c in the route_dumpit() function - it didn't check for <0 or <=0 and thus broke out of the loop every single time. I've preserved this since it will (I think) have caused the messages to userspace to be formatted differently with just a single message for every SKB returned to userspace. 
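Roughly the shape of that dump loop, with placeholder names (a simplified sketch, not the literal phonet code):

static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx;

	for (idx = cb->args[0]; idx < ROUTES_MAX; idx++) {
		/* fill_route() used to end with "return nlmsg_end(...)",
		 * i.e. a positive length, so this test was always true
		 * and the dump stopped after a single entry.
		 */
		if (fill_route(skb, cb, idx))
			break;
	}
	cb->args[0] = idx;
	return skb->len;
}
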
It's possible that this isn't needed for the tools that actually use this, but I don't even know what they are so couldn't test that changing this behaviour would be acceptable. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/acpi/event.c | 7 +------ drivers/net/ethernet/rocker/rocker.c | 3 ++- drivers/net/vxlan.c | 3 ++- drivers/net/wireless/mac80211_hwsim.c | 3 ++- drivers/scsi/pmcraid.c | 8 +------- drivers/target/target_core_user.c | 4 +--- drivers/thermal/thermal_core.c | 6 +----- fs/dlm/netlink.c | 7 +------ include/net/genetlink.h | 4 ++-- include/net/netlink.h | 6 +----- kernel/taskstats.c | 13 ++----------- net/bridge/br_fdb.c | 3 ++- net/bridge/br_mdb.c | 3 ++- net/bridge/br_netlink.c | 3 ++- net/can/gw.c | 3 ++- net/core/fib_rules.c | 3 ++- net/core/neighbour.c | 12 ++++++++---- net/core/rtnetlink.c | 9 ++++++--- net/decnet/dn_dev.c | 3 ++- net/decnet/dn_route.c | 3 ++- net/decnet/dn_table.c | 3 ++- net/ieee802154/netlink.c | 12 ++---------- net/ieee802154/nl-mac.c | 3 ++- net/ieee802154/nl-phy.c | 3 ++- net/ieee802154/nl802154.c | 6 ++++-- net/ipv4/devinet.c | 8 +++++--- net/ipv4/fib_semantics.c | 3 ++- net/ipv4/inet_diag.c | 9 ++++++--- net/ipv4/ipmr.c | 3 ++- net/ipv4/route.c | 3 ++- net/ipv4/tcp_metrics.c | 3 ++- net/ipv6/addrconf.c | 32 +++++++++++++++++++------------- net/ipv6/addrlabel.c | 5 +++-- net/ipv6/ip6_fib.c | 1 - net/ipv6/ip6mr.c | 3 ++- net/ipv6/route.c | 3 ++- net/l2tp/l2tp_netlink.c | 10 ++++++---- net/netfilter/ipvs/ip_vs_ctl.c | 9 ++++++--- net/netfilter/nf_tables_api.c | 18 ++++++++++++------ net/netlabel/netlabel_cipso_v4.c | 3 ++- net/netlabel/netlabel_mgmt.c | 6 ++++-- net/netlabel/netlabel_unlabeled.c | 3 ++- net/netlink/diag.c | 3 ++- net/netlink/genetlink.c | 6 ++++-- net/nfc/netlink.c | 12 +++++++----- net/openvswitch/datapath.c | 9 ++++++--- net/packet/diag.c | 3 ++- net/phonet/pn_netlink.c | 16 +++++++++++----- net/unix/diag.c | 3 ++- net/wireless/nl80211.c | 27 ++++++++++++++++++--------- net/xfrm/xfrm_user.c | 27 ++++++++++++++++++--------- 51 files changed, 203 insertions(+), 158 deletions(-) (limited to 'net') diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index ef2d730734dc..e24ea4e796e4 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -100,7 +100,6 @@ int acpi_bus_generate_netlink_event(const char *device_class, struct acpi_genl_event *event; void *msg_header; int size; - int result; /* allocate memory */ size = nla_total_size(sizeof(struct acpi_genl_event)) + @@ -137,11 +136,7 @@ int acpi_bus_generate_netlink_event(const char *device_class, event->data = data; /* send multicast genetlink message */ - result = genlmsg_end(skb, msg_header); - if (result < 0) { - nlmsg_free(skb); - return result; - } + genlmsg_end(skb, msg_header); genlmsg_multicast(&acpi_event_genl_family, skb, 0, 0, GFP_ATOMIC); return 0; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 964d719b150f..d54781e71cd4 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3674,7 +3674,8 @@ static int rocker_fdb_fill_info(struct sk_buff *skb, if (vid && nla_put_u16(skb, NDA_VLAN, vid)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6b6b45622a0a..c5f79e7513a6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -363,7 +363,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, 
if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 494e7335aa64..4a4c6586a8d2 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2557,7 +2557,8 @@ static int mac80211_hwsim_get_radio(struct sk_buff *skb, if (res < 0) goto out_err; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; out_err: genlmsg_cancel(skb, hdr); diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 8c27b6a77ec4..cf222f46eac5 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1473,13 +1473,7 @@ static int pmcraid_notify_aen( } /* send genetlink multicast message to notify appplications */ - result = genlmsg_end(skb, msg_header); - - if (result < 0) { - pmcraid_err("genlmsg_end failed\n"); - nlmsg_free(skb); - return result; - } + genlmsg_end(skb, msg_header); result = genlmsg_multicast(&pmcraid_event_family, skb, 0, 0, GFP_ATOMIC); diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 1157b559683b..1a1bcf71ec9d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -784,9 +784,7 @@ static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int mino if (ret < 0) goto free_skb; - ret = genlmsg_end(skb, msg_header); - if (ret < 0) - goto free_skb; + genlmsg_end(skb, msg_header); ret = genlmsg_multicast(&tcmu_genl_family, skb, 0, TCMU_MCGRP_CONFIG, GFP_KERNEL); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 87e0b0782023..48491d1a81d6 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1759,11 +1759,7 @@ int thermal_generate_netlink_event(struct thermal_zone_device *tz, thermal_event->event = event; /* send multicast genetlink message */ - result = genlmsg_end(skb, msg_header); - if (result < 0) { - nlmsg_free(skb); - return result; - } + genlmsg_end(skb, msg_header); result = genlmsg_multicast(&thermal_event_genl_family, skb, 0, 0, GFP_ATOMIC); diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index e7cfbaf8d0e2..1e6e227134d7 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -56,13 +56,8 @@ static int send_data(struct sk_buff *skb) { struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); void *data = genlmsg_data(genlhdr); - int rv; - rv = genlmsg_end(skb, data); - if (rv < 0) { - nlmsg_free(skb); - return rv; - } + genlmsg_end(skb, data); return genlmsg_unicast(&init_net, skb, listener_nlportid); } diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 84125088c309..f24aa83b80b6 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -245,9 +245,9 @@ static inline void *genlmsg_put_reply(struct sk_buff *skb, * @skb: socket buffer the message is stored in * @hdr: user specific header */ -static inline int genlmsg_end(struct sk_buff *skb, void *hdr) +static inline void genlmsg_end(struct sk_buff *skb, void *hdr) { - return nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); + nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); } /** diff --git a/include/net/netlink.h b/include/net/netlink.h index d5869b90bfbb..e010ee8da41d 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -490,14 +490,10 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) * Corrects the netlink message header to 
include the appeneded * attributes. Only necessary if attributes have been added to * the message. - * - * Returns the total data length of the skb. */ -static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) +static inline void nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) { nlh->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; - - return skb->len; } /** diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 670fff88a961..21f82c29c914 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -111,13 +111,8 @@ static int send_reply(struct sk_buff *skb, struct genl_info *info) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); void *reply = genlmsg_data(genlhdr); - int rc; - rc = genlmsg_end(skb, reply); - if (rc < 0) { - nlmsg_free(skb); - return rc; - } + genlmsg_end(skb, reply); return genlmsg_reply(skb, info); } @@ -134,11 +129,7 @@ static void send_cpu_listeners(struct sk_buff *skb, void *reply = genlmsg_data(genlhdr); int rc, delcount = 0; - rc = genlmsg_end(skb, reply); - if (rc < 0) { - nlmsg_free(skb); - return; - } + genlmsg_end(skb, reply); rc = 0; down_read(&listeners->sem); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 03667e65cc29..08bf04bdac58 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -633,7 +633,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index fed61c971b17..409608960899 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -190,7 +190,8 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, nla_nest_end(skb, nest2); nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; end: nla_nest_end(skb, nest); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 163950b10d8c..528cf2790a5f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -263,7 +263,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, } done: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/can/gw.c b/net/can/gw.c index 295f62e62eb3..a6f448e18ea8 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -575,7 +575,8 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; cancel: nlmsg_cancel(skb, nlh); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 185c341fafbd..44706e81b2e0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -609,7 +609,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8d614c93f86a..d36d564f149f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1884,7 +1884,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, goto nla_put_failure; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: read_unlock_bh(&tbl->lock); @@ -1917,7 +1918,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb, goto errout; 
read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); @@ -2202,7 +2204,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2232,7 +2235,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eadc5c0e2dfa..e13b9dbdf154 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1199,7 +1199,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, af_spec); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2326,7 +2327,8 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2809,7 +2811,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_nest_end(skb, protinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4400da7739da..b2c26b081134 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -702,7 +702,8 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || nla_put_u32(skb, IFA_FLAGS, ifa_flags)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index daccc4a36d80..812e5e6e88fb 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1616,7 +1616,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0) goto errout; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 3f19fcbf126d..1540b506e3e0 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -367,7 +367,8 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp_head); } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index fa1464762d0d..c8133c07ceee 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -63,13 +63,9 @@ int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); return genlmsg_multicast(&nl802154_family, msg, 0, group, GFP_ATOMIC); -out: - nlmsg_free(msg); - return -ENOBUFS; } struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, @@ -96,13 +92,9 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = 
genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); -out: - nlmsg_free(msg); - return -ENOBUFS; } static const struct genl_ops ieee8021154_ops[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 3c902e9516fb..9105265920fe 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -136,7 +136,8 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, } wpan_phy_put(phy); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: wpan_phy_put(phy); diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 7baf98b14611..1b9d25f6e898 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -65,7 +65,8 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, goto nla_put_failure; mutex_unlock(&phy->pib_lock); kfree(buf); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: mutex_unlock(&phy->pib_lock); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index a25b9bbd077b..a4daf91b8d0a 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -306,7 +306,8 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -489,7 +490,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u8(msg, NL802154_ATTR_LBT_MODE, wpan_dev->lbt)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 214882e7d6de..5f344eb3fc25 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1522,7 +1522,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, preferred, valid)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1566,7 +1567,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWADDR, NLM_F_MULTI) <= 0) { + RTM_NEWADDR, NLM_F_MULTI) < 0) { rcu_read_unlock(); goto done; } @@ -1749,7 +1750,8 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d2b7b5521b1b..265cb72b7c1b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1091,7 +1091,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp); } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e34dccbc4d70..81751f12645f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -203,7 +203,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, icsk->icsk_ca_ops->get_info(sk, ext, skb); out: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); @@ -271,7 +272,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, } #endif - return 
nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, @@ -758,7 +760,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c8034587859d..9d78427652d2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2290,7 +2290,8 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce112d0f2698..f6e43ca5e641 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2390,7 +2390,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index ed9c9a91851c..e5f41bd5ec1b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -886,7 +886,8 @@ static int tcp_metrics_dump_info(struct sk_buff *skb, if (tcp_metrics_fill_info(skb, tm) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f7c8bbeb27b7..8975d9501d50 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -489,7 +489,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -619,7 +620,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) { + -1) < 0) { rcu_read_unlock(); goto done; } @@ -635,7 +636,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -646,7 +647,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -4047,7 +4048,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) goto error; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; error: nlmsg_cancel(skb, nlh); @@ -4076,7 +4078,8 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, @@ -4101,7 +4104,8 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } enum addr_type_t { @@ -4134,7 +4138,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } @@ -4151,7 +4155,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, 
NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4166,7 +4170,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4638,7 +4642,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, goto nla_put_failure; nla_nest_end(skb, protoinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -4670,7 +4675,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWLINK, NLM_F_MULTI) <= 0) + RTM_NEWLINK, NLM_F_MULTI) < 0) goto out; cont: idx++; @@ -4747,7 +4752,8 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, ci.valid_time = ntohl(pinfo->valid); if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index fd0dc47f471d..e43e79d0a612 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -490,7 +490,8 @@ static int ip6addrlbl_fill(struct sk_buff *skb, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -510,7 +511,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } idx++; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 03c520a4ebeb..53775ee7d376 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -277,7 +277,6 @@ static int fib6_dump_node(struct fib6_walker *w) w->leaf = rt; return 1; } - WARN_ON(res == 0); } w->leaf = NULL; return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 722669754bbf..34b682617f50 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2388,7 +2388,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 34dcbb59df75..c60f15775c53 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2725,7 +2725,8 @@ static int rt6_fill_node(struct net *net, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 6b16598f31d5..b4e923f77954 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -390,7 +390,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla } out: - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -451,7 +452,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - tunnel, L2TP_CMD_TUNNEL_GET) <= 0) + tunnel, L2TP_CMD_TUNNEL_GET) < 0) goto out; ti++; @@ -752,7 +753,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl goto nla_put_failure; nla_nest_end(skb, nest); - 
return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -816,7 +818,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session, L2TP_CMD_SESSION_GET) <= 0) + session, L2TP_CMD_SESSION_GET) < 0) break; si++; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b8295a430a56..e55759056361 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2887,7 +2887,8 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, if (ip_vs_genl_fill_service(skb, svc) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3079,7 +3080,8 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, if (ip_vs_genl_fill_dest(skb, dest) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3215,7 +3217,8 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3b3ddb4fb9ee..70f697827b9b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -427,7 +427,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -971,7 +972,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -1707,7 +1709,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -2361,7 +2364,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; nla_nest_end(skb, desc); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3035,7 +3039,8 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3324,7 +3329,8 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c2f2a53a4879..179625353cac 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -641,7 +641,8 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) if (ret_val != 0) goto listall_cb_failure; - return genlmsg_end(cb_arg->skb, data); + 
genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index e66e977ef2fa..8b3b789c43c2 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -456,7 +456,8 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) goto listall_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); @@ -620,7 +621,8 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, if (ret_val != 0) goto protocols_cb_failure; - return genlmsg_end(skb, data); + genlmsg_end(skb, data); + return 0; protocols_cb_failure: genlmsg_cancel(skb, data); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 78a63c18779e..aec7994f78cf 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1163,7 +1163,8 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, goto list_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; list_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlink/diag.c b/net/netlink/diag.c index bb59a7ed0859..3ee63a3cff30 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -91,7 +91,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sk_diag_put_rings_cfg(sk, skb)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2e11061ef885..f52a7d5734cd 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -756,7 +756,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, nla_nest_end(skb, nla_grps); } - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -796,7 +797,8 @@ static int ctrl_fill_mcgrp_info(struct genl_family *family, nla_nest_end(skb, nest); nla_nest_end(skb, nla_grps); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 44989fc8cddf..be387e6219a0 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -102,7 +102,8 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -518,7 +519,8 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -908,7 +910,8 @@ static int nfc_genl_send_params(struct sk_buff *msg, nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: @@ -1247,8 +1250,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev, nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; - if (genlmsg_end(msg, hdr) < 0) - goto nla_put_failure; + genlmsg_end(msg, hdr); } return 0; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8bda3cc12344..f45f1bf4422c 100644 --- 
a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -799,7 +799,8 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, if (err) goto error; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; error: genlmsg_cancel(skb, ovs_header); @@ -1349,7 +1350,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) goto nla_put_failure; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: genlmsg_cancel(skb, ovs_header); @@ -1723,7 +1725,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (err == -EMSGSIZE) goto error; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: err = -EMSGSIZE; diff --git a/net/packet/diag.c b/net/packet/diag.c index 92f2c7107eec..0ed68f0238bf 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -177,7 +177,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, PACKET_DIAG_FILTER)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index b64151ade6b3..54d766842c2b 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -121,7 +121,8 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, ifm->ifa_index = dev->ifindex; if (nla_put_u8(skb, IFA_LOCAL, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -190,7 +191,8 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, if (nla_put_u8(skb, RTA_DST, dst) || nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -282,9 +284,13 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (addr_idx++ < addr_start_idx) continue; - if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE)) - goto out; + fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE); + /* fill_route() used to return > 0 (or negative errors) but + * never 0 - ignore the return value and just go out to + * call dumpit again from outside to preserve the behavior + */ + goto out; } out: diff --git a/net/unix/diag.c b/net/unix/diag.c index 86fa0f3b2caf..ef542fbca9fe 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -155,7 +155,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 380784378df8..4ed9039bd5f9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1721,7 +1721,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, break; } finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -2404,7 +2405,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; 
nla_put_failure: genlmsg_cancel(msg, hdr); @@ -3825,7 +3827,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, sinfo->assoc_req_ies)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -4555,7 +4558,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, pinfoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -5507,7 +5511,8 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -6577,7 +6582,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, nla_nest_end(msg, bss); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; fail_unlock_rcu: rcu_read_unlock(); @@ -6686,7 +6692,8 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, infoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -11025,7 +11032,8 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, /* ignore errors and send incomplete event anyway */ nl80211_add_scan_req(msg, rdev); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -11048,7 +11056,8 @@ nl80211_send_sched_scan_msg(struct sk_buff *msg, nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8128594ab379..7de2ed9ec46d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1019,7 +1019,8 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1121,7 +1122,8 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1842,7 +1844,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) goto out_cancel; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2282,7 +2285,8 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, goto out_cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2490,7 +2494,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) return err; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2712,7 +2717,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, @@ -2827,7 +2833,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, } upe->hard = !!hard; - return 
nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2986,7 +2993,8 @@ static int build_report(struct sk_buff *skb, u8 proto, return err; } } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_report(struct net *net, u8 proto, @@ -3031,7 +3039,8 @@ static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, um->old_sport = x->encap->encap_sport; um->reqid = x->props.reqid; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, -- cgit v1.2.3 From 7b46a644a407f9f2f1c8f7b2af157c79af55b49e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 18 Jan 2015 23:36:08 -0500 Subject: netlink: Fix bugs in nlmsg_end() conversions. Commit 053c095a82cf ("netlink: make nlmsg_end() and genlmsg_end() void") didn't catch all of the cases where callers were breaking out on the return value being equal to zero, which they no longer should when zero means success. Fix all such cases. Reported-by: Marcel Holtmann Reported-by: Scott Feldman Signed-off-by: David S. Miller --- net/core/neighbour.c | 8 ++++---- net/core/rtnetlink.c | 2 +- net/decnet/dn_route.c | 5 +---- net/ipv4/devinet.c | 6 +++--- net/ipv4/route.c | 2 +- net/ipv6/addrconf.c | 2 +- 6 files changed, 11 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d36d564f149f..70fe9e10ac86 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2128,7 +2128,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + NLM_F_MULTI) < 0) break; nidx = 0; @@ -2144,7 +2144,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + NLM_F_MULTI) < 0) goto out; next: nidx++; @@ -2274,7 +2274,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI) <= 0) { + NLM_F_MULTI) < 0) { rc = -1; goto out; } @@ -2311,7 +2311,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI, tbl) <= 0) { + NLM_F_MULTI, tbl) < 0) { read_unlock_bh(&tbl->lock); rc = -1; goto out; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e13b9dbdf154..0e26b9f66cad 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1327,7 +1327,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) */ WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err <= 0) + if (err < 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 812e5e6e88fb..1d7c1256e845 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1710,9 +1710,6 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rt->rt_flags |= RTCF_NOTIFY; err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - - if (err == 0) - goto out_free; if (err < 0) { err = -EMSGSIZE; goto out_free; @@ -1763,7 +1760,7 @@ int dn_cache_dump(struct sk_buff *skb, struct 
netlink_callback *cb) skb_dst_set(skb, dst_clone(&rt->dst)); if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, - 1, NLM_F_MULTI) <= 0) { + 1, NLM_F_MULTI) < 0) { skb_dst_drop(skb); rcu_read_unlock_bh(); goto done; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5f344eb3fc25..59ebe16d06fc 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1883,7 +1883,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) { + -1) < 0) { rcu_read_unlock(); goto done; } @@ -1899,7 +1899,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -1910,7 +1910,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6e43ca5e641..2000110c75f0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2483,7 +2483,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) err = rt_fill_info(net, dst, src, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (err <= 0) + if (err < 0) goto errout_free; err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8975d9501d50..d6b4f5d08014 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4213,7 +4213,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, goto cont; if (in6_dump_addrs(idev, skb, cb, type, - s_ip_idx, &ip_idx) <= 0) + s_ip_idx, &ip_idx) < 0) goto done; cont: idx++; -- cgit v1.2.3 From 4de8b413700e78560388eb14c4bbc67aff62da6d Mon Sep 17 00:00:00 2001 From: "Rosen, Rami" Date: Mon, 19 Jan 2015 11:45:04 +0200 Subject: bridge: remove oflags from setlink/dellink. Commit 02dba4388d16 ("bridge: fix setlink/dellink notifications") removed usage of oflags in both rtnl_bridge_setlink() and rtnl_bridge_dellink() methods. This patch removes this variable as it is no longer needed. Signed-off-by: Rami Rosen Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0e26b9f66cad..47b39f3e867c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2918,7 +2918,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; - u16 oflags, flags = 0; + u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) @@ -2948,8 +2948,6 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) } } - oflags = flags; - if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); @@ -2993,7 +2991,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; - u16 oflags, flags = 0; + u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) @@ -3023,8 +3021,6 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) } } - oflags = flags; - if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); -- cgit v1.2.3 From 75e8d06d4308436055d1a78a2c02bf6328ba724d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 14 Jan 2015 15:33:57 +0100 Subject: netfilter: nf_tables: validate hooks in NAT expressions The user can crash the kernel if it uses any of the existing NAT expressions from the wrong hook, so add some code to validate this when loading the rule. This patch introduces nft_chain_validate_hooks() which is based on an existing function in the bridge version of the reject expression. 
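For illustration, a hook-restricted ->validate() callback built on the new helper looks roughly like this (a sketch only; nft_example_validate() is a made-up name, but it mirrors the masq/nat/redir conversions in the diff below):

    static int nft_example_validate(const struct nft_ctx *ctx,
                                    const struct nft_expr *expr,
                                    const struct nft_data **data)
    {
            int err;

            /* the expression still has to sit in a NAT chain ... */
            err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
            if (err < 0)
                    return err;

            /* ... and is now also limited to specific base chain hooks */
            return nft_chain_validate_hooks(ctx->chain,
                                            (1 << NF_INET_POST_ROUTING));
    }

The same callback is also invoked from the expression's init path, so a rule loaded into a chain attached to the wrong hook is rejected at load time instead of crashing later.
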
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/bridge/netfilter/nft_reject_bridge.c | 29 +++++------------------ net/netfilter/nf_tables_api.c | 18 ++++++++++++++ net/netfilter/nft_masq.c | 26 ++++++++++++++------- net/netfilter/nft_nat.c | 40 ++++++++++++++++++++++++-------- net/netfilter/nft_redir.c | 25 +++++++++++++------- 6 files changed, 90 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3ae969e3acf0..9eaaa7884586 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -530,6 +530,8 @@ enum nft_chain_type { int nft_chain_validate_dependency(const struct nft_chain *chain, enum nft_chain_type type); +int nft_chain_validate_hooks(const struct nft_chain *chain, + unsigned int hook_flags); struct nft_stats { u64 bytes; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index b0330aecbf97..3244aead0926 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -265,22 +265,12 @@ out: data[NFT_REG_VERDICT].verdict = NF_DROP; } -static int nft_reject_bridge_validate_hooks(const struct nft_chain *chain) +static int nft_reject_bridge_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { - struct nft_base_chain *basechain; - - if (chain->flags & NFT_BASE_CHAIN) { - basechain = nft_base_chain(chain); - - switch (basechain->ops[0].hooknum) { - case NF_BR_PRE_ROUTING: - case NF_BR_LOCAL_IN: - break; - default: - return -EOPNOTSUPP; - } - } - return 0; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN)); } static int nft_reject_bridge_init(const struct nft_ctx *ctx, @@ -290,7 +280,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx, struct nft_reject *priv = nft_expr_priv(expr); int icmp_code, err; - err = nft_reject_bridge_validate_hooks(ctx->chain); + err = nft_reject_bridge_validate(ctx, expr, NULL); if (err < 0) return err; @@ -341,13 +331,6 @@ nla_put_failure: return -1; } -static int nft_reject_bridge_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_reject_bridge_validate_hooks(ctx->chain); -} - static struct nft_expr_type nft_reject_bridge_type; static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3b3ddb4fb9ee..7e686948ddca 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3753,6 +3753,24 @@ int nft_chain_validate_dependency(const struct nft_chain *chain, } EXPORT_SYMBOL_GPL(nft_chain_validate_dependency); +int nft_chain_validate_hooks(const struct nft_chain *chain, + unsigned int hook_flags) +{ + struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + + if ((1 << basechain->ops[0].hooknum) & hook_flags) + return 0; + + return -EOPNOTSUPP; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); + /* * Loop detection - walk through the ruleset beginning at the destination chain * of a new jump until either the source chain is reached (loop) or all diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index d1ffd5eb3a9b..9aea747b43ea 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -21,6 +21,21 @@ const struct nla_policy 
nft_masq_policy[NFTA_MASQ_MAX + 1] = { }; EXPORT_SYMBOL_GPL(nft_masq_policy); +int nft_masq_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_POST_ROUTING)); +} +EXPORT_SYMBOL_GPL(nft_masq_validate); + int nft_masq_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -28,8 +43,8 @@ int nft_masq_init(const struct nft_ctx *ctx, struct nft_masq *priv = nft_expr_priv(expr); int err; - err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); - if (err < 0) + err = nft_masq_validate(ctx, expr, NULL); + if (err) return err; if (tb[NFTA_MASQ_FLAGS] == NULL) @@ -60,12 +75,5 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_masq_dump); -int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); -} -EXPORT_SYMBOL_GPL(nft_masq_validate); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez "); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index aff54fb1c8a0..a0837c6c9283 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -88,17 +88,40 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { [NFTA_NAT_FLAGS] = { .type = NLA_U32 }, }; -static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_nat_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { struct nft_nat *priv = nft_expr_priv(expr); - u32 family; int err; err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; + switch (priv->type) { + case NFT_NAT_SNAT: + err = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN)); + break; + case NFT_NAT_DNAT: + err = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT)); + break; + } + + return err; +} + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + u32 family; + int err; + if (tb[NFTA_NAT_TYPE] == NULL || (tb[NFTA_NAT_REG_ADDR_MIN] == NULL && tb[NFTA_NAT_REG_PROTO_MIN] == NULL)) @@ -115,6 +138,10 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } + err = nft_nat_validate(ctx, expr, NULL); + if (err < 0) + return err; + if (tb[NFTA_NAT_FAMILY] == NULL) return -EINVAL; @@ -219,13 +246,6 @@ nla_put_failure: return -1; } -static int nft_nat_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); -} - static struct nft_expr_type nft_nat_type; static const struct nft_expr_ops nft_nat_ops = { .type = &nft_nat_type, diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 9e8093f28311..d7e9e93a4e90 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -23,6 +23,22 @@ const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { }; EXPORT_SYMBOL_GPL(nft_redir_policy); +int nft_redir_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + int err; + + err = 
nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT)); +} +EXPORT_SYMBOL_GPL(nft_redir_validate); + int nft_redir_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -30,7 +46,7 @@ int nft_redir_init(const struct nft_ctx *ctx, struct nft_redir *priv = nft_expr_priv(expr); int err; - err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + err = nft_redir_validate(ctx, expr, NULL); if (err < 0) return err; @@ -88,12 +104,5 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_redir_dump); -int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); -} -EXPORT_SYMBOL_GPL(nft_redir_validate); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez "); -- cgit v1.2.3 From 6e9f3fa4f0401ad3a8fe4e5b1282d35539b58a0f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Jan 2015 18:49:50 +0100 Subject: Revert "wireless: Support of IFLA_INFO_KIND rtnl attribute" This reverts commit ba1debdfed974f25aa598c283567878657b292ee. Oliver reported that it breaks network-manager, for some reason with this patch NM decides that the device isn't wireless but "generic" (ethernet), sees no carrier (as expected with wifi) and fails to do anything else with it. Revert this to unbreak userspace. Reported-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Signed-off-by: Johannes Berg --- net/wireless/core.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 456e4c38c279..3af0ecf1cc16 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "nl80211.h" #include "core.h" #include "sysfs.h" @@ -964,10 +963,6 @@ void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, } EXPORT_SYMBOL(cfg80211_stop_iface); -static const struct rtnl_link_ops wireless_link_ops = { - .kind = "wlan", -}; - static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { @@ -986,7 +981,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, switch (state) { case NETDEV_POST_INIT: SET_NETDEV_DEVTYPE(dev, &wiphy_type); - dev->rtnl_link_ops = &wireless_link_ops; break; case NETDEV_REGISTER: /* -- cgit v1.2.3 From c1e140bf79d817d4a7aa9932eb98b0359c87af33 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 19 Jan 2015 16:53:06 +0200 Subject: mac80211: delete the assoc/auth timer upon suspend While suspending, we destroy the authentication / association that might be taking place. While doing so, we forgot to delete the timer which can be firing after local->suspended is already set, producing the warning below. Fix that by deleting the timer. [66722.825487] WARNING: CPU: 2 PID: 5612 at net/mac80211/util.c:755 ieee80211_can_queue_work.isra.18+0x32/0x40 [mac80211]() [66722.825487] queueing ieee80211 work while going to suspend [66722.825529] CPU: 2 PID: 5612 Comm: kworker/u16:69 Tainted: G W O 3.16.1+ #24 [66722.825537] Workqueue: events_unbound async_run_entry_fn [66722.825545] Call Trace: [66722.825552] [] dump_stack+0x4d/0x66 [66722.825556] [] warn_slowpath_common+0x7d/0xa0 [66722.825572] [] ? 
ieee80211_sta_bcn_mon_timer+0x50/0x50 [mac80211] [66722.825573] [] warn_slowpath_fmt+0x4c/0x50 [66722.825586] [] ieee80211_can_queue_work.isra.18+0x32/0x40 [mac80211] [66722.825598] [] ieee80211_queue_work+0x25/0x50 [mac80211] [66722.825611] [] ieee80211_sta_timer+0x1c/0x20 [mac80211] [66722.825614] [] call_timer_fn+0x8a/0x300 Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c0711082a2b8..1875181ebd94 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2453,6 +2453,12 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, sdata_assert_lock(sdata); if (!assoc) { + /* + * we are not authenticated yet, the only timer that could be + * running is the timeout for the authentication response which + * which is not relevant anymore. + */ + del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, auth_data->bss->bssid); memset(sdata->u.mgd.bssid, 0, ETH_ALEN); @@ -2760,6 +2766,12 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, sdata_assert_lock(sdata); if (!assoc) { + /* + * we are not associated yet, the only timer that could be + * running is the timeout for the association response which + * which is not relevant anymore. + */ + del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, assoc_data->bss->bssid); memset(sdata->u.mgd.bssid, 0, ETH_ALEN); -- cgit v1.2.3 From 0c7aecd4bde4b7302cd41986d3a29e4f0b0ed218 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 15:11:15 +0100 Subject: netns: add rtnl cmd to add and get peer netns ids With this patch, a user can define an id for a peer netns by providing a FD or a PID. These ids are local to the netns where it is added (ie valid only into this netns). The main function (ie the one exported to other module), peernet2id(), allows to get the id of a peer netns. If no id has been assigned by the user, this function allocates one. These ids will be used in netlink messages to point to a peer netns, for example in case of a x-netns interface. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- MAINTAINERS | 1 + include/net/net_namespace.h | 4 + include/uapi/linux/Kbuild | 1 + include/uapi/linux/net_namespace.h | 23 ++++ include/uapi/linux/rtnetlink.h | 5 + net/core/net_namespace.c | 211 +++++++++++++++++++++++++++++++++++++ 6 files changed, 245 insertions(+) create mode 100644 include/uapi/linux/net_namespace.h (limited to 'net') diff --git a/MAINTAINERS b/MAINTAINERS index 9de900572633..9b91d9f0257e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6578,6 +6578,7 @@ F: include/linux/netdevice.h F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h +F: include/uapi/linux/net_namespace.h F: tools/net/ F: tools/testing/selftests/net/ F: lib/random32.c diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2e8756b8c775..36faf4990c4b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,6 +60,7 @@ struct net { struct list_head exit_list; /* Use only net_mutex */ struct user_namespace *user_ns; /* Owning user namespace */ + struct idr netns_ids; struct ns_common ns; @@ -290,6 +291,9 @@ static inline struct net *read_pnet(struct net * const *pnet) #define __net_initconst __initconst #endif +int peernet2id(struct net *net, struct net *peer); +struct net *get_net_ns_by_id(struct net *net, int id); + struct pernet_operations { struct list_head list; int (*init)(struct net *net); diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 00b100023c47..14b7b6e44c77 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -283,6 +283,7 @@ header-y += net.h header-y += netlink_diag.h header-y += netlink.h header-y += netrom.h +header-y += net_namespace.h header-y += net_tstamp.h header-y += nfc.h header-y += nfs2.h diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h new file mode 100644 index 000000000000..778cd2c3ebf4 --- /dev/null +++ b/include/uapi/linux/net_namespace.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2015 6WIND S.A. + * Author: Nicolas Dichtel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ */ +#ifndef _UAPI_LINUX_NET_NAMESPACE_H_ +#define _UAPI_LINUX_NET_NAMESPACE_H_ + +/* Attributes of RTM_NEWNSID/RTM_GETNSID messages */ +enum { + NETNSA_NONE, +#define NETNSA_NSID_NOT_ASSIGNED -1 + NETNSA_NSID, + NETNSA_PID, + NETNSA_FD, + __NETNSA_MAX, +}; + +#define NETNSA_MAX (__NETNSA_MAX - 1) + +#endif /* _UAPI_LINUX_NET_NAMESPACE_H_ */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index a1d18593f41e..5cc5d66bf519 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -132,6 +132,11 @@ enum { RTM_GETMDB = 86, #define RTM_GETMDB RTM_GETMDB + RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ce780c722e48..9d1a4cac83b6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -15,6 +15,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -144,6 +148,77 @@ static void ops_free_list(const struct pernet_operations *ops, } } +static int alloc_netid(struct net *net, struct net *peer, int reqid) +{ + int min = 0, max = 0; + + ASSERT_RTNL(); + + if (reqid >= 0) { + min = reqid; + max = reqid + 1; + } + + return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); +} + +/* This function is used by idr_for_each(). If net is equal to peer, the + * function returns the id so that idr_for_each() stops. Because we cannot + * returns the id 0 (idr_for_each() will not stop), we return the magic value + * NET_ID_ZERO (-1) for it. + */ +#define NET_ID_ZERO -1 +static int net_eq_idr(int id, void *net, void *peer) +{ + if (net_eq(net, peer)) + return id ? : NET_ID_ZERO; + return 0; +} + +static int __peernet2id(struct net *net, struct net *peer, bool alloc) +{ + int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); + + ASSERT_RTNL(); + + /* Magic value for id 0. */ + if (id == NET_ID_ZERO) + return 0; + if (id > 0) + return id; + + if (alloc) + return alloc_netid(net, peer, -1); + + return -ENOENT; +} + +/* This function returns the id of a peer netns. If no id is assigned, one will + * be allocated and returned. + */ +int peernet2id(struct net *net, struct net *peer) +{ + int id = __peernet2id(net, peer, true); + + return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; +} + +struct net *get_net_ns_by_id(struct net *net, int id) +{ + struct net *peer; + + if (id < 0) + return NULL; + + rcu_read_lock(); + peer = idr_find(&net->netns_ids, id); + if (peer) + get_net(peer); + rcu_read_unlock(); + + return peer; +} + /* * setup_net runs the initializers for the network namespace object. 
*/ @@ -158,6 +233,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) atomic_set(&net->passive, 1); net->dev_base_seq = 1; net->user_ns = user_ns; + idr_init(&net->netns_ids); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -288,6 +364,14 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry(net, &net_kill_list, cleanup_list) { list_del_rcu(&net->list); list_add_tail(&net->exit_list, &net_exit_list); + for_each_net(tmp) { + int id = __peernet2id(tmp, net, false); + + if (id >= 0) + idr_remove(&tmp->netns_ids, id); + } + idr_destroy(&net->netns_ids); + } rtnl_unlock(); @@ -402,6 +486,130 @@ static struct pernet_operations __net_initdata net_ns_ops = { .exit = net_ns_net_exit, }; +static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { + [NETNSA_NONE] = { .type = NLA_UNSPEC }, + [NETNSA_NSID] = { .type = NLA_S32 }, + [NETNSA_PID] = { .type = NLA_U32 }, + [NETNSA_FD] = { .type = NLA_U32 }, +}; + +static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct net *peer; + int nsid, err; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (!tb[NETNSA_NSID]) + return -EINVAL; + nsid = nla_get_s32(tb[NETNSA_NSID]); + + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + if (IS_ERR(peer)) + return PTR_ERR(peer); + + if (__peernet2id(net, peer, false) >= 0) { + err = -EEXIST; + goto out; + } + + err = alloc_netid(net, peer, nsid); + if (err > 0) + err = 0; +out: + put_net(peer); + return err; +} + +static int rtnl_net_get_size(void) +{ + return NLMSG_ALIGN(sizeof(struct rtgenmsg)) + + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ + ; +} + +static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, + int cmd, struct net *net, struct net *peer) +{ + struct nlmsghdr *nlh; + struct rtgenmsg *rth; + int id; + + ASSERT_RTNL(); + + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); + if (!nlh) + return -EMSGSIZE; + + rth = nlmsg_data(nlh); + rth->rtgen_family = AF_UNSPEC; + + id = __peernet2id(net, peer, false); + if (id < 0) + id = NETNSA_NSID_NOT_ASSIGNED; + if (nla_put_s32(skb, NETNSA_NSID, id)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct sk_buff *msg; + int err = -ENOBUFS; + struct net *peer; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + + if (IS_ERR(peer)) + return PTR_ERR(peer); + + msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + RTM_GETNSID, net, peer); + if (err < 0) + goto err_out; + + err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid); + goto out; + +err_out: + nlmsg_free(msg); +out: + put_net(peer); + return err; +} + static int __init net_ns_init(void) { struct net_generic *ng; @@ -435,6 
+643,9 @@ static int __init net_ns_init(void) register_pernet_subsys(&net_ns_ops); + rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL); + return 0; } -- cgit v1.2.3 From d37512a277dfb2cef8a578e25a3246f61399a55a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 15:11:16 +0100 Subject: rtnl: add link netns id to interface messages This patch adds a new attribute (IFLA_LINK_NETNSID) which contains the 'link' netns id when this netns is different from the netns where the interface stands (for example for x-net interfaces like ip tunnels). With this attribute, it's possible to interpret correctly all advertised information (like IFLA_LINK, etc.). Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 2 ++ include/uapi/linux/if_link.h | 1 + net/core/rtnetlink.c | 13 +++++++++++++ 3 files changed, 16 insertions(+) (limited to 'net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index e21b9f9653c0..6c6d5393fc34 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -46,6 +46,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * to create when creating a new device. * @get_num_rx_queues: Function to determine number of receive queues * to create when creating a new device. + * @get_link_net: Function to get the i/o netns of the device */ struct rtnl_link_ops { struct list_head list; @@ -93,6 +94,7 @@ struct rtnl_link_ops { int (*fill_slave_info)(struct sk_buff *skb, const struct net_device *dev, const struct net_device *slave_dev); + struct net *(*get_link_net)(const struct net_device *dev); }; int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2a8380edbb7e..0deee3eeddbf 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -146,6 +146,7 @@ enum { IFLA_PHYS_PORT_ID, IFLA_CARRIER_CHANGES, IFLA_PHYS_SWITCH_ID, + IFLA_LINK_NETNSID, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 47b39f3e867c..bd6370f0cb31 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -875,6 +875,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + + nla_total_size(4) /* IFLA_LINK_NETNSID */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ @@ -1169,6 +1170,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (dev->rtnl_link_ops && + dev->rtnl_link_ops->get_link_net) { + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { + int id = peernet2id(dev_net(dev), link_net); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + goto nla_put_failure; + } + } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) goto nla_put_failure; -- cgit v1.2.3 From 1728d4fabd1bc9965728de25dda0b694b8da6450 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 15:11:17 +0100 Subject: tunnels: advertise link netns via netlink Implement rtnl_link_ops->get_link_net() callback so that IFLA_LINK_NETNSID is added to rtnetlink messages. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 8 ++++++++ include/net/ip6_tunnel.h | 1 + include/net/ip_tunnels.h | 1 + net/ipv4/ip_gre.c | 2 ++ net/ipv4/ip_tunnel.c | 8 ++++++++ net/ipv4/ip_vti.c | 1 + net/ipv4/ipip.c | 1 + net/ipv6/ip6_gre.c | 1 + net/ipv6/ip6_tunnel.c | 9 +++++++++ net/ipv6/ip6_vti.c | 1 + net/ipv6/sit.c | 1 + 11 files changed, 34 insertions(+) (limited to 'net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c5f79e7513a6..0346eaa6d236 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2923,6 +2923,13 @@ nla_put_failure: return -EMSGSIZE; } +static struct net *vxlan_get_link_net(const struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + + return vxlan->net; +} + static struct rtnl_link_ops vxlan_link_ops __read_mostly = { .kind = "vxlan", .maxtype = IFLA_VXLAN_MAX, @@ -2934,6 +2941,7 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = { .dellink = vxlan_dellink, .get_size = vxlan_get_size, .fill_info = vxlan_fill_info, + .get_link_net = vxlan_get_link_net, }; static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn, diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 9326c41c2d7f..76c091b53dae 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -70,6 +70,7 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); +struct net *ip6_tnl_get_link_net(const struct net_device *dev); static inline void ip6tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index ce4db3cc5647..2c47061a6954 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -141,6 +141,7 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); +struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 942576e27df1..6e7727f27393 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -829,6 +829,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { @@ -843,6 +844,7 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __net_init ipgre_tap_init_net(struct net *net) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d3e447936720..2cd08280c77b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -972,6 +972,14 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL_GPL(ip_tunnel_dellink); +struct net *ip_tunnel_get_link_net(const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip_tunnel_get_link_net); + int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { diff --git a/net/ipv4/ip_vti.c 
b/net/ipv4/ip_vti.c index 1a7e979e80ba..94efe148181c 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -531,6 +531,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = vti_get_size, .fill_info = vti_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __init vti_init(void) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 40403114f00a..b58d6689874c 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -498,6 +498,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipip_get_size, .fill_info = ipip_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel ipip_handler __read_mostly = { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 13cda4c6313b..9306a5ff9149 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1662,6 +1662,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 92b3da571980..266a264ec212 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1760,6 +1760,14 @@ nla_put_failure: return -EMSGSIZE; } +struct net *ip6_tnl_get_link_net(const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip6_tnl_get_link_net); + static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) }, @@ -1783,6 +1791,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = { .dellink = ip6_tnl_dellink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ace10d0b3aac..5fb9e212eca8 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1016,6 +1016,7 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .changelink = vti6_changelink, .get_size = vti6_get_size, .fill_info = vti6_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 213546bd6d5d..3cc197c72b59 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1763,6 +1763,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, .dellink = ipip6_dellink, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel sit_handler __read_mostly = { -- cgit v1.2.3 From 317f4810e45eebe65d4f8897670df8b779de1467 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 15:11:18 +0100 Subject: rtnl: allow to create device with IFLA_LINK_NETNSID set This patch adds the ability to create a netdevice in a specified netns and then move it into the final netns. In fact, it allows to have a symetry between get and set rtnl messages. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bd6370f0cb31..a12eecc0f976 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1248,6 +1248,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, + [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2021,7 +2022,7 @@ replay: struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; struct nlattr **data = NULL; struct nlattr **slave_data = NULL; - struct net *dest_net; + struct net *dest_net, *link_net = NULL; if (ops) { if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { @@ -2127,7 +2128,18 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); - dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb); + if (tb[IFLA_LINK_NETNSID]) { + int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); + + link_net = get_net_ns_by_id(dest_net, id); + if (!link_net) { + err = -EINVAL; + goto out; + } + } + + dev = rtnl_create_link(link_net ? : dest_net, ifname, + name_assign_type, ops, tb); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -2155,9 +2167,16 @@ replay: } } err = rtnl_configure_link(dev, ifm); - if (err < 0) + if (err < 0) { unregister_netdevice(dev); + goto out; + } + + if (link_net) + err = dev_change_net_namespace(dev, dest_net, ifname); out: + if (link_net) + put_net(link_net); put_net(dest_net); return err; } -- cgit v1.2.3 From 9d289715eb5c252ae15bd547cb252ca547a3c4f2 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Thu, 15 Jan 2015 22:34:25 +0100 Subject: ipv6: stop sending PTB packets for MTU < 1280 Reduce the attack vector and stop generating IPv6 Fragment Header for paths with an MTU smaller than the minimum required IPv6 MTU size (1280 byte) - called atomic fragments. See IETF I-D "Deprecating the Generation of IPv6 Atomic Fragments" [1] for more information and how this "feature" can be misused. [1] https://tools.ietf.org/html/draft-ietf-6man-deprecate-atomfrag-generation-00 Signed-off-by: Fernando Gont Signed-off-by: Hagen Paul Pfeifer Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c91083156edb..166e33bed222 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1160,12 +1160,9 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct net *net = dev_net(dst->dev); rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) { - u32 features = dst_metric(dst, RTAX_FEATURES); + if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(dst, RTAX_FEATURES, features); - } + dst_metric_set(dst, RTAX_MTU, mtu); rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } -- cgit v1.2.3 From 728c02089a0e3eefb02e9927bfae50490f40e72e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 16 Jan 2015 09:56:01 -0800 Subject: net: ipv4: handle DSA enabled master network devices The logic to configure a network interface for kernel IP auto-configuration is very simplistic, and does not handle the case where a device is stacked onto another such as with DSA. This causes the kernel not to open and configure the master network device in a DSA switch tree, and therefore slave network devices using this master network devices as conduit device cannot be open. This restriction comes from a check in net/dsa/slave.c, which is basically checking the master netdev flags for IFF_UP and returns -ENETDOWN if it is not the case. Automatically bringing-up DSA master network devices allows DSA slave network devices to be used as valid interfaces for e.g: NFS root booting by allowing kernel IP autoconfiguration to succeed on these interfaces. On the reverse path, make sure we do not attempt to close a DSA-enabled device as this would implicitely prevent the slave DSA network device from operating. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 7fa18bc7e47f..b26376ef87f6 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -209,9 +209,9 @@ static int __init ic_open_devs(void) last = &ic_first_dev; rtnl_lock(); - /* bring loopback device up first */ + /* bring loopback and DSA master network devices up first */ for_each_netdev(&init_net, dev) { - if (!(dev->flags & IFF_LOOPBACK)) + if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev)) continue; if (dev_change_flags(dev, dev->flags | IFF_UP) < 0) pr_err("IP-Config: Failed to open %s\n", dev->name); @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev) { + if (dev != ic_dev && !netdev_uses_dsa(dev)) { DBG(("IP-Config: Downing %s\n", dev->name)); dev_change_flags(dev, d->flags); } -- cgit v1.2.3 From 8db0a2ee2c6302a1dcbcdb93cb731dfc6c0cdb5e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 16 Jan 2015 09:56:02 -0800 Subject: net: bridge: reject DSA-enabled master netdevices as bridge members DSA-enabled master network devices with a switch tagging protocol should strip the protocol specific format before handing the frame over to higher layer. 
When adding such a DSA master network device as a bridge member, we go through the following code path when receiving a frame: __netif_receive_skb_core -> first ptype check against ptype_all is not returning any handler for this skb -> check and invoke rx_handler: -> deliver frame to the bridge layer: br_handle_frame DSA registers a ptype handler with the fake ETH_XDSA ethertype, which is called *after* the bridge-layer rx_handler has run. br_handle_frame() tries to parse the frame it received from the DSA master network device, and will not be able to match any of its conditions and jumps straight at the end of the end of br_handle_frame() and returns RX_HANDLER_CONSUMED there. Since we returned RX_HANDLER_CONSUMED, __netif_receive_skb_core() stops RX processing for this frame and returns NET_RX_SUCCESS, so we never get a chance to call our switch tag packet processing logic and deliver frames to the DSA slave network devices, and so we do not get any functional bridge members at all. Instead of cluttering the bridge receive path with DSA-specific checks, and rely on assumptions about how __netif_receive_skb_core() is processing frames, we simply deny adding the DSA master network device (conduit interface) as a bridge member, leaving only the slave DSA network devices to be bridge members, since those will work correctly in all circumstances. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/bridge/br_if.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 81e49fb73169..b087d278c679 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -436,10 +436,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) int err = 0; bool changed_addr; - /* Don't allow bridging non-ethernet like devices */ + /* Don't allow bridging non-ethernet like devices, or DSA-enabled + * master network devices since the bridge layer rx_handler prevents + * the DSA fake ethertype handler to be invoked, so we do not strip off + * the DSA switch tag protocol header and the bridge layer just return + * RX_HANDLER_CONSUMED, stopping RX processing for these frames. + */ if ((dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || - !is_valid_ether_addr(dev->dev_addr)) + !is_valid_ether_addr(dev->dev_addr) || + netdev_uses_dsa(dev)) return -EINVAL; /* No bridging of bridges */ -- cgit v1.2.3 From 22a5dc0e5e3e8fef804230cd73ed7b0afd4c7bae Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 18 Jan 2015 16:35:14 -0500 Subject: net: sched: Introduce connmark action This tc action allows you to retrieve the connection tracking mark This action has been used heavily by openwrt for a few years now. There are known limitations currently: doesn't work for initial packets, since we only query the ct table. Fine given use case is for returning packets no implicit defrag. frags should be rare so fix later.. won't work for more complex tasks, e.g. lookup of other extensions since we have no means to store results we still have a 2nd lookup later on via normal conntrack path. This shouldn't break anything though since skb->nfct isn't altered. 
V2: remove unnecessary braces (Jiri) change the action identifier to 14 (Jiri) Fix some stylistic issues caught by checkpatch V3: Move module params to bottom (Cong) Get rid of tcf_hashinfo_init and friends and conform to newer API (Cong) Acked-by: Jiri Pirko Signed-off-by: Felix Fietkau Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_connmark.h | 14 +++ include/uapi/linux/tc_act/tc_connmark.h | 22 ++++ net/sched/Kconfig | 11 ++ net/sched/Makefile | 1 + net/sched/act_connmark.c | 192 ++++++++++++++++++++++++++++++++ 5 files changed, 240 insertions(+) create mode 100644 include/net/tc_act/tc_connmark.h create mode 100644 include/uapi/linux/tc_act/tc_connmark.h create mode 100644 net/sched/act_connmark.c (limited to 'net') diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h new file mode 100644 index 000000000000..5c1104c2e24f --- /dev/null +++ b/include/net/tc_act/tc_connmark.h @@ -0,0 +1,14 @@ +#ifndef __NET_TC_CONNMARK_H +#define __NET_TC_CONNMARK_H + +#include + +struct tcf_connmark_info { + struct tcf_common common; + u16 zone; +}; + +#define to_connmark(a) \ + container_of(a->priv, struct tcf_connmark_info, common) + +#endif /* __NET_TC_CONNMARK_H */ diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h new file mode 100644 index 000000000000..994b0971bce2 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_connmark.h @@ -0,0 +1,22 @@ +#ifndef __UAPI_TC_CONNMARK_H +#define __UAPI_TC_CONNMARK_H + +#include +#include + +#define TCA_ACT_CONNMARK 14 + +struct tc_connmark { + tc_gen; + __u16 zone; +}; + +enum { + TCA_CONNMARK_UNSPEC, + TCA_CONNMARK_PARMS, + TCA_CONNMARK_TM, + __TCA_CONNMARK_MAX +}; +#define TCA_CONNMARK_MAX (__TCA_CONNMARK_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 466943551581..475e35e261ec 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -710,6 +710,17 @@ config NET_ACT_BPF To compile this code as a module, choose M here: the module will be called act_bpf. +config NET_ACT_CONNMARK + tristate "Netfilter Connection Mark Retriever" + depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + ---help--- + Say Y here to allow retrieving of conn mark + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_connmark. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 7ca2b4e76312..7ca7f4c1b8c2 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o +obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c new file mode 100644 index 000000000000..8e472518f9f6 --- /dev/null +++ b/net/sched/act_connmark.c @@ -0,0 +1,192 @@ +/* + * net/sched/act_connmark.c netfilter connmark retriever action + * skb mark is over-written + * + * Copyright (c) 2011 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CONNMARK_TAB_MASK 3 + +static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + const struct nf_conntrack_tuple_hash *thash; + struct nf_conntrack_tuple tuple; + enum ip_conntrack_info ctinfo; + struct tcf_connmark_info *ca = a->priv; + struct nf_conn *c; + int proto; + + spin_lock(&ca->tcf_lock); + ca->tcf_tm.lastuse = jiffies; + bstats_update(&ca->tcf_bstats, skb); + + if (skb->protocol == htons(ETH_P_IP)) { + if (skb->len < sizeof(struct iphdr)) + goto out; + + proto = NFPROTO_IPV4; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (skb->len < sizeof(struct ipv6hdr)) + goto out; + + proto = NFPROTO_IPV6; + } else { + goto out; + } + + c = nf_ct_get(skb, &ctinfo); + if (c) { + skb->mark = c->mark; + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + nf_ct_put(c); + goto out; + } + + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + proto, &tuple)) + goto out; + + thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple); + if (!thash) + goto out; + + c = nf_ct_tuplehash_to_ctrack(thash); + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + skb->mark = c->mark; + nf_ct_put(c); + +out: + skb->nfct = NULL; + spin_unlock(&ca->tcf_lock); + return ca->tcf_action; +} + +static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { + [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) }, +}; + +static int tcf_connmark_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_connmark_info *ci; + struct tc_connmark *parm; + int ret = 0; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy); + if (ret < 0) + return ret; + + parm = nla_data(tb[TCA_CONNMARK_PARMS]); + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind); + if (ret) + return ret; + + ci = to_connmark(a); + ci->tcf_action = parm->action; + ci->zone = parm->zone; + + tcf_hash_insert(a); + ret = ACT_P_CREATED; + } else { + ci = to_connmark(a); + if (bind) + return 0; + tcf_hash_release(a, bind); + if (!ovr) + return -EEXIST; + /* replacing action and zone */ + ci->tcf_action = parm->action; + ci->zone = parm->zone; + } + + return ret; +} + +static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_connmark_info *ci = a->priv; + + struct tc_connmark opt = { + .index = ci->tcf_index, + .refcnt = ci->tcf_refcnt - ref, + .bindcnt = ci->tcf_bindcnt - bind, + .action = ci->tcf_action, + .zone = ci->zone, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); + if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_connmark_ops = { + .kind = "connmark", + .type = TCA_ACT_CONNMARK, + .owner = THIS_MODULE, + .act = tcf_connmark, + 
.dump = tcf_connmark_dump, + .init = tcf_connmark_init, +}; + +static int __init connmark_init_module(void) +{ + return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK); +} + +static void __exit connmark_cleanup_module(void) +{ + tcf_unregister_action(&act_connmark_ops); +} + +module_init(connmark_init_module); +module_exit(connmark_cleanup_module); +MODULE_AUTHOR("Felix Fietkau "); +MODULE_DESCRIPTION("Connection tracking mark restoring"); +MODULE_LICENSE("GPL"); + -- cgit v1.2.3 From 926e9878a360fc57112259949c44c74c31709cc6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Jan 2015 12:15:24 +0100 Subject: phonet netlink: allow multiple messages per skb in route dump My previous patch to this file changed the code to be bug-compatible towards userspace. Unless userspace (which I wasn't able to find) implements the dump reader by hand in a wrong way, this isn't needed. If it uses libnl or similar code putting multiple messages into a single SKB is far more efficient. Change the code to do this. While at it, also clean it up and don't use so many variables - just store the address in the callback args directly. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/phonet/pn_netlink.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 54d766842c2b..bc5ee5fbe6ae 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -272,31 +272,23 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; + u8 addr; rcu_read_lock(); - for (addr = 0; addr < 64; addr++) { - struct net_device *dev; + for (addr = cb->args[0]; addr < 64; addr++) { + struct net_device *dev = phonet_route_get_rcu(net, addr << 2); - dev = phonet_route_get_rcu(net, addr << 2); if (!dev) continue; - if (addr_idx++ < addr_start_idx) - continue; - fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE); - /* fill_route() used to return > 0 (or negative errors) but - * never 0 - ignore the return value and just go out to - * call dumpit again from outside to preserve the behavior - */ - goto out; + if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE) < 0) + goto out; } out: rcu_read_unlock(); - cb->args[0] = addr_idx; - cb->args[1] = 0; + cb->args[0] = addr; return skb->len; } -- cgit v1.2.3 From 54330bf63b49683d006d8f3857d45722a8c0fbff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 21 Jan 2015 13:52:24 +0100 Subject: mac80211: fix HW registration error paths Station info state is started in allocation, so should be destroyed on free (it's just a timer); rate control must be freed if anything afterwards fails to initialize. LED exit should be later, no need for locking there, but it needs to be done also when rate init failed. Also clean up the code by moving a label so the locking doesn't have to be done separately. 
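The fix boils down to the standard goto-unwind rule: each failure label must release exactly the resources that were successfully set up before it, in reverse order of initialization, and any step that can fail after a resource was acquired must jump to a label that still releases that resource. A generic, self-contained sketch of the pattern (all names are invented for illustration and are unrelated to the mac80211 code):

#include <stdlib.h>

static void *res_a, *res_b, *res_c;

/* Acquire A, then B, then C; on failure release only what was already
 * acquired, in reverse order.  On success the resources are kept; a
 * matching teardown would free C, then B, then A.
 */
static int setup_all(void)
{
	res_a = malloc(16);
	if (!res_a)
		goto fail_a;

	res_b = malloc(16);
	if (!res_b)
		goto fail_b;

	res_c = malloc(16);
	if (!res_c)
		goto fail_c;

	return 0;

fail_c:
	free(res_b);	/* undo step B */
fail_b:
	free(res_a);	/* undo step A */
fail_a:
	return -1;
}

int main(void)
{
	return setup_all() ? EXIT_FAILURE : EXIT_SUCCESS;
}
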
Signed-off-by: Johannes Berg --- net/mac80211/main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d9ce33663c73..46264cb6604b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1041,10 +1041,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - if (result) { - rtnl_lock(); + if (result) goto fail_pm_qos; - } #ifdef CONFIG_INET local->ifa_notifier.notifier_call = ieee80211_ifa_changed; @@ -1072,15 +1070,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) fail_ifa: pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - rtnl_lock(); #endif fail_pm_qos: - ieee80211_led_exit(local); + rtnl_lock(); + rate_control_deinitialize(local); ieee80211_remove_interfaces(local); fail_rate: rtnl_unlock(); + ieee80211_led_exit(local); ieee80211_wep_free(local); - sta_info_stop(local); destroy_workqueue(local->workqueue); fail_workqueue: wiphy_unregister(local->hw.wiphy); @@ -1176,6 +1174,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) kfree(rcu_access_pointer(local->tx_latency)); + sta_info_stop(local); + wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); -- cgit v1.2.3 From 91200e9f3e76af2652952e73ce5d9913f1c987c6 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 22 Jan 2015 16:57:05 +0100 Subject: Bluetooth: Fix reporting invalid RSSI for LE devices Start Discovery was reporting 0 RSSI for invalid RSSI only for BR/EDR devices. LE devices were reported with RSSI 127. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org # 3.19+ --- net/bluetooth/mgmt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f5c4d2eed9a1..a619d983c078 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7238,7 +7238,8 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, * However when using service discovery, the value 127 will be * returned when the RSSI is not available. */ - if (rssi == HCI_RSSI_INVALID && !hdev->discovery.report_invalid_rssi) + if (rssi == HCI_RSSI_INVALID && !hdev->discovery.report_invalid_rssi && + link_type == ACL_LINK) rssi = 0; bacpy(&ev->addr.bdaddr, bdaddr); -- cgit v1.2.3 From 574ea3c7137c9deee75b107221be5159720ab501 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 22 Jan 2015 11:15:20 -0800 Subject: Bluetooth: Fix dependency for BR/EDR Secure Connections mode on SSP The BR/EDR Secure Connections feature should only be enabled when the Secure Simple Pairing mode has been enabled first. However since secure connections is feature that is valid for BR/EDR and LE, this needs special handling. When enabling secure connections on a LE only configured controller, thent the BR/EDR side should not be enabled in the controller. This patches makes the BR/EDR Secure Connections feature depending on enabling Secure Simple Pairing mode first. 
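The dependency amounts to a single rule: on an LE-only configuration Secure Connections needs no prerequisite, but as soon as BR/EDR is enabled, Secure Simple Pairing must already be on before Secure Connections may be switched on. A small stand-alone illustration of that rule (the helper name and its boolean parameters are invented for this sketch and are not part of the patch):

#include <assert.h>
#include <stdbool.h>

/* Illustrative predicate for the rule described above. */
static bool secure_conn_allowed(bool bredr_enabled, bool ssp_enabled)
{
	if (!bredr_enabled)	/* LE-only: no SSP dependency */
		return true;

	return ssp_enabled;	/* BR/EDR: SC is layered on top of SSP */
}

int main(void)
{
	assert(secure_conn_allowed(false, false));	/* LE-only controller */
	assert(!secure_conn_allowed(true, false));	/* BR/EDR without SSP */
	assert(secure_conn_allowed(true, true));	/* BR/EDR with SSP */
	return 0;
}
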
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 5 ++++- net/bluetooth/mgmt.c | 14 ++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 34c17a0645ce..bb831d678868 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -609,6 +609,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { u8 mode = 0x01; + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); } else { @@ -870,8 +871,10 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); /* Enable Secure Connections if supported and configured */ - if (bredr_sc_enabled(hdev)) { + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + bredr_sc_enabled(hdev)) { u8 support = 0x01; + hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, sizeof(support), &support); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a619d983c078..3049a4815665 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6262,14 +6262,16 @@ static int powered_update_hci(struct hci_dev *hdev) if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && !lmp_host_ssp_capable(hdev)) { - u8 ssp = 1; + u8 mode = 0x01; - hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp); - } + hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); + + if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { + u8 support = 0x01; - if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { - u8 sc = 0x01; - hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, sizeof(sc), &sc); + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, + sizeof(support), &support); + } } if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && -- cgit v1.2.3 From 3a5486e1fdb0da7a18788f40b17d351af90d0ea7 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 22 Jan 2015 11:15:21 -0800 Subject: Bluetooth: Limit BR/EDR switching for LE only with secure connections When a powered on dual-mode controller has been configured to operate as LE only with secure connections, then the BR/EDR side of things can not be switched back on. Do reconfigure the controller it first needs to be powered down. The secure connections feature is implemented in the BR/EDR controller while for LE it is implemented in the host. So explicitly forbid such a transaction to avoid inconsistent states. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3049a4815665..e86b8d9105e9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4691,9 +4691,16 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) * Dual-mode controllers shall operate with the public * address as its identity address for BR/EDR and LE. So * reject the attempt to create an invalid configuration. + * + * The same restrictions applies when secure connections + * has been enabled. For BR/EDR this is a controller feature + * while for LE it is a host stack feature. This means that + * switching BR/EDR back on when secure connections has been + * enabled is not a supported transaction. 
*/ if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && - bacmp(&hdev->static_addr, BDADDR_ANY)) { + (bacmp(&hdev->static_addr, BDADDR_ANY) || + test_bit(HCI_SC_ENABLED, &hdev->dev_flags))) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, MGMT_STATUS_REJECTED); goto unlock; -- cgit v1.2.3 From ed93ec69c7e0e80d733dced5b73af0f23cc3b061 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 22 Jan 2015 11:15:22 -0800 Subject: Bluetooth: Require SSP enabling before BR/EDR Secure Connections When BR/EDR is supported by a controller, then it is required to enable Secure Simple Pairing first before enabling the Secure Connections feature. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e86b8d9105e9..41e30055bae8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4758,6 +4758,11 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_NOT_SUPPORTED); + if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + !test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, + MGMT_STATUS_REJECTED); + if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_INVALID_PARAMS); -- cgit v1.2.3 From fa7e1fbcb52cc9efab394526a566d80fb31529bb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Jan 2015 18:44:19 +0100 Subject: mac80211: allow drivers to control software crypto Some drivers unfortunately cannot support software crypto, but mac80211 currently assumes that they do. This has the issue that if the hardware enabling fails for some reason, the software fallback is used, which won't work. This clearly isn't desirable, the error should be reported and the key setting refused. Support this in mac80211 by allowing drivers to set a new HW flag IEEE80211_HW_SW_CRYPTO_CONTROL, in which case mac80211 will only allow software fallback if the set_key() method returns 1. The driver will also need to advertise supported cipher suites so that mac80211 doesn't advertise any (future) software ciphers that the driver can't actually do. While at it, to make it easier to support this, refactor the ieee80211_init_cipher_suites() code. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 12 ++++++++++ net/mac80211/key.c | 10 +++++--- net/mac80211/main.c | 65 ++++++++++++++++++++++++++++++-------------------- 3 files changed, 58 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 275ee56152ad..33b87c50a4cf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1634,6 +1634,12 @@ struct ieee80211_tx_control { * be created. It is expected user-space will create vifs as * desired (and thus have them named as desired). * + * @IEEE80211_HW_SW_CRYPTO_CONTROL: The driver wants to control which of the + * crypto algorithms can be done in software - so don't automatically + * try to fall back to it if hardware crypto fails, but do so only if + * the driver returns 1. This also forces the driver to advertise its + * supported cipher suites. + * * @IEEE80211_HW_QUEUE_CONTROL: The driver wants to control per-interface * queue mapping in order to use different queues (not just one per AC) * for different virtual interfaces. 
See the doc section on HW queue @@ -1681,6 +1687,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_MFP_CAPABLE = 1<<13, IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, IEEE80211_HW_NO_AUTO_VIF = 1<<15, + IEEE80211_HW_SW_CRYPTO_CONTROL = 1<<16, /* free slots */ IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, @@ -1955,6 +1962,11 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * added; if you return 0 then hw_key_idx must be assigned to the * hardware key index, you are free to use the full u8 range. * + * Note that in the case that the @IEEE80211_HW_SW_CRYPTO_CONTROL flag is + * set, mac80211 will not automatically fall back to software crypto if + * enabling hardware crypto failed. The set_key() call may also return the + * value 1 to permit this specific key/algorithm to be done in software. + * * When the cmd is %DISABLE_KEY then it must succeed. * * Note that it is permissible to not decrypt a frame even if a key diff --git a/net/mac80211/key.c b/net/mac80211/key.c index f8d9f0ee59bf..5167c53aa15f 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -90,7 +90,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - int ret; + int ret = -EOPNOTSUPP; might_sleep(); @@ -150,7 +150,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) return 0; } - if (ret != -ENOSPC && ret != -EOPNOTSUPP) + if (ret != -ENOSPC && ret != -EOPNOTSUPP && ret != 1) sdata_err(sdata, "failed to set key (%d, %pM) to hardware (%d)\n", key->conf.keyidx, @@ -163,7 +163,11 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_AES_CMAC: - /* all of these we can do in software */ + /* all of these we can do in software - if driver can */ + if (ret == 1) + return 0; + if (key->local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL) + return -EINVAL; return 0; default: return -EINVAL; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 46264cb6604b..ea6b82ac4f0b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -658,7 +658,6 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) bool have_wep = !(IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)); bool have_mfp = local->hw.flags & IEEE80211_HW_MFP_CAPABLE; - const struct ieee80211_cipher_scheme *cs = local->hw.cipher_schemes; int n_suites = 0, r = 0, w = 0; u32 *suites; static const u32 cipher_suites[] = { @@ -672,12 +671,38 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_AES_CMAC }; - /* Driver specifies the ciphers, we have nothing to do... */ - if (local->hw.wiphy->cipher_suites && have_wep) - return 0; + if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || + local->hw.wiphy->cipher_suites) { + /* If the driver advertises, or doesn't support SW crypto, + * we only need to remove WEP if necessary. + */ + if (have_wep) + return 0; + + /* well if it has _no_ ciphers ... 
fine */ + if (!local->hw.wiphy->n_cipher_suites) + return 0; + + /* Driver provides cipher suites, but we need to exclude WEP */ + suites = kmemdup(local->hw.wiphy->cipher_suites, + sizeof(u32) * local->hw.wiphy->n_cipher_suites, + GFP_KERNEL); + if (!suites) + return -ENOMEM; - /* Set up cipher suites if driver relies on mac80211 cipher defs */ - if (!local->hw.wiphy->cipher_suites && !cs) { + for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { + u32 suite = local->hw.wiphy->cipher_suites[r]; + + if (suite == WLAN_CIPHER_SUITE_WEP40 || + suite == WLAN_CIPHER_SUITE_WEP104) + continue; + suites[w++] = suite; + } + } else if (!local->hw.cipher_schemes) { + /* If the driver doesn't have cipher schemes, there's nothing + * else to do other than assign the (software supported and + * perhaps offloaded) cipher suites. + */ local->hw.wiphy->cipher_suites = cipher_suites; local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); @@ -689,12 +714,16 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) local->hw.wiphy->n_cipher_suites -= 2; } + /* not dynamically allocated, so just return */ return 0; - } + } else { + const struct ieee80211_cipher_scheme *cs; - if (!local->hw.wiphy->cipher_suites) { - /* - * Driver specifies cipher schemes only + cs = local->hw.cipher_schemes; + + /* Driver specifies cipher schemes only (but not cipher suites + * including the schemes) + * * We start counting ciphers defined by schemes, TKIP and CCMP */ n_suites = local->hw.n_cipher_schemes + 2; @@ -724,22 +753,6 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) for (r = 0; r < local->hw.n_cipher_schemes; r++) suites[w++] = cs[r].cipher; - } else { - /* Driver provides cipher suites, but we need to exclude WEP */ - suites = kmemdup(local->hw.wiphy->cipher_suites, - sizeof(u32) * local->hw.wiphy->n_cipher_suites, - GFP_KERNEL); - if (!suites) - return -ENOMEM; - - for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { - u32 suite = local->hw.wiphy->cipher_suites[r]; - - if (suite == WLAN_CIPHER_SUITE_WEP40 || - suite == WLAN_CIPHER_SUITE_WEP104) - continue; - suites[w++] = suite; - } } local->hw.wiphy->cipher_suites = suites; -- cgit v1.2.3 From 4b681c82d2f9bef121c912ffcaac89a004af3f2c Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Mon, 12 Jan 2015 16:34:05 +0200 Subject: nl80211: Allow set network namespace by fd Added new NL80211_ATTR_NETNS_FD which allows to set namespace via nl80211 by fd. 
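To show how userspace could exercise the new attribute, here is a rough libnl-3 based sketch that moves a wiphy into the namespace referred to by an open file descriptor. It assumes a kernel with this patch applied; the helper name, the wiphy index 0 and the /run/netns/test path are placeholders for illustration, and error reporting is kept minimal:

#include <fcntl.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

/* Move the wiphy identified by 'wiphy_idx' into the network namespace
 * referred to by 'netns_fd', using NL80211_CMD_SET_WIPHY_NETNS with the
 * new NL80211_ATTR_NETNS_FD attribute.
 */
static int wiphy_set_netns_fd(int wiphy_idx, int netns_fd)
{
	struct nl_sock *sk;
	struct nl_msg *msg;
	int family, err = -1;

	sk = nl_socket_alloc();
	if (!sk)
		return -1;
	if (genl_connect(sk))
		goto out_free_sock;

	family = genl_ctrl_resolve(sk, "nl80211");
	if (family < 0)
		goto out_free_sock;

	msg = nlmsg_alloc();
	if (!msg)
		goto out_free_sock;

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NL80211_CMD_SET_WIPHY_NETNS, 0);
	nla_put_u32(msg, NL80211_ATTR_WIPHY, wiphy_idx);
	nla_put_u32(msg, NL80211_ATTR_NETNS_FD, netns_fd);

	if (nl_send_auto(sk, msg) < 0)
		goto out_free_msg;
	err = nl_wait_for_ack(sk);

out_free_msg:
	nlmsg_free(msg);
out_free_sock:
	nl_socket_free(sk);
	return err;
}

int main(void)
{
	/* placeholder: a namespace created with "ip netns add test" */
	int fd = open("/run/netns/test", O_RDONLY);

	if (fd < 0)
		return 1;
	return wiphy_set_netns_fd(0, fd) ? 1 : 0;
}

This mirrors what the existing NL80211_ATTR_PID path already allowed; only the attribute used to identify the target namespace differs.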
Signed-off-by: Vadim Kochan Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ net/core/net_namespace.c | 1 + net/wireless/nl80211.c | 16 +++++++++++----- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f52797a90816..f68532b015db 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2098,6 +2098,8 @@ enum nl80211_attrs { NL80211_ATTR_SURVEY_RADIO_STATS, + NL80211_ATTR_NETNS_FD, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7f155175bba8..5d5ee8f3e4ff 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -361,6 +361,7 @@ struct net *get_net_ns_by_fd(int fd) return ERR_PTR(-EINVAL); } #endif +EXPORT_SYMBOL_GPL(get_net_ns_by_fd); struct net *get_net_ns_by_pid(pid_t pid) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c5661c5ad8f3..c64100ec79e3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -397,6 +397,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, + [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -7762,14 +7763,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net *net; int err; - u32 pid; - if (!info->attrs[NL80211_ATTR_PID]) - return -EINVAL; + if (info->attrs[NL80211_ATTR_PID]) { + u32 pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); + + net = get_net_ns_by_pid(pid); + } else if (info->attrs[NL80211_ATTR_NETNS_FD]) { + u32 fd = nla_get_u32(info->attrs[NL80211_ATTR_NETNS_FD]); - pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); + net = get_net_ns_by_fd(fd); + } else { + return -EINVAL; + } - net = get_net_ns_by_pid(pid); if (IS_ERR(net)) return PTR_ERR(net); -- cgit v1.2.3 From db82d8a966ded064bd4cf0e1fcca13442f50d0ae Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 14 Jan 2015 12:55:08 +0100 Subject: mac80211: enable TPC through mac80211 stack Control per packet Transmit Power Control (TPC) in lower drivers according to TX power settings configured by the user. In particular TPC is enabled if value passed in enum nl80211_tx_power_setting is NL80211_TX_POWER_LIMITED (allow using less than specified from userspace), whereas TPC is disabled if nl80211_tx_power_setting is set to NL80211_TX_POWER_FIXED (use value configured from userspace) Signed-off-by: Lorenzo Bianconi Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ net/mac80211/cfg.c | 19 ++++++++++++++++--- net/mac80211/chan.c | 4 ++-- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/iface.c | 5 +++-- 5 files changed, 30 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 33b87c50a4cf..866073e27ea2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -376,6 +376,12 @@ enum ieee80211_rssi_event { * @ssid_len: Length of SSID given in @ssid. * @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode. * @txpower: TX power in dBm + * @txpower_type: TX power adjustment used to control per packet Transmit + * Power Control (TPC) in lower driver for the current vif. 
In particular + * TPC is enabled if value passed in %txpower_type is + * NL80211_TX_POWER_LIMITED (allow using less than specified from + * userspace), whereas TPC is disabled if %txpower_type is set to + * NL80211_TX_POWER_FIXED (use value configured from userspace) * @p2p_noa_attr: P2P NoA attribute for P2P powersave */ struct ieee80211_bss_conf { @@ -411,6 +417,7 @@ struct ieee80211_bss_conf { size_t ssid_len; bool hidden_ssid; int txpower; + enum nl80211_tx_power_setting txpower_type; struct ieee80211_p2p_noa_attr p2p_noa_attr; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ff090ef1ea2c..a777114d663b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2110,6 +2110,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; + enum nl80211_tx_power_setting txp_type = type; + bool update_txp_type = false; if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); @@ -2117,6 +2119,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, switch (type) { case NL80211_TX_POWER_AUTOMATIC: sdata->user_power_level = IEEE80211_UNSET_POWER_LEVEL; + txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: @@ -2126,7 +2129,12 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, break; } - ieee80211_recalc_txpower(sdata); + if (txp_type != sdata->vif.bss_conf.txpower_type) { + update_txp_type = true; + sdata->vif.bss_conf.txpower_type = txp_type; + } + + ieee80211_recalc_txpower(sdata, update_txp_type); return 0; } @@ -2134,6 +2142,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, switch (type) { case NL80211_TX_POWER_AUTOMATIC: local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; + txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: @@ -2144,10 +2153,14 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, } mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) + list_for_each_entry(sdata, &local->interfaces, list) { sdata->user_power_level = local->user_power_level; + if (txp_type != sdata->vif.bss_conf.txpower_type) + update_txp_type = true; + sdata->vif.bss_conf.txpower_type = txp_type; + } list_for_each_entry(sdata, &local->interfaces, list) - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, update_txp_type); mutex_unlock(&local->iflist_mtx); return 0; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 35b11e11e0c4..ff0d2db09df9 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -655,7 +655,7 @@ out: } if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) { - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, false); ieee80211_recalc_chanctx_min_def(local, new_ctx); } @@ -1387,7 +1387,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, false); } ieee80211_recalc_chanctx_chantype(local, ctx); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 156ea79e0157..6e1b184183fb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1621,7 +1621,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local); void ieee80211_del_virtual_monitor(struct ieee80211_local *local); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); -void ieee80211_recalc_txpower(struct 
ieee80211_sub_if_data *sdata); +void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, + bool update_bss); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 677422e11e07..4371c123a95e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -73,9 +73,10 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) return false; } -void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) +void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, + bool update_bss) { - if (__ieee80211_recalc_txpower(sdata)) + if (__ieee80211_recalc_txpower(sdata) || update_bss) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -- cgit v1.2.3 From 9c74893441d3cf4b258a82b19cbf6bfd2ed6e549 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 16 Jan 2015 16:04:09 +0200 Subject: nl80211: add an attribute to allow delaying the first scheduled scan cycle The userspace may want to delay the the first scheduled scan or net-detect cycle. Add an optional attribute to the scheduled scan configuration to pass the delay to be (optionally) used by the driver. Signed-off-by: Luciano Coelho [add the attribute to the policy to validate it] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ include/uapi/linux/nl80211.h | 20 ++++++++++++++------ net/wireless/nl80211.c | 5 +++++ 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7b44ba0a7632..64e09e1e8099 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1493,6 +1493,10 @@ struct cfg80211_match_set { * @rcu_head: RCU callback used to free the struct * @owner_nlportid: netlink portid of owner (if this should is a request * owned by a particular socket) + * @delay: delay in seconds to use before starting the first scan + * cycle. The driver may ignore this parameter and start + * immediately (or at any other time), if this feature is not + * supported. */ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1506,6 +1510,7 @@ struct cfg80211_sched_scan_request { struct cfg80211_match_set *match_sets; int n_match_sets; s32 min_rssi_thold; + u32 delay; u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f68532b015db..1cbc3aae425c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -324,7 +324,9 @@ * if passed, define which channels should be scanned; if not * passed, all channels allowed for the current regulatory domain * are used. Extra IEs can also be passed from the userspace by - * using the %NL80211_ATTR_IE attribute. + * using the %NL80211_ATTR_IE attribute. The first cycle of the + * scheduled scan can be delayed by %NL80211_ATTR_SCHED_SCAN_DELAY + * is supplied. * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. Returns -ENOENT if * scheduled scan is not running. The caller may assume that as soon * as the call returns, it is safe to start a new scheduled scan again. @@ -1735,6 +1737,9 @@ enum nl80211_commands { * should be contained in the result as the sum of the respective counters * over all channels. * + * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before a scheduled scan (or a + * WoWLAN net-detect scan) is started, u32 in seconds. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2100,6 +2105,8 @@ enum nl80211_attrs { NL80211_ATTR_NETNS_FD, + NL80211_ATTR_SCHED_SCAN_DELAY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3743,11 +3750,12 @@ struct nl80211_pattern_support { * @NL80211_WOWLAN_TRIG_NET_DETECT: wake up when a configured network * is detected. This is a nested attribute that contains the * same attributes used with @NL80211_CMD_START_SCHED_SCAN. It - * specifies how the scan is performed (e.g. the interval and the - * channels to scan) as well as the scan results that will - * trigger a wake (i.e. the matchsets). This attribute is also - * sent in a response to @NL80211_CMD_GET_WIPHY, indicating the - * number of match sets supported by the driver (u32). + * specifies how the scan is performed (e.g. the interval, the + * channels to scan and the initial delay) as well as the scan + * results that will trigger a wake (i.e. the matchsets). This + * attribute is also sent in a response to + * @NL80211_CMD_GET_WIPHY, indicating the number of match sets + * supported by the driver (u32). * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute * containing an array with information about what triggered the * wake up. If no elements are present in the array, it means diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c64100ec79e3..4542e8683beb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -398,6 +398,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, + [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -6205,6 +6206,10 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, } } + if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY]) + request->delay = + nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]); + request->interval = interval; request->scan_start = jiffies; -- cgit v1.2.3 From 2af81d6718f5ec92b1d787e0fe79b0d3b6f78601 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 21 Jan 2015 22:19:34 +0200 Subject: mac80211: only roll back station states for WDS when suspending In normal cases (i.e. when we are fully associated), cfg80211 takes care of removing all the stations before calling suspend in mac80211. But in the corner case when we suspend during authentication or association, mac80211 needs to roll back the station states. But we shouldn't roll back the station states in the suspend function, because this is taken care of in other parts of the code, except for WDS interfaces. For AP types of interfaces, cfg80211 takes care of disconnecting all stations before calling the driver's suspend code. For station interfaces, this is done in the quiesce code. For WDS interfaces we still need to do it here, so move the code into a new switch case for WDS. 
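For readers unfamiliar with the station state machine, the rollback that the new WDS case performs is just a downward walk of the state enum, mirroring each step to the driver. A minimal userspace model of that loop (all types and names below are stand-ins, not the mac80211 definitions):

#include <stdio.h>

/* stand-ins for the mac80211 station states, ordered low -> high */
enum sta_state {
	STA_NOTEXIST,
	STA_NONE,
	STA_AUTH,
	STA_ASSOC,
	STA_AUTHORIZED,
};

/* stand-in for the driver callback that mirrors each transition */
static int drv_sta_state(enum sta_state old_state, enum sta_state new_state)
{
	printf("driver: station %d -> %d\n", old_state, new_state);
	return 0;
}

/* tear the station down one step at a time, as the WDS case does on suspend */
static void roll_back_station(enum sta_state state)
{
	for (; state > STA_NOTEXIST; state--)
		drv_sta_state(state, state - 1);
}

int main(void)
{
	roll_back_station(STA_AUTHORIZED);
	return 0;
}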
Cc: stable@kernel.org [3.15+] Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/mac80211/pm.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 4c5192e0d66c..4a95fe3cffbc 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -86,20 +86,6 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } } - /* tear down aggregation sessions and remove STAs */ - mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) { - if (sta->uploaded) { - enum ieee80211_sta_state state; - - state = sta->sta_state; - for (; state > IEEE80211_STA_NOTEXIST; state--) - WARN_ON(drv_sta_state(local, sta->sdata, sta, - state, state - 1)); - } - } - mutex_unlock(&local->sta_mtx); - /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) @@ -111,6 +97,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) case NL80211_IFTYPE_STATION: ieee80211_mgd_quiesce(sdata); break; + case NL80211_IFTYPE_WDS: + /* tear down aggregation sessions and remove STAs */ + mutex_lock(&local->sta_mtx); + sta = sdata->u.wds.sta; + if (sta && sta->uploaded) { + enum ieee80211_sta_state state; + + state = sta->sta_state; + for (; state > IEEE80211_STA_NOTEXIST; state--) + WARN_ON(drv_sta_state(local, sta->sdata, + sta, state, + state - 1)); + } + mutex_unlock(&local->sta_mtx); + break; default: break; } -- cgit v1.2.3 From fb142f4bbb7d718b3d9cc8f27c909b4809545f5c Mon Sep 17 00:00:00 2001 From: Fred Chou Date: Tue, 20 Jan 2015 10:17:27 +0800 Subject: mac80211: correct header length calculation HT Control field may also be present in management frames, as defined in 8.2.4.1.10 of 802.11-2012. Account for this in calculation of header length. Signed-off-by: Fred Chou Signed-off-by: Johannes Berg --- net/wireless/util.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index d0ac795445b7..5488c3662f7d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -308,6 +308,12 @@ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc) goto out; } + if (ieee80211_is_mgmt(fc)) { + if (ieee80211_has_order(fc)) + hdrlen += IEEE80211_HT_CTL_LEN; + goto out; + } + if (ieee80211_is_ctl(fc)) { /* * ACK and CTS are 10 bytes, all others 16. To see how -- cgit v1.2.3 From 3a5c5e81d8128a9e43abc52b75dd21d3da7a0cfc Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 20 Jan 2015 15:05:08 +0100 Subject: mac80211: properly set CCK flag in radiotap Fix a regression introduced by commit a5e70697d0c4 ("mac80211: add radiotap flag and handling for 5/10 MHz") where the IEEE80211_CHAN_CCK channel type flag was incorrectly replaced by the IEEE80211_CHAN_OFDM flag. This commit fixes that by using the CCK flag again. 
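The selection logic after the fix is small enough to model in userspace. The sketch below uses the radiotap channel-flag values (CCK 0x0020, OFDM 0x0040, 2 GHz 0x0080) defined locally rather than pulled from kernel headers, and reduces the rate classification to a boolean:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* radiotap channel flags, defined locally for this standalone sketch */
#define CHAN_CCK   0x0020
#define CHAN_OFDM  0x0040
#define CHAN_2GHZ  0x0080

/* corrected flag selection for a frame received on a 2.4 GHz channel */
static uint16_t radiotap_chan_flags_2ghz(bool have_rate, bool rate_is_erp_g)
{
	uint16_t flags = CHAN_2GHZ;

	if (have_rate && rate_is_erp_g)
		flags |= CHAN_OFDM;	/* ERP (802.11g) rate */
	else if (have_rate)
		flags |= CHAN_CCK;	/* non-ERP rate: the branch the fix restores */

	return flags;
}

int main(void)
{
	printf("1 Mb/s (CCK):   0x%04x\n", radiotap_chan_flags_2ghz(true, false));
	printf("54 Mb/s (OFDM): 0x%04x\n", radiotap_chan_flags_2ghz(true, true));
	printf("no rate info:   0x%04x\n", radiotap_chan_flags_2ghz(false, false));
	return 0;
}

Getting this right matters mainly for sniffers, which use the radiotap channel flags to decide how to interpret the capture.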
Cc: stable@vger.kernel.org Fixes: a5e70697d0c4 ("mac80211: add radiotap flag and handling for 5/10 MHz") Signed-off-by: Mathy Vanhoef Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 683b10f46505..d69ca513848e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -272,7 +272,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (rate && rate->flags & IEEE80211_RATE_ERP_G) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; else if (rate) - channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; + channel_flags |= IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ; else channel_flags |= IEEE80211_CHAN_2GHZ; put_unaligned_le16(channel_flags, pos); -- cgit v1.2.3 From 14f2ae83d07a0a0d499d4760dd4a1bffd310b6ae Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 22 Jan 2015 23:30:19 +0200 Subject: mac80211: synchronize_net() before flushing the queues When mac80211 disconnects, it drops all the packets on the queues. This happens after the net stack has been notified that we have no link anymore (netif_carrier_off). netif_carrier_off ensures that no new packets are sent to the xmit() callback, but we might have older packets in the middle of the Tx path. These packets will land in the driver's queues after the latter have been flushed. Synchronize_net() between netif_carrier_off and drv_flush() will fix this. Note that we can't call synchronize_net inside ieee80211_flush_queues since there are flows that call ieee80211_flush_queues and don't need synchronize_net() which is an expensive operation. Signed-off-by: Emmanuel Grumbach [reword comment to be more accurate] Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1875181ebd94..5cce6055f3ae 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2011,6 +2011,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* disable per-vif ps */ ieee80211_recalc_ps_vif(sdata); + /* make sure ongoing transmission finishes */ + synchronize_net(); + /* * drop any frame before deauth/disassoc, this can be data or * management frame. Since we are disconnecting, we should not -- cgit v1.2.3 From 4afaff176a968457df18eeebc1aad910b6154761 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 22 Jan 2015 23:32:46 +0200 Subject: mac80211: avoid races related to suspend flow When we go to suspend, there is a complex set of states that avoids races. The quiescing variable is set while __ieee80211_suspend is running. Then suspended is set. The code makes sure there is no window without any of these flags. The problem is that workers can still be enqueued while we are quiescing. This leads to situations where the driver is already suspending and other flows like disassociation are handled by a worker. To fix this, we need to check the quiescing and suspended flags in the worker itself and not only before enqueueing it. I also add here extensive documentation to ease the understanding of these complex issues. 
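The check the patch adds boils down to two flags consulted by the worker itself, rather than only by the code that queues it. A minimal userspace model of that logic (the struct below is an illustrative stand-in for ieee80211_local, not the real structure):

#include <stdbool.h>
#include <stdio.h>

struct local_state {
	bool quiescing;	/* set while __ieee80211_suspend() flushes work */
	bool suspended;	/* set once the driver has been suspended */
};

static bool can_run_worker(const struct local_state *local)
{
	if (local->quiescing)	/* racing with __ieee80211_suspend() */
		return false;
	if (local->suspended)	/* queued before quiescing, ran after suspend */
		return false;
	return true;
}

static void iface_work(const struct local_state *local)
{
	if (!can_run_worker(local)) {
		printf("worker: aborting, suspend in progress\n");
		return;
	}
	printf("worker: running deferred work\n");
}

int main(void)
{
	struct local_state local = { .quiescing = false, .suspended = true };

	iface_work(&local);	/* aborts instead of touching a suspended driver */
	return 0;
}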
Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 30 ++++++++++++++++++++++++++++++ net/mac80211/iface.c | 7 +------ net/mac80211/util.c | 15 +++++++++------ 3 files changed, 40 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6e1b184183fb..bdb3e3bc7503 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1886,6 +1886,36 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, unsigned int queues, bool drop); +static inline bool ieee80211_can_run_worker(struct ieee80211_local *local) +{ + /* + * If quiescing is set, we are racing with __ieee80211_suspend. + * __ieee80211_suspend flushes the workers after setting quiescing, + * and we check quiescing / suspended before enqueing new workers. + * We should abort the worker to avoid the races below. + */ + if (local->quiescing) + return false; + + /* + * We might already be suspended if the following scenario occurs: + * __ieee80211_suspend Control path + * + * if (local->quiescing) + * return; + * local->quiescing = true; + * flush_workqueue(); + * queue_work(...); + * local->suspended = true; + * local->quiescing = false; + * worker starts running... + */ + if (local->suspended) + return false; + + return true; +} + void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *bssid, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 4371c123a95e..81a27516813e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1170,12 +1170,7 @@ static void ieee80211_iface_work(struct work_struct *work) if (local->scanning) return; - /* - * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the rest. - */ - if (WARN(local->suspended, - "interface work scheduled while going to suspend\n")) + if (!ieee80211_can_run_worker(local)) return; /* first process frames */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index fbd37d43dfce..c65d03f3c167 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -744,16 +744,19 @@ EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif); /* * Nothing should have been stuffed into the workqueue during - * the suspend->resume cycle. If this WARN is seen then there - * is a bug with either the driver suspend or something in - * mac80211 stuffing into the workqueue which we haven't yet - * cleared during mac80211's suspend cycle. + * the suspend->resume cycle. Since we can't check each caller + * of this function if we are already quiescing / suspended, + * check here and don't WARN since this can actually happen when + * the rx path (for example) is racing against __ieee80211_suspend + * and suspending / quiescing was set after the rx path checked + * them. */ static bool ieee80211_can_queue_work(struct ieee80211_local *local) { - if (WARN(local->suspended && !local->resuming, - "queueing ieee80211 work while going to suspend\n")) + if (local->quiescing || (local->suspended && !local->resuming)) { + pr_warn("queueing ieee80211 work while going to suspend\n"); return false; + } return true; } -- cgit v1.2.3 From 332ff7fe36bfdcab4ae7d35a2273ce431bf15171 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 22 Jan 2015 23:34:10 +0200 Subject: mac80211: complete scan work immediately if quiesced or suspended It is possible that a deferred scan is queued after the queues are flushed in __ieee80211_suspend(). 
The deferred scan work may be scheduled by ROC or ieee80211_stop_poll(). To make sure don't start a new scan while suspending, check whether we're quiescing or suspended and complete the scan immediately if that's the case. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 7807fa42ed3f..05f0d711b6d8 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -828,6 +828,11 @@ void ieee80211_scan_work(struct work_struct *work) mutex_lock(&local->mtx); + if (!ieee80211_can_run_worker(local)) { + aborted = true; + goto out_complete; + } + sdata = rcu_dereference_protected(local->scan_sdata, lockdep_is_held(&local->mtx)); scan_req = rcu_dereference_protected(local->scan_req, -- cgit v1.2.3 From 985e88b13a25599eeb46b03c123531a5a11ac1cd Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 22 Jan 2015 16:49:53 -0500 Subject: Revert "mac80211: keep sending peer candidate events while in listen state" This reverts commit 2ae70efcea7a695a62bb47170d3fb16674b8dbea. The new peer events that are generated by the change are causing problems with wpa_supplicant in userspace: wpa_s tries to restart SAE authentication with the peer when receiving the event, even though authentication may be in progress already, and it gets very confused. Revert back to the original operating mode, which is to only get events when there is no corresponding station entry. Cc: Nishikawa, Kenzoh Cc: Masashi Honma Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh_plink.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index fa94ca15ba95..b488e1859b18 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -523,13 +523,6 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.mshcfg.auto_open_plinks && rssi_threshold_check(sdata, sta)) changed = mesh_plink_open(sta); - else if (sta->plink_state == NL80211_PLINK_LISTEN && - (sdata->u.mesh.user_mpm || - sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)) - cfg80211_notify_new_peer_candidate(sdata->dev, hw_addr, - elems->ie_start, - elems->total_len, - GFP_ATOMIC); ieee80211_mps_frame_release(sta, elems); out: -- cgit v1.2.3 From 0fa7b39131576dd1baa6ca17fca53c65d7f62249 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Jan 2015 11:10:12 +0100 Subject: nl80211: fix per-station group key get/del and memory leak In case userspace attempts to obtain key information for or delete a unicast key, this is currently erroneously rejected unless the driver sets the WIPHY_FLAG_IBSS_RSN flag. Apparently enough drivers do so it was never noticed. Fix that, and while at it fix a potential memory leak: the error path in the get_key() function was placed after allocating a message but didn't free it - move it to a better place. Luckily admin permissions are needed to call this operation. 
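The leak existed because the rejection happened after nlmsg_new() without freeing the message. The fixed ordering (validate before allocating, free on every later error path) can be sketched in userspace like this; the names, flags and error codes are stand-ins for the nl80211 ones:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* sketch of the corrected error-path ordering in the get_key handler */
static int get_key(bool pairwise, bool have_mac, bool ibss_rsn, bool drv_fails)
{
	char *msg;

	/* 1) cheap validation first: nothing allocated yet, nothing to leak */
	if (!pairwise && have_mac && !ibss_rsn)
		return -ENOENT;	/* per-station group key not supported here */

	/* 2) only now allocate the reply message */
	msg = malloc(256);
	if (!msg)
		return -ENOMEM;

	/* 3) any later failure must release the allocation */
	if (drv_fails) {
		free(msg);
		return -EIO;
	}

	printf("reply built for %s key\n", pairwise ? "pairwise" : "group");
	free(msg);
	return 0;
}

int main(void)
{
	printf("group key, no IBSS RSN: %d\n", get_key(false, true, false, false));
	printf("pairwise key:           %d\n", get_key(true, true, false, false));
	return 0;
}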
Cc: stable@vger.kernel.org Fixes: e31b82136d1ad ("cfg80211/mac80211: allow per-station GTKs") Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ca4b5133123..8887c6e5fca8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2854,6 +2854,9 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->get_key) return -EOPNOTSUPP; + if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) + return -ENOENT; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -2873,10 +2876,6 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) goto nla_put_failure; - if (pairwise && mac_addr && - !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) - return -ENOENT; - err = rdev_get_key(rdev, dev, key_idx, pairwise, mac_addr, &cookie, get_key_callback); @@ -3047,7 +3046,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) wdev_lock(dev->ieee80211_ptr); err = nl80211_key_allowed(dev->ieee80211_ptr); - if (key.type == NL80211_KEYTYPE_PAIRWISE && mac_addr && + if (key.type == NL80211_KEYTYPE_GROUP && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; -- cgit v1.2.3 From 13874e4b23de83899cc2d48011a98e42347c67cb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Jan 2015 11:25:20 +0100 Subject: nl80211: suppress smatch warnings smatch warns that we once checked request->ssids in two functions and then unconditionally used it later again. This is actually fine, because the code has a relationship between attrs[NL80211_ATTR_SCAN_SSIDS], n_ssids and request->ssids, but smatch isn't smart enough to realize that. Suppress the warnings by always checking just n_ssids - that way smatch won't know that request->ssids could be NULL, and since it is only NULL when n_ssids is 0 we still check everything correctly. 
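The reasoning relies on an invariant that is easy to state but invisible to the checker: the ssids pointer is only ever set when n_ssids is non-zero, so branching on the count is equivalent. A tiny illustration of that equivalence, with stand-in types:

#include <stdio.h>

struct scan_request {
	void *ssids;	/* non-NULL exactly when n_ssids > 0 */
	int n_ssids;
};

static const char *ie_location(const struct scan_request *req)
{
	/* same decision as testing req->ssids, but provably NULL-safe */
	return req->n_ssids ? "after the SSID array" : "after the channel list";
}

int main(void)
{
	int dummy;
	struct scan_request none = { .ssids = NULL,   .n_ssids = 0 };
	struct scan_request one  = { .ssids = &dummy, .n_ssids = 1 };

	printf("no SSIDs: IEs go %s\n", ie_location(&none));
	printf("one SSID: IEs go %s\n", ie_location(&one));
	return 0;
}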
Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4542e8683beb..454d7a079d03 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5776,7 +5776,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = n_ssids; if (ie_len) { - if (request->ssids) + if (n_ssids) request->ie = (void *)(request->ssids + n_ssids); else request->ie = (void *)(request->channels + n_channels); @@ -5832,7 +5832,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->n_channels = i; i = 0; - if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { + if (n_ssids) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; @@ -6030,7 +6030,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = n_ssids; if (ie_len) { - if (request->ssids) + if (n_ssids) request->ie = (void *)(request->ssids + n_ssids); else request->ie = (void *)(request->channels + n_channels); @@ -6039,7 +6039,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, if (n_match_sets) { if (request->ie) request->match_sets = (void *)(request->ie + ie_len); - else if (request->ssids) + else if (n_ssids) request->match_sets = (void *)(request->ssids + n_ssids); else @@ -6098,7 +6098,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->n_channels = i; i = 0; - if (attrs[NL80211_ATTR_SCAN_SSIDS]) { + if (n_ssids) { nla_for_each_nested(attr, attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { -- cgit v1.2.3 From d6f5cc091b04c4364c7cd928cef3dff21e2bef55 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Jan 2015 11:36:54 +0100 Subject: mac80211: tdls: remove shadowing variable There's no need to use another local 'sta' variable as the original (outer scope) one isn't needed any more and has become invalid anyway when exiting the RCU read section. Remove the inner scope one and along with it the useless NULL initialization. Signed-off-by: Johannes Berg --- net/mac80211/tdls.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 917088dfd696..80b66ce9157b 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -852,7 +852,6 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, */ if ((action_code == WLAN_TDLS_TEARDOWN) && (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { - struct sta_info *sta = NULL; bool try_resend; /* Should we keep skb for possible resend */ /* If not sending directly to peer - no point in keeping skb */ -- cgit v1.2.3 From c5309ba78742483514b8f7e447ebe56685894298 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Jan 2015 11:42:14 +0100 Subject: mac80211: tdls: disentangle HT supported conditions These conditions are rather difficult to follow, for example because "!sta" only exists to not crash in the case that we don't have a station pointer (WLAN_TDLS_SETUP_REQUEST) in which the additional condition (peer supports HT) doesn't actually matter anyway. Cleaning this up only duplicates two lines of code but makes the rest far easier to read, so do that. 
As a side effect, smatch stops complaining about the lack of a sta pointer test after the !sta (since the !sta goes away) Signed-off-by: Johannes Berg --- net/mac80211/tdls.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 80b66ce9157b..c9f9752217ac 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -345,24 +345,24 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, */ sband = local->hw.wiphy->bands[band]; memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); - if ((action_code == WLAN_TDLS_SETUP_REQUEST || - action_code == WLAN_TDLS_SETUP_RESPONSE) && - ht_cap.ht_supported && (!sta || sta->sta.ht_cap.ht_supported)) { - if (action_code == WLAN_TDLS_SETUP_REQUEST) { - ieee80211_apply_htcap_overrides(sdata, &ht_cap); - - /* disable SMPS in TDLS initiator */ - ht_cap.cap |= (WLAN_HT_CAP_SM_PS_DISABLED - << IEEE80211_HT_CAP_SM_PS_SHIFT); - } else { - /* disable SMPS in TDLS responder */ - sta->sta.ht_cap.cap |= - (WLAN_HT_CAP_SM_PS_DISABLED - << IEEE80211_HT_CAP_SM_PS_SHIFT); - - /* the peer caps are already intersected with our own */ - memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap)); - } + + if (action_code == WLAN_TDLS_SETUP_REQUEST && ht_cap.ht_supported) { + ieee80211_apply_htcap_overrides(sdata, &ht_cap); + + /* disable SMPS in TDLS initiator */ + ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED + << IEEE80211_HT_CAP_SM_PS_SHIFT; + + pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); + ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); + } else if (action_code == WLAN_TDLS_SETUP_RESPONSE && + ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { + /* disable SMPS in TDLS responder */ + sta->sta.ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED + << IEEE80211_HT_CAP_SM_PS_SHIFT; + + /* the peer caps are already intersected with our own */ + memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap)); pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap); -- cgit v1.2.3 From 3d6dc3431e944cf800637404167447f4badffdba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Jan 2015 13:23:48 +0100 Subject: mac80211: fix per-TID RX-MSDU counter In the case of non-QoS association, the counter was actually wrong. The right index isn't security_idx but seqno_idx, as security_idx will be 0 for data frames, while 16 is needed. Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3d79d498e7f6..ed516ae80a3b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2310,12 +2310,12 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; if (rx->sta) { - /* The security index has the same property as needed + /* The seqno index has the same property as needed * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS * for non-QoS-data frames. Here we know it's a data * frame, so count MSDUs. */ - rx->sta->rx_msdu[rx->security_idx]++; + rx->sta->rx_msdu[rx->seqno_idx]++; } /* -- cgit v1.2.3 From 225b818982403120ce1f5e7d4b3e5245e0399775 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 21 Jan 2015 21:09:02 +0100 Subject: mac80211: support beacon statistics For drivers without beacon filtering, support beacon statistics entirely, i.e. report the number of beacons and average signal. 
For drivers with beacon filtering, give them the number of beacons received by mac80211 -- in case the device reports only the number of filtered beacons then driver doesn't have to count all beacons again as mac80211 already does. Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 79383ef0c264..00ca8dcc2bcf 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1764,6 +1764,13 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->generation = sdata->local->sta_generation; + /* do before driver, so beacon filtering drivers have a + * chance to e.g. just add the number of filtered beacons + * (or just modify the value entirely, of course) + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) + sinfo->rx_beacon = sdata->u.mgd.count_beacon_signal; + drv_sta_statistics(local, sdata, &sta->sta, sinfo); sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) | @@ -1816,6 +1823,13 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->rx_dropped_misc = sta->rx_dropped; sinfo->beacon_loss_count = sta->beacon_loss_count; + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { + sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_RX) | + BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG); + sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif); + } + if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { -- cgit v1.2.3 From 3c5199143bc4b35f472c5c2534026d74821e2044 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 Jan 2015 08:19:32 -0500 Subject: sunrpc/lockd: fix references to the BKL The BKL is completely out of the picture in the lockd and sunrpc code these days. Update the antiquated comments that refer to it. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/svclock.c | 4 ++-- include/linux/sunrpc/svc.h | 2 +- net/sunrpc/svc.c | 4 ++-- net/sunrpc/svc_xprt.c | 3 +-- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 56598742dde4..5581e020644b 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -57,8 +57,8 @@ static DEFINE_SPINLOCK(nlm_blocked_lock); static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) { /* - * We can get away with a static buffer because we're only - * called with BKL held. + * We can get away with a static buffer because this is only called + * from lockd, which is single-threaded. */ static char buf[2*NLM_MAXCOOKIELEN+1]; unsigned int i, len = sizeof(buf); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6f22cfeef5e3..fae6fb947fc8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -110,7 +110,7 @@ struct svc_serv { * We use sv_nrthreads as a reference count. svc_destroy() drops * this refcount, so we need to bump it up around operations that * change the number of threads. Horrible, but there it is. - * Should be called with the BKL held. + * Should be called with the "service mutex" held. 
*/ static inline void svc_get(struct svc_serv *serv) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 91eaef1844c8..78974e4d9ad2 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -768,8 +768,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) EXPORT_SYMBOL_GPL(svc_set_num_threads); /* - * Called from a server thread as it's exiting. Caller must hold the BKL or - * the "service mutex", whichever is appropriate for the service. + * Called from a server thread as it's exiting. Caller must hold the "service + * mutex" for the service. */ void svc_exit_thread(struct svc_rqst *rqstp) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c69358b3cf7f..163ac45c3639 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -42,7 +42,7 @@ static LIST_HEAD(svc_xprt_class_list); * svc_pool->sp_lock protects most of the fields of that pool. * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. - * BKL protects svc_serv->sv_nrthread. + * The "service mutex" protects svc_serv->sv_nrthread. * svc_sock->sk_lock protects the svc_sock->sk_deferred list * and the ->sk_info_authunix cache. * @@ -67,7 +67,6 @@ static LIST_HEAD(svc_xprt_class_list); * that no other thread will be using the transport or will * try to set XPT_DEAD. */ - int svc_reg_xprt_class(struct svc_xprt_class *xcl) { struct svc_xprt_class *cl; -- cgit v1.2.3 From 5d57e7964c3261827cc718b0e5317ac4950e2f21 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 23 Jan 2015 10:10:38 +0200 Subject: Bluetooth: Check for valid bdaddr in add_remote_oob_data Before doing any other verifications, the add_remote_oob_data function should first check that the given address is valid. This patch adds such a missing check to the beginning of the function. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 41e30055bae8..2c0de3e4e79a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3633,10 +3633,16 @@ unlock: static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { + struct mgmt_addr_info *addr = data; int err; BT_DBG("%s ", hdev->name); + if (!bdaddr_type_is_valid(addr->type)) + return cmd_complete(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, addr, + sizeof(*addr)); + hci_dev_lock(hdev); if (len == MGMT_ADD_REMOTE_OOB_DATA_SIZE) { -- cgit v1.2.3 From 484aabc1c4e869879e614a621e2dfabc9f98fb00 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 23 Jan 2015 10:10:39 +0200 Subject: Bluetooth: Remove incorrect check for BDADDR_BREDR address type The Add Remote OOB Data mgmt command should allow data to be passed for LE as well. This patch removes a left-over check for BDADDR_BREDR that should not be there anymore. 
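Taken together, the two changes mean Add Remote OOB Data validates the address type once, up front, and then accepts both BR/EDR and LE addresses. A small userspace model of that shape; the type and status constants are stand-ins for the mgmt API ones:

#include <stdio.h>

enum addr_type { ADDR_BREDR, ADDR_LE_PUBLIC, ADDR_LE_RANDOM };

enum status { STATUS_SUCCESS, STATUS_INVALID_PARAMS };

static enum status add_remote_oob_data(int type)
{
	/* validate once, before touching any state */
	if (type < ADDR_BREDR || type > ADDR_LE_RANDOM)
		return STATUS_INVALID_PARAMS;

	/* both BR/EDR and LE addresses are acceptable from here on */
	printf("storing OOB data for address type %d\n", type);
	return STATUS_SUCCESS;
}

int main(void)
{
	printf("LE public: %d\n", add_remote_oob_data(ADDR_LE_PUBLIC));
	printf("bogus:     %d\n", add_remote_oob_data(7));
	return 0;
}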
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2c0de3e4e79a..862a005d9db2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3672,14 +3672,6 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, u8 *rand192, *hash192; u8 status; - if (cp->addr.type != BDADDR_BREDR) { - err = cmd_complete(sk, hdev->id, - MGMT_OP_ADD_REMOTE_OOB_DATA, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); - goto unlock; - } - if (bdaddr_type_is_le(cp->addr.type)) { rand192 = NULL; hash192 = NULL; -- cgit v1.2.3 From a1443f5a273713d4bfda360e45aa6e1d14fe7324 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 23 Jan 2015 15:42:46 +0200 Subject: Bluetooth: Convert Set SC to use HCI Request This patch converts the Set Secure Connection HCI handling to use a HCI request instead of using a hard-coded callback in hci_event.c. This e.g. ensures that we don't clear the flags incorrectly if something goes wrong with the power up process (not related to a mgmt Set SC command). The code can also be simplified a bit since only one pending Set SC command is allowed, i.e. mgmt_pending_foreach usage is not needed. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_event.c | 4 +- net/bluetooth/mgmt.c | 92 +++++++++++++++++++++------------------- 3 files changed, 50 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7777124bff55..0f5e59f1e3cb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1369,7 +1369,6 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_auth_failed(struct hci_conn *conn, u8 status); void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status); void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); -void mgmt_sc_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a58845e98921..e2b81adc232f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -525,9 +525,7 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) hdev->features[1][0] &= ~LMP_HOST_SC; } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) - mgmt_sc_enable_complete(hdev, sent->support, status); - else if (!status) { + if (!test_bit(HCI_MGMT, &hdev->dev_flags) && !status) { if (sent->support) set_bit(HCI_SC_ENABLED, &hdev->dev_flags); else diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 862a005d9db2..25e40e82b9a2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4741,11 +4741,57 @@ unlock: return err; } +static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + struct mgmt_mode *cp; + + BT_DBG("%s status %u", hdev->name, status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev); + if (!cmd) + goto unlock; + + if (status) { + cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); + goto remove; + } + + cp = cmd->param; + + switch (cp->val) { + case 0x00: + clear_bit(HCI_SC_ENABLED, 
&hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x01: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x02: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + set_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + } + + send_settings_rsp(cmd->sk, MGMT_OP_SET_SECURE_CONN, hdev); + new_settings(hdev, cmd->sk); + +remove: + mgmt_pending_remove(cmd); +unlock: + hci_dev_unlock(hdev); +} + static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct pending_cmd *cmd; + struct hci_request req; u8 val; int err; @@ -4814,17 +4860,14 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_send_cmd(hdev, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + hci_req_init(&req, hdev); + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + err = hci_req_run(&req, sc_enable_complete); if (err < 0) { mgmt_pending_remove(cmd); goto failed; } - if (cp->val == 0x02) - set_bit(HCI_SC_ONLY, &hdev->dev_flags); - else - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - failed: hci_dev_unlock(hdev); return err; @@ -7001,43 +7044,6 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_run(&req, NULL); } -void mgmt_sc_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) -{ - struct cmd_lookup match = { NULL, hdev }; - bool changed = false; - - if (status) { - u8 mgmt_err = mgmt_status(status); - - if (enable) { - if (test_and_clear_bit(HCI_SC_ENABLED, - &hdev->dev_flags)) - new_settings(hdev, NULL); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - cmd_status_rsp, &mgmt_err); - return; - } - - if (enable) { - changed = !test_and_set_bit(HCI_SC_ENABLED, &hdev->dev_flags); - } else { - changed = test_and_clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - settings_rsp, &match); - - if (changed) - new_settings(hdev, match.sk); - - if (match.sk) - sock_put(match.sk); -} - static void sk_lookup(struct pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; -- cgit v1.2.3 From dfb2fae7cd0a1aa13610b11d54203bcd3893da07 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 23 Jan 2015 12:16:53 -0500 Subject: Bluetooth: Fix nested sleeps l2cap/rfcomm/sco_sock_accept() are wait loops which may acquire sleeping locks. Since both wait loops and sleeping locks use task_struct.state to sleep and wake, the nested sleeping locks destroy the wait loop state. Use the newly-minted wait_woken() and DEFINE_WAIT_FUNC() for the wait loop. DEFINE_WAIT_FUNC() allows an alternate wake function to be specified; in this case, the predefined scheduler function, woken_wake_function(). This wait construct ensures wakeups will not be missed without requiring the wait loop to set the task state before condition evaluation. How this works: CPU 0 | CPU 1 | | is set? | no set | | wake_up_interruptible | woken_wake_function | set WQ_FLAG_WOKEN | try_to_wake_up | | wait_woken | set TASK_INTERRUPTIBLE | WQ_FLAG_WOKEN? yes | set TASK_RUNNING | | - loop - | | is set? | yes - exit wait loop Fixes "do not call blocking ops when !TASK_RUNNING" warnings in l2cap_sock_accept(), rfcomm_sock_accept() and sco_sock_accept(). 
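The converted accept loops all take the same shape, so it is worth seeing once in condensed form. The sketch below is kernel-style and will not build outside the kernel tree, and it trims the listen-state and error handling, but the wait primitives (DEFINE_WAIT_FUNC, woken_wake_function, wait_woken) are the real ones:

/* Condensed sketch of the corrected accept wait loop.  No task state is set
 * around lock_sock(), so the nested sleeping lock cannot clobber it; wakeups
 * are still not lost because woken_wake_function() records them in
 * WQ_FLAG_WOKEN, which wait_woken() checks before sleeping. */
static struct sock *accept_wait(struct sock *sk, long timeo)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *nsk = NULL;

	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (1) {
		nsk = bt_accept_dequeue(sk, NULL);	/* condition check */
		if (nsk || !timeo || signal_pending(current))
			break;

		release_sock(sk);
		/* returns immediately if WQ_FLAG_WOKEN is already set */
		timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
		lock_sock(sk);				/* may sleep: now safe */
	}
	remove_wait_queue(sk_sleep(sk), &wait);

	return nsk;
}

The same pattern is applied to l2cap_sock_accept(), rfcomm_sock_accept() and sco_sock_accept() in the diff that follows.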
Signed-off-by: Peter Hurley Signed-off-by: Johan Hedberg --- net/bluetooth/l2cap_sock.c | 9 ++++----- net/bluetooth/rfcomm/sock.c | 9 ++++----- net/bluetooth/sco.c | 8 +++----- 3 files changed, 11 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 20206cd3acbc..60694f0f4c73 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -302,7 +302,7 @@ done: static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; @@ -316,8 +316,6 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -338,10 +336,11 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock_nested(sk, L2CAP_NESTING_PARENT); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index d8a95755a8a8..3c6d2c8ac1a4 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -468,7 +468,7 @@ done: static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; @@ -487,8 +487,6 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -509,10 +507,11 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 07ec7d23b843..76321b546e84 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -618,7 +618,7 @@ done: static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flags) { - DECLARE_WAITQUEUE(wait, current); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; long timeo; int err = 0; @@ -632,8 +632,6 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag /* Wait for an incoming connection. (wake-one). 
*/ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; @@ -654,10 +652,10 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag } release_sock(sk); - timeo = schedule_timeout(timeo); + + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); } - __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.2.3 From d7924450e14ea414568563ec01489f77452b00b4 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 20 Jan 2015 13:44:25 +0100 Subject: act_connmark: Add missing dependency on NF_CONNTRACK_MARK Depending on NETFILTER is not sufficient to ensure the presence of the 'mark' field in nf_conn, also needs to depend on NF_CONNTRACK_MARK. Fixes: 22a5dc ("net: sched: Introduce connmark action") Cc: Felix Fietkau Cc: Jamal Hadi Salim Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sched/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 475e35e261ec..7a57f66e654a 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -713,6 +713,7 @@ config NET_ACT_BPF config NET_ACT_CONNMARK tristate "Netfilter Connection Mark Retriever" depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + depends on NF_CONNTRACK_MARK ---help--- Say Y here to allow retrieving of conn mark -- cgit v1.2.3 From bdef279b993b3a112c493447d5b3d8f28c229d88 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 20 Jan 2015 15:15:42 +0100 Subject: rtnl: fix error path when adding an iface with a link net If an error occurs when the netdevice is moved to the link netns, a full cleanup must be done. Fixes: 317f4810e45e ("rtnl: allow to create device with IFLA_LINK_NETNSID set") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a12eecc0f976..07447d1665e6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2172,8 +2172,11 @@ replay: goto out; } - if (link_net) + if (link_net) { err = dev_change_net_namespace(dev, dest_net, ifname); + if (err < 0) + unregister_netdevice(dev); + } out: if (link_net) put_net(link_net); -- cgit v1.2.3 From 3390e397611ca9da6bbd6ba5e8bc81a0a91da572 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 20 Jan 2015 15:15:43 +0100 Subject: ip6gretap: advertise link netns via netlink Assign rtnl_link_ops->get_link_net() callback so that IFLA_LINK_NETNSID is added to rtnetlink messages. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9306a5ff9149..6dee2a8ca0a9 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1676,6 +1676,7 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { .changelink = ip6gre_changelink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; /* -- cgit v1.2.3 From 1f17257b1f4a000d65c44da1a95b10ee83382896 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 20 Jan 2015 15:15:44 +0100 Subject: vlan: advertise link netns via netlink Assign rtnl_link_ops->get_link_net() callback so that IFLA_LINK_NETNSID is added to rtnetlink messages. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/8021q/vlan_netlink.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 8ac8a5cc2143..c92b52f37d38 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -238,6 +238,13 @@ nla_put_failure: return -EMSGSIZE; } +static struct net *vlan_get_link_net(const struct net_device *dev) +{ + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; + + return dev_net(real_dev); +} + struct rtnl_link_ops vlan_link_ops __read_mostly = { .kind = "vlan", .maxtype = IFLA_VLAN_MAX, @@ -250,6 +257,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = { .dellink = unregister_vlan_dev, .get_size = vlan_get_size, .fill_info = vlan_fill_info, + .get_link_net = vlan_get_link_net, }; int __init vlan_netlink_init(void) -- cgit v1.2.3 From 193523bf937309d57c6dd7839bcf34d7a029dbee Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 20 Jan 2015 15:15:47 +0100 Subject: vxlan: advertise netns of vxlan dev in fdb msg Netlink FDB messages are sent in the link netns. The header of these messages contains the ifindex (ndm_ifindex) of the netdevice, but this ifindex is unusable in case of x-netns vxlan. I named the new attribute NDA_NDM_IFINDEX_NETNSID, to avoid confusion with NDA_IFINDEX. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 5 +++++ include/uapi/linux/neighbour.h | 1 + net/core/net_namespace.c | 1 + 3 files changed, 7 insertions(+) (limited to 'net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0346eaa6d236..19d3664ab9dd 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -339,6 +339,11 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ndm->ndm_flags = fdb->flags; ndm->ndm_type = RTN_UNICAST; + if (!net_eq(dev_net(vxlan->dev), vxlan->net) && + nla_put_s32(skb, NDA_NDM_IFINDEX_NETNSID, + peernet2id(vxlan->net, dev_net(vxlan->dev)))) + goto nla_put_failure; + if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index f3d77f9f1e0b..38f236853cc0 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -25,6 +25,7 @@ enum { NDA_VNI, NDA_IFINDEX, NDA_MASTER, + NDA_NDM_IFINDEX_NETNSID, __NDA_MAX }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9d1a4cac83b6..b7bde551ef76 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -202,6 +202,7 @@ int peernet2id(struct net *net, struct net *peer) return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; } +EXPORT_SYMBOL(peernet2id); struct net *get_net_ns_by_id(struct net *net, int id) { -- cgit v1.2.3 From d998f8efa47221405ceae129aa93fa6d4ac8510d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 20 Jan 2015 11:23:04 -0800 Subject: udp: Do not require sock in udp_tunnel_xmit_skb The UDP tunnel transmit functions udp_tunnel_xmit_skb and udp_tunnel6_xmit_skb include a socket argument. The socket being passed to the functions (from VXLAN) is a UDP created for receive side. The only thing that the socket is used for in the transmit functions is to get the setting for checksum (enabled or zero). This patch removes the argument and and adds a nocheck argument for checksum setting. This eliminates the unnecessary dependency on a UDP socket for UDP tunnel transmit. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 10 ++++++---- include/net/udp_tunnel.h | 16 ++++++++-------- net/ipv4/geneve.c | 5 +++-- net/ipv4/udp_tunnel.c | 12 ++++++------ net/ipv6/ip6_udp_tunnel.c | 12 ++++++------ 5 files changed, 29 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 19d3664ab9dd..a288ceab502e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1769,8 +1769,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, - ttl, src_port, dst_port); + udp_tunnel6_xmit_skb(dst, skb, dev, saddr, daddr, prio, + ttl, src_port, dst_port, + udp_get_no_check6_tx(vs->sock->sk)); return 0; err: dst_release(dst); @@ -1848,8 +1849,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, - ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, + ttl, df, src_port, dst_port, xnet, + vs->sock->sk->sk_no_check_tx); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 2a50a70ef587..1a20d33d56bc 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -77,17 +77,17 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* Transmit the skb using UDP encapsulation. */ -int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, - __u8 tos, __u8 ttl, __be16 df, __be16 src_port, - __be16 dst_port, bool xnet); +int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck); #if IS_ENABLED(CONFIG_IPV6) -int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, - struct sk_buff *skb, struct net_device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, - __be16 dst_port); + __be16 dst_port, bool nocheck); #endif void udp_tunnel_sock_release(struct socket *sock); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 9568594ca2f1..93e51199e44b 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -136,8 +136,9 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, - tos, ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, + tos, ttl, df, src_port, dst_port, xnet, + gs->sock->sk->sk_no_check_tx); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 9996e63ed304..c83b35485056 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -75,10 +75,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, - __u8 tos, __u8 ttl, __be16 df, __be16 src_port, - __be16 dst_port, bool xnet) +int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, 
bool nocheck) { struct udphdr *uh; @@ -90,9 +90,9 @@ int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, uh->source = src_port; uh->len = htons(skb->len); - udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len); + udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP, + return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 8db6c98fe218..32d9b268e7d8 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -62,14 +62,14 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, - struct sk_buff *skb, struct net_device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port) +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, + __be16 dst_port, bool nocheck) { struct udphdr *uh; struct ipv6hdr *ip6h; - struct sock *sk = sock->sk; __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); @@ -85,7 +85,7 @@ int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, | IPSKB_REROUTED); skb_dst_set(skb, dst); - udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr, skb->len); + udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); -- cgit v1.2.3 From af33c1adae1e095e90d14fe35501256ebb07aabf Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 20 Jan 2015 11:23:05 -0800 Subject: vxlan: Eliminate dependency on UDP socket in transmit path In the vxlan transmit path there is no need to reference the socket for a tunnel which is needed for the receive side. We do, however, need the vxlan_dev flags. This patch eliminate references to the socket in the transmit path, and changes VXLAN_F_UNSHAREABLE to be VXLAN_F_RCV_FLAGS. This mask is used to store the flags applicable to receive (GBP, CSUM6_RX, and REMCSUM_RX) in the vxlan_sock flags. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 60 ++++++++++++++++++++----------------------- include/net/vxlan.h | 13 ++++++---- net/openvswitch/vport-vxlan.c | 6 ++--- 3 files changed, 38 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a288ceab502e..87736e65cd15 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -270,12 +270,13 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, __be16 port, u32 flags) { struct vxlan_sock *vs; - u32 match_flags = flags & VXLAN_F_UNSHAREABLE; + + flags &= VXLAN_F_RCV_FLAGS; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { if (inet_sk(vs->sock->sk)->inet_sport == port && inet_sk(vs->sock->sk)->sk.sk_family == family && - (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags) + vs->flags == flags) return vs; } return NULL; @@ -1674,7 +1675,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, +static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, struct vxlan_metadata *md) { struct vxlanhdr_gbp *gbp; @@ -1692,21 +1693,20 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, } #if IS_ENABLED(CONFIG_IPV6) -static int vxlan6_xmit_skb(struct vxlan_sock *vs, - struct dst_entry *dst, struct sk_buff *skb, +static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port, - struct vxlan_metadata *md, bool xnet) + struct vxlan_metadata *md, bool xnet, u32 vxflags) { struct vxlanhdr *vxh; int min_headroom; int err; - bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk); + bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX); int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; u16 hdrlen = sizeof(struct vxlanhdr); - if ((vs->flags & VXLAN_F_REMCSUM_TX) && + if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { int csum_start = skb_checksum_start_offset(skb); @@ -1764,14 +1764,14 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, } } - if (vs->flags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vs, md); + if (vxflags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vxflags, md); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); udp_tunnel6_xmit_skb(dst, skb, dev, saddr, daddr, prio, ttl, src_port, dst_port, - udp_get_no_check6_tx(vs->sock->sk)); + !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX)); return 0; err: dst_release(dst); @@ -1779,20 +1779,19 @@ err: } #endif -int vxlan_xmit_skb(struct vxlan_sock *vs, - struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, - struct vxlan_metadata *md, bool xnet) + struct vxlan_metadata *md, bool xnet, u32 vxflags) { struct vxlanhdr *vxh; int min_headroom; int err; - bool udp_sum = !vs->sock->sk->sk_no_check_tx; + bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); int type = udp_sum ? 
SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; u16 hdrlen = sizeof(struct vxlanhdr); - if ((vs->flags & VXLAN_F_REMCSUM_TX) && + if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { int csum_start = skb_checksum_start_offset(skb); @@ -1844,14 +1843,14 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, } } - if (vs->flags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vs, md); + if (vxflags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vxflags, md); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, - vs->sock->sk->sk_no_check_tx); + !(vxflags & VXLAN_F_UDP_CSUM)); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); @@ -1983,10 +1982,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, md.vni = htonl(vni << 8); md.gbp = skb->mark; - err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, - fl4.saddr, dst->sin.sin_addr.s_addr, - tos, ttl, df, src_port, dst_port, &md, - !net_eq(vxlan->net, dev_net(vxlan->dev))); + err = vxlan_xmit_skb(rt, skb, fl4.saddr, + dst->sin.sin_addr.s_addr, tos, ttl, df, + src_port, dst_port, &md, + !net_eq(vxlan->net, dev_net(vxlan->dev)), + vxlan->flags); if (err < 0) { /* skb is already freed. */ skb = NULL; @@ -2042,10 +2042,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, md.vni = htonl(vni << 8); md.gbp = skb->mark; - err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, - dev, &fl6.saddr, &fl6.daddr, 0, ttl, - src_port, dst_port, &md, - !net_eq(vxlan->net, dev_net(vxlan->dev))); + err = vxlan6_xmit_skb(ndst, skb, dev, &fl6.saddr, &fl6.daddr, + 0, ttl, src_port, dst_port, &md, + !net_eq(vxlan->net, dev_net(vxlan->dev)), + vxlan->flags); #endif } @@ -2517,15 +2517,11 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, if (ipv6) { udp_conf.family = AF_INET6; - udp_conf.use_udp6_tx_checksums = - !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); udp_conf.use_udp6_rx_checksums = !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); } else { udp_conf.family = AF_INET; udp_conf.local_ip.s_addr = INADDR_ANY; - udp_conf.use_udp_checksums = - !!(flags & VXLAN_F_UDP_CSUM); } udp_conf.local_udp_port = port; @@ -2569,7 +2565,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, atomic_set(&vs->refcnt, 1); vs->rcv = rcv; vs->data = data; - vs->flags = flags; + vs->flags = (flags & VXLAN_F_RCV_FLAGS); /* Initialize the vxlan udp offloads structure */ vs->udp_offloads.port = port; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7be8c342fc95..2927d6244481 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -129,8 +129,12 @@ struct vxlan_sock { #define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_GBP 0x800 -/* These flags must match in order for a socket to be shareable */ -#define VXLAN_F_UNSHAREABLE VXLAN_F_GBP +/* Flags that are used in the receive patch. 
These flags must match in + * order for a socket to be shareable + */ +#define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ + VXLAN_F_UDP_ZERO_CSUM6_RX | \ + VXLAN_F_REMCSUM_RX) struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, @@ -138,11 +142,10 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, void vxlan_sock_release(struct vxlan_sock *vs); -int vxlan_xmit_skb(struct vxlan_sock *vs, - struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, - bool xnet); + bool xnet, u32 vxflags); static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 8a2d54cba9ba..3cc983bf444a 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -252,12 +252,10 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); md.gbp = vxlan_ext_gbp(skb); - err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, - fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - &md, - false); + &md, false, vxlan_port->exts); if (err < 0) ip_rt_put(rt); return err; -- cgit v1.2.3 From 6b8d9117ccb4f81b1244aafa7bc70ef8fa45fc49 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 23 Jan 2015 20:47:00 -0500 Subject: net: llc: use correct size for sysctl timeout entries The timeout entries are sizeof(int) rather than sizeof(long), which means that when they were getting read we'd also leak kernel memory to userspace along with the timeout values. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- net/llc/sysctl_net_llc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 612a5ddaf93b..799bafc2af39 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -18,28 +18,28 @@ static struct ctl_table llc2_timeout_table[] = { { .procname = "ack", .data = &sysctl_llc2_ack_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_ack_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "busy", .data = &sysctl_llc2_busy_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_busy_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "p", .data = &sysctl_llc2_p_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_p_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "rej", .data = &sysctl_llc2_rej_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_rej_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, -- cgit v1.2.3 From b3832117b4b61374fac08692f1b1a620088342dd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:08 -0800 Subject: fib_trie: Use index & (~0ul << n->bits) instead of index >> n->bits In doing performance testing and analysis of the changes I recently found that by shifting the index I had created an unnecessary dependency. I have updated the code so that we instead shift a mask by bits and then just test against that as that should save us about 2 CPU cycles since we can generate the mask while the key and pos are being processed. 
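As a minimal standalone sketch (not code from the patch; the function names are made up for illustration), the two checks reject exactly the same indices for bit counts below BITS_PER_LONG. The point of the mask form is only that (~0ul << bits) can be prepared independently of, and in parallel with, the index computation:

#include <stdbool.h>

/* old form: any bit at position >= bits makes the shifted value non-zero */
static bool skip_bits_mismatch_shift(unsigned long index, unsigned int bits)
{
	return (index >> bits) != 0;
}

/* new form: the same high bits are selected by a precomputable mask */
static bool skip_bits_mismatch_mask(unsigned long index, unsigned int bits)
{
	return (index & (~0ul << bits)) != 0;
}
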
Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 281e5e00025f..dea2f80e08c3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -961,12 +961,12 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) * prefix plus zeros for the bits in the cindex. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if !(index >> bits) - * we know the value is cindex - * else + * if (index & (~0ul << bits)) * we have a mismatch in skip bits and failed + * else + * we know the value is cindex */ - if (index >> n->bits) + if (index & (~0ul << n->bits)) return NULL; /* we have found a leaf. Prefixes have already been compared */ @@ -1301,12 +1301,12 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, * prefix plus zeros for the "bits" in the prefix. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if !(index >> bits) - * we know the value is child index - * else + * if (index & (~0ul << bits)) * we have a mismatch in skip bits and failed + * else + * we know the value is cindex */ - if (index >> n->bits) + if (index & (~0ul << n->bits)) break; /* we have found a leaf. Prefixes have already been compared */ -- cgit v1.2.3 From 69fa57b1e42c171599d53486839c3d58f7ed8eec Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:14 -0800 Subject: fib_trie: Fix RCU bug and merge similar bits of inflate/halve This patch addresses two issues. The first issue is the fact that I believe I had the RCU freeing sequence slightly out of order. As a result we could get into an issue if a caller went into a child of a child of the new node, then backtraced into the to be freed parent, and then attempted to access a child of a child that may have been consumed in a resize of one of the new nodes children. To resolve this I have moved the resize after we have freed the oldtnode. The only side effect of this is that we will now be calling resize on more nodes in the case of inflate due to the fact that we don't have a good way to test to see if a full_tnode on the new node was there before or after the allocation. This should have minimal impact however since the node should already be correctly size so it is just the cost of calling should_inflate that we will be taking on the node which is only a couple of cycles. The second issue is the fact that inflate and halve were essentially doing the same thing after the new node was added to the trie replacing the old one. As such it wasn't really necessary to keep the code in both functions so I have split it out into two other functions, called replace and update_children. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 157 ++++++++++++++++++++++++---------------------------- 1 file changed, 73 insertions(+), 84 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index dea2f80e08c3..7e9031739f06 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -396,8 +396,30 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) rcu_assign_pointer(tn->child[i], n); } -static void put_child_root(struct tnode *tp, struct trie *t, - t_key key, struct tnode *n) +static void update_children(struct tnode *tn) +{ + unsigned long i; + + /* update all of the child parent pointers */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); + + if (!inode) + continue; + + /* Either update the children of a tnode that + * already belongs to us or update the child + * to point to ourselves. + */ + if (node_parent(inode) == tn) + update_children(inode); + else + node_set_parent(inode, tn); + } +} + +static inline void put_child_root(struct tnode *tp, struct trie *t, + t_key key, struct tnode *n) { if (tp) put_child(tp, get_index(key, tp), n); @@ -434,10 +456,35 @@ static void tnode_free(struct tnode *tn) } } +static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) +{ + struct tnode *tp = node_parent(oldtnode); + unsigned long i; + + /* setup the parent pointer out of and back into this node */ + NODE_INIT_PARENT(tn, tp); + put_child_root(tp, t, tn->key, tn); + + /* update all of the child parent pointers */ + update_children(tn); + + /* all pointers should be clean so we are done */ + tnode_free(oldtnode); + + /* resize children now that oldtnode is freed */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); + + /* resize child node */ + if (tnode_full(tn, inode)) + resize(t, inode); + } +} + static int inflate(struct trie *t, struct tnode *oldtnode) { - struct tnode *inode, *node0, *node1, *tn, *tp; - unsigned long i, j, k; + struct tnode *tn; + unsigned long i; t_key m; pr_debug("In inflate\n"); @@ -446,13 +493,18 @@ static int inflate(struct trie *t, struct tnode *oldtnode) if (!tn) return -ENOMEM; + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); + /* Assemble all of the pointers in our cluster, in this case that * represents all of the pointers out of our allocated nodes that * point to existing tnodes and the links between our allocated * nodes. 
*/ for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { - inode = tnode_get_child(oldtnode, --i); + struct tnode *inode = tnode_get_child(oldtnode, --i); + struct tnode *node0, *node1; + unsigned long j, k; /* An empty child */ if (inode == NULL) @@ -464,6 +516,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) continue; } + /* drop the node in the old tnode free list */ + tnode_free_append(oldtnode, inode); + /* An internal node with two children */ if (inode->bits == 1) { put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); @@ -488,9 +543,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); if (!node1) goto nomem; - tnode_free_append(tn, node1); + node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); - node0 = tnode_new(inode->key & ~m, inode->pos, inode->bits - 1); + tnode_free_append(tn, node1); if (!node0) goto nomem; tnode_free_append(tn, node0); @@ -512,53 +567,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) put_child(tn, 2 * i, node0); } - /* setup the parent pointer into and out of this node */ - tp = node_parent(oldtnode); - NODE_INIT_PARENT(tn, tp); - put_child_root(tp, t, tn->key, tn); - - /* prepare oldtnode to be freed */ - tnode_free_init(oldtnode); - - /* update all child nodes parent pointers to route to us */ - for (i = tnode_child_length(oldtnode); i;) { - inode = tnode_get_child(oldtnode, --i); - - /* A leaf or an internal node with skipped bits */ - if (!tnode_full(oldtnode, inode)) { - node_set_parent(inode, tn); - continue; - } - - /* drop the node in the old tnode free list */ - tnode_free_append(oldtnode, inode); - - /* fetch new nodes */ - node1 = tnode_get_child(tn, 2 * i + 1); - node0 = tnode_get_child(tn, 2 * i); + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); - /* bits == 1 then node0 and node1 represent inode's children */ - if (inode->bits == 1) { - node_set_parent(node1, tn); - node_set_parent(node0, tn); - continue; - } - - /* update parent pointers in child node's children */ - for (k = tnode_child_length(inode), j = k / 2; j;) { - node_set_parent(tnode_get_child(inode, --k), node1); - node_set_parent(tnode_get_child(inode, --j), node0); - node_set_parent(tnode_get_child(inode, --k), node1); - node_set_parent(tnode_get_child(inode, --j), node0); - } - - /* resize child nodes */ - resize(t, node1); - resize(t, node0); - } - - /* we completed without error, prepare to free old node */ - tnode_free(oldtnode); return 0; nomem: /* all pointers should be clean so we are done */ @@ -568,7 +579,7 @@ nomem: static int halve(struct trie *t, struct tnode *oldtnode) { - struct tnode *tn, *tp, *inode, *node0, *node1; + struct tnode *tn; unsigned long i; pr_debug("In halve\n"); @@ -577,14 +588,18 @@ static int halve(struct trie *t, struct tnode *oldtnode) if (!tn) return -ENOMEM; + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); + /* Assemble all of the pointers in our cluster, in this case that * represents all of the pointers out of our allocated nodes that * point to existing tnodes and the links between our allocated * nodes. 
*/ for (i = tnode_child_length(oldtnode); i;) { - node1 = tnode_get_child(oldtnode, --i); - node0 = tnode_get_child(oldtnode, --i); + struct tnode *node1 = tnode_get_child(oldtnode, --i); + struct tnode *node0 = tnode_get_child(oldtnode, --i); + struct tnode *inode; /* At least one of the children is empty */ if (!node1 || !node0) { @@ -609,34 +624,8 @@ static int halve(struct trie *t, struct tnode *oldtnode) put_child(tn, i / 2, inode); } - /* setup the parent pointer out of and back into this node */ - tp = node_parent(oldtnode); - NODE_INIT_PARENT(tn, tp); - put_child_root(tp, t, tn->key, tn); - - /* prepare oldtnode to be freed */ - tnode_free_init(oldtnode); - - /* update all of the child parent pointers */ - for (i = tnode_child_length(tn); i;) { - inode = tnode_get_child(tn, --i); - - /* only new tnodes will be considered "full" nodes */ - if (!tnode_full(tn, inode)) { - node_set_parent(inode, tn); - continue; - } - - /* Two nonempty children */ - node_set_parent(tnode_get_child(inode, 1), inode); - node_set_parent(tnode_get_child(inode, 0), inode); - - /* resize child node */ - resize(t, inode); - } - - /* all pointers should be clean so we are done */ - tnode_free(oldtnode); + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); return 0; } -- cgit v1.2.3 From a80e89d4c650a7c3ab74f0b2d133cc2ce9738994 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:20 -0800 Subject: fib_trie: Fall back to slen update on inflate/halve failure This change corrects an issue where if inflate or halve fails we were exiting the resize function without at least updating the slen for the node. To correct this I have moved the update of max_size into the while loop so that it is only decremented on a successful call to either inflate or halve. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 7e9031739f06..80892f565030 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -752,7 +752,7 @@ static void resize(struct trie *t, struct tnode *tn) { struct tnode *tp = node_parent(tn), *n = NULL; struct tnode __rcu **cptr; - int max_work; + int max_work = MAX_WORK; pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", tn, inflate_threshold, halve_threshold); @@ -775,8 +775,7 @@ static void resize(struct trie *t, struct tnode *tn) /* Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. */ - max_work = MAX_WORK; - while (should_inflate(tp, tn) && max_work--) { + while (should_inflate(tp, tn) && max_work) { if (inflate(t, tn)) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(t->stats->resize_node_skipped); @@ -784,6 +783,7 @@ static void resize(struct trie *t, struct tnode *tn) break; } + max_work--; tn = rtnl_dereference(*cptr); } @@ -794,8 +794,7 @@ static void resize(struct trie *t, struct tnode *tn) /* Halve as long as the number of empty children in this * node is above threshold. 
*/ - max_work = MAX_WORK; - while (should_halve(tp, tn) && max_work--) { + while (should_halve(tp, tn) && max_work) { if (halve(t, tn)) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(t->stats->resize_node_skipped); @@ -803,6 +802,7 @@ static void resize(struct trie *t, struct tnode *tn) break; } + max_work--; tn = rtnl_dereference(*cptr); } -- cgit v1.2.3 From 95f60ea3e99aef5967d1566979f4ade7be386e34 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:26 -0800 Subject: fib_trie: Add collapse() and should_collapse() to resize This patch really does two things. First it pulls the logic for determining if we should collapse one node out of the tree and the actual code doing the collapse into a separate pair of functions. This helps to make the changes to these areas more readable. Second it encodes the upper 32b of the empty_children value onto the full_children value in the case of bits == KEYLENGTH. By doing this we are able to handle the case of a 32b node where empty_children would appear to be 0 when it was actually 1ul << 32. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 100 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 80892f565030..f874e1811eab 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -83,7 +83,8 @@ #define MAX_STAT_DEPTH 32 -#define KEYLENGTH (8*sizeof(t_key)) +#define KEYLENGTH (8*sizeof(t_key)) +#define KEY_MAX ((t_key)~0) typedef unsigned int t_key; @@ -102,8 +103,8 @@ struct tnode { union { /* The fields in this struct are valid if bits > 0 (TNODE) */ struct { - unsigned int full_children; /* KEYLENGTH bits needed */ - unsigned int empty_children; /* KEYLENGTH bits needed */ + t_key empty_children; /* KEYLENGTH bits needed */ + t_key full_children; /* KEYLENGTH bits needed */ struct tnode __rcu *child[0]; }; /* This list pointer if valid if bits == 0 (LEAF) */ @@ -302,6 +303,16 @@ static struct tnode *tnode_alloc(size_t size) return vzalloc(size); } +static inline void empty_child_inc(struct tnode *n) +{ + ++n->empty_children ? : ++n->full_children; +} + +static inline void empty_child_dec(struct tnode *n) +{ + n->empty_children-- ? : n->full_children--; +} + static struct tnode *leaf_new(t_key key) { struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); @@ -335,7 +346,7 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = offsetof(struct tnode, child[1 << bits]); + size_t sz = offsetof(struct tnode, child[1ul << bits]); struct tnode *tn = tnode_alloc(sz); unsigned int shift = pos + bits; @@ -348,8 +359,10 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) tn->pos = pos; tn->bits = bits; tn->key = (shift < KEYLENGTH) ? 
(key >> shift) << shift : 0; - tn->full_children = 0; - tn->empty_children = 1<full_children = 1; + else + tn->empty_children = 1ul << bits; } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), @@ -375,11 +388,11 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) BUG_ON(i >= tnode_child_length(tn)); - /* update emptyChildren */ + /* update emptyChildren, overflow into fullChildren */ if (n == NULL && chi != NULL) - tn->empty_children++; - else if (n != NULL && chi == NULL) - tn->empty_children--; + empty_child_inc(tn); + if (n != NULL && chi == NULL) + empty_child_dec(tn); /* update fullChildren */ wasfull = tnode_full(tn, chi); @@ -630,6 +643,24 @@ static int halve(struct trie *t, struct tnode *oldtnode) return 0; } +static void collapse(struct trie *t, struct tnode *oldtnode) +{ + struct tnode *n, *tp; + unsigned long i; + + /* scan the tnode looking for that one child that might still exist */ + for (n = NULL, i = tnode_child_length(oldtnode); !n && i;) + n = tnode_get_child(oldtnode, --i); + + /* compress one level */ + tp = node_parent(oldtnode); + put_child_root(tp, t, oldtnode->key, n); + node_set_parent(n, tp); + + /* drop dead node */ + node_free(oldtnode); +} + static unsigned char update_suffix(struct tnode *tn) { unsigned char slen = tn->pos; @@ -729,10 +760,12 @@ static bool should_inflate(const struct tnode *tp, const struct tnode *tn) /* Keep root node larger */ threshold *= tp ? inflate_threshold : inflate_threshold_root; - used += tn->full_children; used -= tn->empty_children; + used += tn->full_children; - return tn->pos && ((50 * used) >= threshold); + /* if bits == KEYLENGTH then pos = 0, and will fail below */ + + return (used > 1) && tn->pos && ((50 * used) >= threshold); } static bool should_halve(const struct tnode *tp, const struct tnode *tn) @@ -744,13 +777,29 @@ static bool should_halve(const struct tnode *tp, const struct tnode *tn) threshold *= tp ? halve_threshold : halve_threshold_root; used -= tn->empty_children; - return (tn->bits > 1) && ((100 * used) < threshold); + /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ + + return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); +} + +static bool should_collapse(const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + + used -= tn->empty_children; + + /* account for bits == KEYLENGTH case */ + if ((tn->bits == KEYLENGTH) && tn->full_children) + used -= KEY_MAX; + + /* One child or none, time to drop us from the trie */ + return used < 2; } #define MAX_WORK 10 static void resize(struct trie *t, struct tnode *tn) { - struct tnode *tp = node_parent(tn), *n = NULL; + struct tnode *tp = node_parent(tn); struct tnode __rcu **cptr; int max_work = MAX_WORK; @@ -764,14 +813,6 @@ static void resize(struct trie *t, struct tnode *tn) cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; BUG_ON(tn != rtnl_dereference(*cptr)); - /* No children */ - if (tn->empty_children > (tnode_child_length(tn) - 1)) - goto no_children; - - /* One child */ - if (tn->empty_children == (tnode_child_length(tn) - 1)) - goto one_child; - /* Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. 
*/ @@ -807,19 +848,8 @@ static void resize(struct trie *t, struct tnode *tn) } /* Only one child remains */ - if (tn->empty_children == (tnode_child_length(tn) - 1)) { - unsigned long i; -one_child: - for (i = tnode_child_length(tn); !n && i;) - n = tnode_get_child(tn, --i); -no_children: - /* compress one level */ - put_child_root(tp, t, tn->key, n); - node_set_parent(n, tp); - - /* drop dead node */ - tnode_free_init(tn); - tnode_free(tn); + if (should_collapse(tn)) { + collapse(t, tn); return; } -- cgit v1.2.3 From 30cfe7c9c88d73440560d7e381bab12f5463a6cd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:33 -0800 Subject: fib_trie: Use empty_children instead of counting empty nodes in stats collection It doesn't make much sense to count the pointers ourselves when empty_children already has a count for the number of NULL pointers stored in the tnode. As such save ourselves the cycles and just use empty_children. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f874e1811eab..90654bb64e21 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1954,16 +1954,10 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) hlist_for_each_entry_rcu(li, &n->list, hlist) ++s->prefixes; } else { - unsigned long i; - s->tnodes++; if (n->bits < MAX_STAT_DEPTH) s->nodesizes[n->bits]++; - - for (i = tnode_child_length(n); i--;) { - if (!rcu_access_pointer(n->child[i])) - s->nullpointers++; - } + s->nullpointers += n->empty_children; } } rcu_read_unlock(); -- cgit v1.2.3 From 02525368f48c197bce6e4251ff7bde92fa6f026e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:39 -0800 Subject: fib_trie: Move fib_find_alias to file where it is used The function fib_find_alias is only accessed by functions in fib_trie.c as such it makes sense to relocate it and cast it as static so that the compiler can take advantage of optimizations it can do to it as a local function. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_lookup.h | 1 - net/ipv4/fib_semantics.c | 18 ------------------ net/ipv4/fib_trie.c | 20 ++++++++++++++++++++ 3 files changed, 20 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 1e4f6600b31d..825981b1049a 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -32,7 +32,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, unsigned int); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 265cb72b7c1b..1e2090ea663e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -411,24 +411,6 @@ errout: rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); } -/* Return the first fib alias matching TOS with - * priority less than or equal to PRIO. 
- */ -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) -{ - if (fah) { - struct fib_alias *fa; - list_for_each_entry(fa, fah, fa_list) { - if (fa->fa_tos > tos) - continue; - if (fa->fa_info->fib_priority >= prio || - fa->fa_tos < tos) - return fa; - } - } - return NULL; -} - static int fib_detect_death(struct fib_info *fi, int order, struct fib_info **last_resort, int *last_idx, int dflt) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 90654bb64e21..7f342265968e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -998,6 +998,26 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) return n; } +/* Return the first fib alias matching TOS with + * priority less than or equal to PRIO. + */ +static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) +{ + struct fib_alias *fa; + + if (!fah) + return NULL; + + list_for_each_entry(fa, fah, fa_list) { + if (fa->fa_tos > tos) + continue; + if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) + return fa; + } + + return NULL; +} + static void trie_rebalance(struct trie *t, struct tnode *tn) { struct tnode *tp; -- cgit v1.2.3 From 64c6272351a0eca55574f487b103770163d1dbce Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:45 -0800 Subject: fib_trie: Various clean-ups for handling slen While doing further work on the fib_trie I noted a few items. First I was using calls that were far more complicated than they needed to be for determining when to push/pull the suffix length. I have updated the code to reflect the simplier logic. The second issue is that I realised we weren't necessarily handling the case of a leaf_info struct surviving a flush. I have updated the logic so that now we will call pull_suffix in the event of having a leaf info value left in the leaf after flushing it. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 7f342265968e..3daf0224ff2e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -917,27 +917,20 @@ static void leaf_push_suffix(struct tnode *l) static void remove_leaf_info(struct tnode *l, struct leaf_info *old) { - struct hlist_node *prev; - - /* record the location of the pointer to this object */ - prev = rtnl_dereference(hlist_pprev_rcu(&old->hlist)); + /* record the location of the previous list_info entry */ + struct hlist_node **pprev = old->hlist.pprev; + struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); /* remove the leaf info from the list */ hlist_del_rcu(&old->hlist); - /* if we emptied the list this leaf will be freed and we can sort - * out parent suffix lengths as a part of trie_rebalance - */ - if (hlist_empty(&l->list)) + /* only access li if it is pointing at the last valid hlist_node */ + if (hlist_empty(&l->list) || (*pprev)) return; - /* if we removed the tail then we need to update slen */ - if (!rcu_access_pointer(hlist_next_rcu(prev))) { - struct leaf_info *li = hlist_entry(prev, typeof(*li), hlist); - - l->slen = KEYLENGTH - li->plen; - leaf_pull_suffix(l); - } + /* update the trie with the latest suffix length */ + l->slen = KEYLENGTH - li->plen; + leaf_pull_suffix(l); } static void insert_leaf_info(struct tnode *l, struct leaf_info *new) @@ -961,7 +954,7 @@ static void insert_leaf_info(struct tnode *l, struct leaf_info *new) } /* if we added to the tail node then we need to update slen */ - if (!rcu_access_pointer(hlist_next_rcu(&new->hlist))) { + if (l->slen < (KEYLENGTH - new->plen)) { l->slen = KEYLENGTH - new->plen; leaf_push_suffix(l); } @@ -1613,6 +1606,7 @@ static int trie_flush_leaf(struct tnode *l) struct hlist_head *lih = &l->list; struct hlist_node *tmp; struct leaf_info *li = NULL; + unsigned char plen = KEYLENGTH; hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += trie_flush_list(&li->falh); @@ -1620,8 +1614,14 @@ static int trie_flush_leaf(struct tnode *l) if (list_empty(&li->falh)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); + continue; } + + plen = li->plen; } + + l->slen = KEYLENGTH - plen; + return found; } @@ -1700,13 +1700,22 @@ int fib_table_flush(struct fib_table *tb) for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { found += trie_flush_leaf(l); - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } + ll = l; } - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } pr_debug("trie_flush found=%d\n", found); return found; -- cgit v1.2.3 From c2943f14534bdc4230f4da6dcd4ea03c5d8c8162 Mon Sep 17 00:00:00 2001 From: Harout Hedeshian Date: Tue, 20 Jan 2015 10:06:05 -0700 Subject: net: ipv6: Add sysctl entry to disable MTU updates from RA The kernel forcefully applies MTU values received in router advertisements provided the new MTU is less than the current. This behavior is undesirable when the user space is managing the MTU. Instead a sysctl flag 'accept_ra_mtu' is introduced such that the user space can control whether or not RA provided MTU updates should be applied. 
The default behavior is unchanged; user space must explicitly set this flag to 0 for RA MTUs to be ignored. Signed-off-by: Harout Hedeshian Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 7 +++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 10 ++++++++++ net/ipv6/ndisc.c | 2 +- 5 files changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 85b022179104..a5e4c813f17f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1287,6 +1287,13 @@ accept_ra_rtr_pref - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_mtu - BOOLEAN + Apply the MTU value specified in RA option 5 (RFC4861). If + disabled, the MTU specified in the RA will be ignored. + + Functional default: enabled if accept_ra is enabled. + disabled if accept_ra is disabled. + accept_redirects - BOOLEAN Accept Redirects. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c694e7baa621..2805062c013f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -52,6 +52,7 @@ struct ipv6_devconf { __s32 force_tllao; __s32 ndisc_notify; __s32 suppress_frag_ndisc; + __s32 accept_ra_mtu; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 73cb02dc3065..437a6a4b125a 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -169,6 +169,7 @@ enum { DEVCONF_SUPPRESS_FRAG_NDISC, DEVCONF_ACCEPT_RA_FROM_LOCAL, DEVCONF_USE_OPTIMISTIC, + DEVCONF_ACCEPT_RA_MTU, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6b4f5d08014..7dcc065e2160 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -201,6 +201,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -238,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; /* Check if a valid qdisc is available */ @@ -4380,6 +4382,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; + array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; } static inline size_t inet6_ifla6_size(void) @@ -5258,6 +5261,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "accept_ra_mtu", + .data = &ipv6_devconf.accept_ra_mtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 682866777d53..8a9d7c19e247 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1348,7 +1348,7 @@ skip_routeinfo: } } - if (ndopts.nd_opts_mtu) { + if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) { __be32 n; u32 mtu; -- cgit v1.2.3 From 24df8986f36b9a5d8ae08236498d92267bac454b Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 20 Jan 2015 19:13:32 -0500 Subject: net: dsa: set slave MII bus PHY mask When registering a mdio bus, Linux assumes than every port has a PHY and tries to scan it. If a switch port has no PHY registered, DSA will fail to register the slave MII bus. 
To fix this, set the slave MII bus PHY mask to the switch PHYs mask. As an example, if we use a Marvell MV88E6352 (which is a 7-port switch with no registered PHYs for port 5 and port 6), with the following declared names: static struct dsa_chip_data switch_cdata = { [...] .port_names[0] = "sw0", .port_names[1] = "sw1", .port_names[2] = "sw2", .port_names[3] = "sw3", .port_names[4] = "sw4", .port_names[5] = "cpu", }; DSA will fail to create the switch instance. With the PHY mask set for the slave MII bus, only the PHY for ports 0-4 will be scanned and the instance will be successfully created. Signed-off-by: Vivien Didelot Tested-by: Florian Fainelli Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 515569ffde8a..589aafd01fc5 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -46,6 +46,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x", ds->index, ds->pd->sw_addr); ds->slave_mii_bus->parent = ds->master_dev; + ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; } -- cgit v1.2.3 From b0a1ba59921eaaa9cb8f97bb35f2e6870fcdfedc Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 20 Jan 2015 19:16:02 -0800 Subject: ipv6: Fix __ip6_route_redirect In my last commit (a3c00e4: ipv6: Remove BACKTRACK macro), the changes in __ip6_route_redirect is incorrect. The following case is missed: 1. The for loop tries to find a valid gateway rt. If it fails to find one, rt will be NULL. 2. When rt is NULL, it is set to the ip6_null_entry. 3. The newly added 'else if', from a3c00e4, will stop the backtrack from happening. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 166e33bed222..495965358d22 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1242,12 +1242,16 @@ restart: rt = net->ipv6.ip6_null_entry; else if (rt->dst.error) { rt = net->ipv6.ip6_null_entry; - } else if (rt == net->ipv6.ip6_null_entry) { + goto out; + } + + if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } +out: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); -- cgit v1.2.3 From 1b846f9282c0781f8ecf90e355008d94a0b973dd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Jan 2015 12:22:35 +0300 Subject: bridge: simplify br_getlink() a bit Static checkers complain that we should maybe set "ret" before we do the "goto out;". They interpret the NULL return from br_port_get_rtnl() as a failure and forgetting to set the error code is a common bug in this situation. The code is confusing but it's actually correct. We are returning zero deliberately. Let's re-write it a bit to be more clear. Signed-off-by: Dan Carpenter Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 528cf2790a5f..3875ea51f6fe 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -311,17 +311,14 @@ errout: int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask) { - int err = 0; struct net_bridge_port *port = br_port_get_rtnl(dev); if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) && !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) - goto out; + return 0; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, - filter_mask, dev); -out: - return err; + return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); } static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, -- cgit v1.2.3 From 1dc7b90f7cd050ef6d5e511e652347e52874469c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Jan 2015 03:45:42 -0800 Subject: ipv6: tcp: fix race in IPV6_2292PKTOPTIONS IPv6 TCP sockets store in np->pktoptions skbs, and use skb_set_owner_r() to charge the skb to socket. It means that destructor must be called while socket is locked. Therefore, we cannot use skb_get() or atomic_inc(&skb->users) to protect ourselves : kfree_skb() might race with other users manipulating sk->sk_forward_alloc Fix this race by holding socket lock for the duration of ip6_datagram_recv_ctl() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 66980d8d98d1..8d766d9100cb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -996,13 +996,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, lock_sock(sk); skb = np->pktoptions; if (skb) - atomic_inc(&skb->users); - release_sock(sk); - - if (skb) { ip6_datagram_recv_ctl(sk, &msg, skb); - kfree_skb(skb); - } else { + release_sock(sk); + if (!skb) { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : -- cgit v1.2.3 From e8781f70a5b210a1b08cff8ce05895ebcec18d83 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 21 Jan 2015 18:04:18 +0100 Subject: netfilter: nf_tables: disable preemption when restoring chain counters With CONFIG_DEBUG_PREEMPT=y [22144.496057] BUG: using smp_processor_id() in preemptible [00000000] code: iptables-compat/10406 [22144.496061] caller is debug_smp_processor_id+0x17/0x1b [22144.496065] CPU: 2 PID: 10406 Comm: iptables-compat Not tainted 3.19.0-rc4+ # [...] [22144.496092] Call Trace: [22144.496098] [] dump_stack+0x4f/0x7b [22144.496104] [] check_preemption_disabled+0xd6/0xe8 [22144.496110] [] debug_smp_processor_id+0x17/0x1b [22144.496120] [] nft_stats_alloc+0x94/0xc7 [nf_tables] [22144.496130] [] nf_tables_newchain+0x471/0x6d8 [nf_tables] [22144.496140] [] ? 
nft_trans_alloc+0x18/0x34 [nf_tables] [22144.496154] [] nfnetlink_rcv_batch+0x2b4/0x457 [nfnetlink] Reported-by: Andreas Schultz Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7e686948ddca..b54360634e95 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1134,9 +1134,11 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) /* Restore old counters on this cpu, no problem. Per-cpu statistics * are not exposed to userspace. */ + preempt_disable(); stats = this_cpu_ptr(newstats); stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + preempt_enable(); return newstats; } -- cgit v1.2.3 From ec0684898fa53c318a221d3f76860067543d12e3 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Jan 2015 23:33:24 +0100 Subject: NFC: nfc_enable_se Remove useless blank line at beginning of function Remove one useless blank line at beginning of nfc_enable_se function. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 819b87702b70..81fabed31f42 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -555,7 +555,6 @@ EXPORT_SYMBOL(nfc_find_se); int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) { - struct nfc_se *se; int rc; -- cgit v1.2.3 From 511e78a38aa611f1bcc1f9b383766ad3e35816c0 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Jan 2015 23:33:25 +0100 Subject: NFC: nfc_disable_se Remove useless blank line at beginning of function Remove one useless blank line at beginning of nfc_disable_se function. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 81fabed31f42..7f1b6351755c 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -604,7 +604,6 @@ error: int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) { - struct nfc_se *se; int rc; -- cgit v1.2.3 From 5b4237bbc93b1b54d35b037cfc0ece71cd8e358d Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 21 Jan 2015 16:42:48 -0800 Subject: openvswitch: Refactor ovs_nla_fill_match(). Refactor the ovs_nla_fill_match() function into separate netlink serialization functions ovs_nla_put_{unmasked_key,mask}(). Modify ovs_nla_put_flow() to handle attribute nesting and expose the 'is_mask' parameter - all callers need to nest the flow, and callers have better knowledge about whether it is serializing a mask or not. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/datapath.c | 41 ++++++----------------------------------- net/openvswitch/flow_netlink.c | 38 +++++++++++++++++++++++++++++++++++--- net/openvswitch/flow_netlink.h | 7 +++++-- 3 files changed, 46 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f45f1bf4422c..257b97546b33 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -461,10 +461,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 0, upcall_info->cmd); upcall->dp_ifindex = dp_ifindex; - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - err = ovs_nla_put_flow(key, key, user_skb); + err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); BUG_ON(err); - nla_nest_end(user_skb, nla); if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, @@ -675,37 +673,6 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ } -/* Called with ovs_mutex or RCU read lock. */ -static int ovs_flow_cmd_fill_match(const struct sw_flow *flow, - struct sk_buff *skb) -{ - struct nlattr *nla; - int err; - - /* Fill flow key. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); - if (!nla) - return -EMSGSIZE; - - err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); - if (err) - return err; - - nla_nest_end(skb, nla); - - /* Fill flow mask. */ - nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); - if (!nla) - return -EMSGSIZE; - - err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); - if (err) - return err; - - nla_nest_end(skb, nla); - return 0; -} - /* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow, struct sk_buff *skb) @@ -787,7 +754,11 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, ovs_header->dp_ifindex = dp_ifindex; - err = ovs_flow_cmd_fill_match(flow, skb); + err = ovs_nla_put_unmasked_key(flow, skb); + if (err) + goto error; + + err = ovs_nla_put_mask(flow, skb); if (err) goto error; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d210d1be3470..33751f81bfcb 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1216,12 +1216,12 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr, return metadata_from_nlattrs(&match, &attrs, a, false, log); } -int ovs_nla_put_flow(const struct sw_flow_key *swkey, - const struct sw_flow_key *output, struct sk_buff *skb) +static int __ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, bool is_mask, + struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; struct nlattr *nla, *encap; - bool is_mask = (swkey != output); if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) goto nla_put_failure; @@ -1431,6 +1431,38 @@ nla_put_failure: return -EMSGSIZE; } +int ovs_nla_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, int attr, bool is_mask, + struct sk_buff *skb) +{ + int err; + struct nlattr *nla; + + nla = nla_nest_start(skb, attr); + if (!nla) + return -EMSGSIZE; + err = __ovs_nla_put_key(swkey, output, is_mask, skb); + if (err) + return err; + nla_nest_end(skb, nla); + + return 0; +} + +/* Called with ovs_mutex or RCU read lock. 
*/ +int ovs_nla_put_unmasked_key(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->unmasked_key, &flow->unmasked_key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. */ +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) +{ + return ovs_nla_put_key(&flow->key, &flow->mask->key, + OVS_FLOW_ATTR_MASK, true, skb); +} + #define MAX_ACTIONS_BUFSIZE (32 * 1024) static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log) diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 577f12be3459..9ed09e66876a 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -43,11 +43,14 @@ size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask); -int ovs_nla_put_flow(const struct sw_flow_key *, - const struct sw_flow_key *, struct sk_buff *); +int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, + int attr, bool is_mask, struct sk_buff *); int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, bool log); +int ovs_nla_put_unmasked_key(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); + int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, const struct nlattr *mask, bool log); int ovs_nla_put_egress_tunnel_key(struct sk_buff *, -- cgit v1.2.3 From d29ab6f8a92eceb00d6085f028b6e05213faa72d Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 21 Jan 2015 16:42:49 -0800 Subject: openvswitch: Refactor ovs_flow_tbl_insert(). Rework so that ovs_flow_tbl_insert() calls flow_{key,mask}_insert(). This tidies up a future patch. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 5899bf161c61..81b977d839df 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -585,16 +585,10 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, } /* Must be called with OVS mutex held. */ -int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, - const struct sw_flow_mask *mask) +static void flow_key_insert(struct flow_table *table, struct sw_flow *flow) { struct table_instance *new_ti = NULL; struct table_instance *ti; - int err; - - err = flow_mask_insert(table, flow, mask); - if (err) - return err; flow->hash = flow_hash(&flow->key, flow->mask->range.start, flow->mask->range.end); @@ -613,6 +607,19 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, table_instance_destroy(ti, true); table->last_rehash = jiffies; } +} + +/* Must be called with OVS mutex held. */ +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + const struct sw_flow_mask *mask) +{ + int err; + + err = flow_mask_insert(table, flow, mask); + if (err) + return err; + flow_key_insert(table, flow); + return 0; } -- cgit v1.2.3 From 272c2cf84152eb7fba11407d1649c85eee243e21 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 21 Jan 2015 16:42:50 -0800 Subject: openvswitch: Use sw_flow_key_range for key ranges. These minor tidyups make a future patch a little tidier. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/flow_table.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 81b977d839df..9a3f41f26da8 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -357,9 +357,11 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) return 0; } -static u32 flow_hash(const struct sw_flow_key *key, int key_start, - int key_end) +static u32 flow_hash(const struct sw_flow_key *key, + const struct sw_flow_key_range *range) { + int key_start = range->start; + int key_end = range->end; const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); int hash_u32s = (key_end - key_start) >> 2; @@ -395,9 +397,9 @@ static bool cmp_key(const struct sw_flow_key *key1, static bool flow_cmp_masked_key(const struct sw_flow *flow, const struct sw_flow_key *key, - int key_start, int key_end) + const struct sw_flow_key_range *range) { - return cmp_key(&flow->key, key, key_start, key_end); + return cmp_key(&flow->key, key, range->start, range->end); } bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, @@ -416,18 +418,15 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, { struct sw_flow *flow; struct hlist_head *head; - int key_start = mask->range.start; - int key_end = mask->range.end; u32 hash; struct sw_flow_key masked_key; ovs_flow_mask_key(&masked_key, unmasked, mask); - hash = flow_hash(&masked_key, key_start, key_end); + hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { if (flow->mask == mask && flow->hash == hash && - flow_cmp_masked_key(flow, &masked_key, - key_start, key_end)) + flow_cmp_masked_key(flow, &masked_key, &mask->range)) return flow; } return NULL; @@ -590,8 +589,7 @@ static void flow_key_insert(struct flow_table *table, struct sw_flow *flow) struct table_instance *new_ti = NULL; struct table_instance *ti; - flow->hash = flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); + flow->hash = flow_hash(&flow->key, &flow->mask->range); ti = ovsl_dereference(table->ti); table_instance_insert(ti, flow); table->count++; -- cgit v1.2.3 From 74ed7ab9264c54471c7f057409d352052820d750 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 21 Jan 2015 16:42:52 -0800 Subject: openvswitch: Add support for unique flow IDs. Previously, flows were manipulated by userspace specifying a full, unmasked flow key. This adds significant burden onto flow serialization/deserialization, particularly when dumping flows. This patch adds an alternative way to refer to flows using a variable-length "unique flow identifier" (UFID). At flow setup time, userspace may specify a UFID for a flow, which is stored with the flow and inserted into a separate table for lookup, in addition to the standard flow table. Flows created using a UFID must be fetched or deleted using the UFID. All flow dump operations may now be made more terse with OVS_UFID_F_* flags. For example, the OVS_UFID_F_OMIT_KEY flag allows responses to omit the flow key from a datapath operation if the flow has a corresponding UFID. This significantly reduces the time spent assembling and transacting netlink messages. With all OVS_UFID_F_OMIT_* flags enabled, the datapath only returns the UFID and statistics for each flow during flow dump, increasing ovs-vswitchd revalidator performance by 40% or more. 
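A compact, hypothetical userspace-style sketch of the idea (the real kernel structures, such as struct sw_flow_id and the second table index, are in the diff below; the names used here are illustrative only): a flow optionally carries an opaque identifier of 1 to 16 octets alongside its key, and get, delete and dump requests can then match on that identifier alone, which is what makes the OVS_UFID_F_OMIT_* terse dumps possible.

#include <stdint.h>
#include <string.h>

struct flow_ufid {
	uint32_t len;		/* 1..16 octets, as in OVS_FLOW_ATTR_UFID */
	uint8_t  data[16];
};

struct flow_entry {
	struct flow_ufid ufid;	/* len == 0 means no UFID was installed */
	/* ... masked key, mask, actions, stats ... */
};

/* match a flow by its UFID alone, never touching the flow key */
static int flow_matches_ufid(const struct flow_entry *f,
			     const struct flow_ufid *id)
{
	return id->len && f->ufid.len == id->len &&
	       !memcmp(f->ufid.data, id->data, id->len);
}
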
Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- Documentation/networking/openvswitch.txt | 13 ++ include/uapi/linux/openvswitch.h | 20 +++ net/openvswitch/datapath.c | 207 +++++++++++++++++++++++-------- net/openvswitch/flow.h | 28 ++++- net/openvswitch/flow_netlink.c | 68 +++++++++- net/openvswitch/flow_netlink.h | 8 +- net/openvswitch/flow_table.c | 187 +++++++++++++++++++++++----- net/openvswitch/flow_table.h | 8 +- 8 files changed, 448 insertions(+), 91 deletions(-) (limited to 'net') diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt index 37c20ee2455e..b3b9ac61d29d 100644 --- a/Documentation/networking/openvswitch.txt +++ b/Documentation/networking/openvswitch.txt @@ -131,6 +131,19 @@ performs best-effort detection of overlapping wildcarded flows and may reject some but not all of them. However, this behavior may change in future versions. +Unique flow identifiers +----------------------- + +An alternative to using the original match portion of a key as the handle for +flow identification is a unique flow identifier, or "UFID". UFIDs are optional +for both the kernel and user space program. + +User space programs that support UFID are expected to provide it during flow +setup in addition to the flow, then refer to the flow using the UFID for all +future operations. The kernel is not required to index flows by the original +flow key if a UFID is specified. + + Basic rule for evolving flow keys --------------------------------- diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index cd8d933963c2..7a8785a99243 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -459,6 +459,14 @@ struct ovs_key_nd { * a wildcarded match. Omitting attribute is treated as wildcarding all * corresponding fields. Optional for all requests. If not present, * all flow key bits are exact match bits. + * @OVS_FLOW_ATTR_UFID: A value between 1-16 octets specifying a unique + * identifier for the flow. Causes the flow to be indexed by this value rather + * than the value of the %OVS_FLOW_ATTR_KEY attribute. Optional for all + * requests. Present in notifications if the flow was created with this + * attribute. + * @OVS_FLOW_ATTR_UFID_FLAGS: A 32-bit value of OR'd %OVS_UFID_F_* + * flags that provide alternative semantics for flow installation and + * retrieval. Optional for all requests. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_FLOW_* commands. @@ -474,11 +482,23 @@ enum ovs_flow_attr { OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */ OVS_FLOW_ATTR_PROBE, /* Flow operation is a feature probe, error * logging should be suppressed. */ + OVS_FLOW_ATTR_UFID, /* Variable length unique flow identifier. */ + OVS_FLOW_ATTR_UFID_FLAGS,/* u32 of OVS_UFID_F_*. */ __OVS_FLOW_ATTR_MAX }; #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) +/** + * Omit attributes for notifications. + * + * If a datapath request contains an %OVS_UFID_F_OMIT_* flag, then the datapath + * may omit the corresponding %OVS_FLOW_ATTR_* from the response. + */ +#define OVS_UFID_F_OMIT_KEY (1 << 0) +#define OVS_UFID_F_OMIT_MASK (1 << 1) +#define OVS_UFID_F_OMIT_ACTIONS (1 << 2) + /** * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. 
* @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 257b97546b33..ae5e77cdc0ca 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -65,6 +65,8 @@ static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; static struct genl_family dp_datapath_genl_family; +static const struct nla_policy flow_policy[]; + static const struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP, }; @@ -662,15 +664,48 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, } } -static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) +static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags) +{ + return ovs_identifier_is_ufid(sfid) && + !(ufid_flags & OVS_UFID_F_OMIT_KEY); +} + +static bool should_fill_mask(uint32_t ufid_flags) +{ + return !(ufid_flags & OVS_UFID_F_OMIT_MASK); +} + +static bool should_fill_actions(uint32_t ufid_flags) { - return NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */ - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */ + return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS); +} + +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, + uint32_t ufid_flags) +{ + size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); + + /* OVS_FLOW_ATTR_UFID */ + if (sfid && ovs_identifier_is_ufid(sfid)) + len += nla_total_size(sfid->ufid_len); + + /* OVS_FLOW_ATTR_KEY */ + if (!sfid || should_fill_key(sfid, ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); + + /* OVS_FLOW_ATTR_MASK */ + if (should_fill_mask(ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); + + /* OVS_FLOW_ATTR_ACTIONS */ + if (should_fill_actions(ufid_flags)) + len += nla_total_size(acts->actions_len); + + return len + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ - + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ - + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ + + nla_total_size(8); /* OVS_FLOW_ATTR_USED */ } /* Called with ovs_mutex or RCU read lock. */ @@ -741,7 +776,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, /* Called with ovs_mutex or RCU read lock. 
*/ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, - u32 seq, u32 flags, u8 cmd) + u32 seq, u32 flags, u8 cmd, u32 ufid_flags) { const int skb_orig_len = skb->len; struct ovs_header *ovs_header; @@ -754,21 +789,31 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, ovs_header->dp_ifindex = dp_ifindex; - err = ovs_nla_put_unmasked_key(flow, skb); + err = ovs_nla_put_identifier(flow, skb); if (err) goto error; - err = ovs_nla_put_mask(flow, skb); - if (err) - goto error; + if (should_fill_key(&flow->id, ufid_flags)) { + err = ovs_nla_put_masked_key(flow, skb); + if (err) + goto error; + } + + if (should_fill_mask(ufid_flags)) { + err = ovs_nla_put_mask(flow, skb); + if (err) + goto error; + } err = ovs_flow_cmd_fill_stats(flow, skb); if (err) goto error; - err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); - if (err) - goto error; + if (should_fill_actions(ufid_flags)) { + err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); + if (err) + goto error; + } genlmsg_end(skb, ovs_header); return 0; @@ -780,15 +825,19 @@ error: /* May not be called with RCU read lock. */ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, struct genl_info *info, - bool always) + bool always, + uint32_t ufid_flags) { struct sk_buff *skb; + size_t len; if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) return NULL; - skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); + len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); + skb = genlmsg_new_unicast(len, info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -799,19 +848,19 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, int dp_ifindex, struct genl_info *info, u8 cmd, - bool always) + bool always, u32 ufid_flags) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, - always); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), + &flow->id, info, always, ufid_flags); if (IS_ERR_OR_NULL(skb)) return skb; retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, - cmd); + cmd, ufid_flags); BUG_ON(retval < 0); return skb; } @@ -820,12 +869,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow *flow, *new_flow; + struct sw_flow *flow = NULL, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; + struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; @@ -850,13 +901,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &new_flow->unmasked_key, &mask); + ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + ovs_flow_mask_key(&new_flow->key, &key, &mask); + + /* Extract flow identifier. */ + error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], + &key, log); + if (error) + goto err_kfree_flow; /* Validate actions. 
*/ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, @@ -866,7 +923,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; } - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -878,8 +936,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -ENODEV; goto err_unlock_ovs; } + /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (ovs_identifier_is_ufid(&new_flow->id)) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); + if (!flow) + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); @@ -895,7 +957,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -913,10 +976,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -EEXIST; goto err_unlock_ovs; } - /* The unmasked key has to be the same for flow updates. */ - if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - /* Look for any overlapping flow. */ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + /* The flow identifier has to be the same for flow updates. + * Look for any overlapping flow. + */ + if (unlikely(!ovs_flow_cmp(flow, &match))) { + if (ovs_identifier_is_key(&flow->id)) + flow = ovs_flow_tbl_lookup_exact(&dp->table, + &match); + else /* UFID matches but key is different */ + flow = NULL; if (!flow) { error = -ENOENT; goto err_unlock_ovs; @@ -931,7 +999,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -987,8 +1056,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; + struct sw_flow_id sfid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; /* Extract key. */ error = -EINVAL; @@ -997,6 +1069,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto error; } + ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); @@ -1013,7 +1086,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) } /* Can allocate before locking if have acts. */ - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -1027,7 +1101,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; } /* Check that the flow exists. 
*/ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { error = -ENOENT; goto err_unlock_ovs; @@ -1043,13 +1120,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } } else { /* Could not alloc without acts before locking. */ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); + info, OVS_FLOW_CMD_NEW, false, + ufid_flags); + if (unlikely(IS_ERR(reply))) { error = PTR_ERR(reply); goto err_unlock_ovs; @@ -1086,17 +1166,22 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct datapath *dp; struct sw_flow_match match; - int err; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); + int err = 0; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (!a[OVS_FLOW_ATTR_KEY]) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + log); + } else if (!ufid_present) { OVS_NLERR(log, "Flow get message rejected, Key attribute missing."); - return -EINVAL; + err = -EINVAL; } - - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (err) return err; @@ -1107,14 +1192,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { err = -ENOENT; goto unlock; } reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, - OVS_FLOW_CMD_NEW, true); + OVS_FLOW_CMD_NEW, true, ufid_flags); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1133,13 +1221,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sk_buff *reply; - struct sw_flow *flow; + struct sw_flow *flow = NULL; struct datapath *dp; struct sw_flow_match match; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int err; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (likely(a[OVS_FLOW_ATTR_KEY])) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); @@ -1154,12 +1246,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { err = -ENOENT; goto unlock; @@ -1169,14 +1264,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_unlock(); reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions 
__force *) flow->sf_acts, - info, false); + &flow->id, info, false, ufid_flags); if (likely(reply)) { if (likely(!IS_ERR(reply))) { rcu_read_lock(); /*To keep RCU checker happy. */ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_DEL); + OVS_FLOW_CMD_DEL, + ufid_flags); rcu_read_unlock(); BUG_ON(err < 0); @@ -1195,9 +1291,18 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *a[__OVS_FLOW_ATTR_MAX]; struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct table_instance *ti; struct datapath *dp; + u32 ufid_flags; + int err; + + err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy); + if (err) + return err; + ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); rcu_read_lock(); dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1220,7 +1325,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_FLOW_CMD_NEW) < 0) + OVS_FLOW_CMD_NEW, ufid_flags) < 0) break; cb->args[0] = bucket; @@ -1236,6 +1341,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 }, + [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 }, }; static const struct genl_ops dp_flow_genl_ops[] = { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index d3d0a406562d..a076e445ccc2 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -197,6 +197,16 @@ struct sw_flow_match { struct sw_flow_mask *mask; }; +#define MAX_UFID_LENGTH 16 /* 128 bits */ + +struct sw_flow_id { + u32 ufid_len; + union { + u32 ufid[MAX_UFID_LENGTH / 4]; + struct sw_flow_key *unmasked_key; + }; +}; + struct sw_flow_actions { struct rcu_head rcu; u32 actions_len; @@ -213,13 +223,15 @@ struct flow_stats { struct sw_flow { struct rcu_head rcu; - struct hlist_node hash_node[2]; - u32 hash; + struct { + struct hlist_node node[2]; + u32 hash; + } flow_table, ufid_table; int stats_last_writer; /* NUMA-node id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; - struct sw_flow_key unmasked_key; + struct sw_flow_id id; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each NUMA node. 
First one @@ -243,6 +255,16 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) +{ + return sfid->ufid_len; +} + +static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid) +{ + return !ovs_identifier_is_ufid(sfid); +} + void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, const struct sk_buff *); void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 33751f81bfcb..8b9a612b39d1 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1180,6 +1180,59 @@ free_newmask: return err; } +static size_t get_ufid_len(const struct nlattr *attr, bool log) +{ + size_t len; + + if (!attr) + return 0; + + len = nla_len(attr); + if (len < 1 || len > MAX_UFID_LENGTH) { + OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", + nla_len(attr), MAX_UFID_LENGTH); + return 0; + } + + return len; +} + +/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID, + * or false otherwise. + */ +bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, + bool log) +{ + sfid->ufid_len = get_ufid_len(attr, log); + if (sfid->ufid_len) + memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); + + return sfid->ufid_len; +} + +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log) +{ + struct sw_flow_key *new_key; + + if (ovs_nla_get_ufid(sfid, ufid, log)) + return 0; + + /* If UFID was not provided, use unmasked key. */ + new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); + if (!new_key) + return -ENOMEM; + memcpy(new_key, key, sizeof(*key)); + sfid->unmasked_key = new_key; + + return 0; +} + +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) +{ + return attr ? nla_get_u32(attr) : 0; +} + /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * @key: Receives extracted in_port, priority, tun_key and skb_mark. @@ -1450,9 +1503,20 @@ int ovs_nla_put_key(const struct sw_flow_key *swkey, } /* Called with ovs_mutex or RCU read lock. */ -int ovs_nla_put_unmasked_key(const struct sw_flow *flow, struct sk_buff *skb) +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, + flow->id.ufid); + + return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. 
*/ +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) { - return ovs_nla_put_key(&flow->unmasked_key, &flow->unmasked_key, + return ovs_nla_put_key(&flow->mask->key, &flow->key, OVS_FLOW_ATTR_KEY, false, skb); } diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 9ed09e66876a..5c3d75bff310 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -48,7 +48,8 @@ int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, bool log); -int ovs_nla_put_unmasked_key(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, @@ -56,6 +57,11 @@ int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); +bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log); +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr); + int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 9a3f41f26da8..5e57628e6584 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -139,6 +139,8 @@ static void flow_free(struct sw_flow *flow) { int node; + if (ovs_identifier_is_key(&flow->id)) + kfree(flow->id.unmasked_key); kfree((struct sw_flow_actions __force *)flow->sf_acts); for_each_node(node) if (flow->stats[node]) @@ -200,18 +202,28 @@ static struct table_instance *table_instance_alloc(int new_size) int ovs_flow_tbl_init(struct flow_table *table) { - struct table_instance *ti; + struct table_instance *ti, *ufid_ti; ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) return -ENOMEM; + ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!ufid_ti) + goto free_ti; + rcu_assign_pointer(table->ti, ti); + rcu_assign_pointer(table->ufid_ti, ufid_ti); INIT_LIST_HEAD(&table->mask_list); table->last_rehash = jiffies; table->count = 0; + table->ufid_count = 0; return 0; + +free_ti: + __table_instance_destroy(ti); + return -ENOMEM; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) @@ -221,13 +233,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, bool deferred) +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti, + bool deferred) { int i; if (!ti) return; + BUG_ON(!ufid_ti); if (ti->keep_flows) goto skip_flows; @@ -236,18 +251,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred) struct hlist_head *head = flex_array_get(ti->buckets, i); struct hlist_node *n; int ver = ti->node_ver; + int ufid_ver = ufid_ti->node_ver; - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del_rcu(&flow->hash_node[ver]); + hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { + hlist_del_rcu(&flow->flow_table.node[ver]); + if (ovs_identifier_is_ufid(&flow->id)) + 
hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); ovs_flow_free(flow, deferred); } } skip_flows: - if (deferred) + if (deferred) { call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - else + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); + } else { __table_instance_destroy(ti); + __table_instance_destroy(ufid_ti); + } } /* No need for locking this function is called from RCU callback or @@ -256,8 +277,9 @@ skip_flows: void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); + struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); - table_instance_destroy(ti, false); + table_instance_destroy(ti, ufid_ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -272,7 +294,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, while (*bucket < ti->n_buckets) { i = 0; head = flex_array_get(ti->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) { if (i < *last) { i++; continue; @@ -294,16 +316,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) (hash & (ti->n_buckets - 1))); } -static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +static void table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) { struct hlist_head *head; - head = find_bucket(ti, flow->hash); - hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); + head = find_bucket(ti, flow->flow_table.hash); + hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head); +} + +static void ufid_table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->ufid_table.hash); + hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head); } static void flow_table_copy_flows(struct table_instance *old, - struct table_instance *new) + struct table_instance *new, bool ufid) { int old_ver; int i; @@ -318,15 +350,21 @@ static void flow_table_copy_flows(struct table_instance *old, head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, head, hash_node[old_ver]) - table_instance_insert(new, flow); + if (ufid) + hlist_for_each_entry(flow, head, + ufid_table.node[old_ver]) + ufid_table_instance_insert(new, flow); + else + hlist_for_each_entry(flow, head, + flow_table.node[old_ver]) + table_instance_insert(new, flow); } old->keep_flows = true; } static struct table_instance *table_instance_rehash(struct table_instance *ti, - int n_buckets) + int n_buckets, bool ufid) { struct table_instance *new_ti; @@ -334,27 +372,38 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti, if (!new_ti) return NULL; - flow_table_copy_flows(ti, new_ti); + flow_table_copy_flows(ti, new_ti, ufid); return new_ti; } int ovs_flow_tbl_flush(struct flow_table *flow_table) { - struct table_instance *old_ti; - struct table_instance *new_ti; + struct table_instance *old_ti, *new_ti; + struct table_instance *old_ufid_ti, *new_ufid_ti; - old_ti = ovsl_dereference(flow_table->ti); new_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!new_ti) return -ENOMEM; + new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ufid_ti) + goto err_free_ti; + + old_ti = ovsl_dereference(flow_table->ti); + old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); rcu_assign_pointer(flow_table->ti, new_ti); + rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti); flow_table->last_rehash = jiffies; flow_table->count = 
0; + flow_table->ufid_count = 0; - table_instance_destroy(old_ti, true); + table_instance_destroy(old_ti, old_ufid_ti, true); return 0; + +err_free_ti: + __table_instance_destroy(new_ti); + return -ENOMEM; } static u32 flow_hash(const struct sw_flow_key *key, @@ -402,14 +451,15 @@ static bool flow_cmp_masked_key(const struct sw_flow *flow, return cmp_key(&flow->key, key, range->start, range->end); } -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match) +static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + const struct sw_flow_match *match) { struct sw_flow_key *key = match->key; int key_start = flow_key_start(key); int key_end = match->range.end; - return cmp_key(&flow->unmasked_key, key, key_start, key_end); + BUG_ON(ovs_identifier_is_ufid(&flow->id)); + return cmp_key(flow->id.unmasked_key, key, key_start, key_end); } static struct sw_flow *masked_flow_lookup(struct table_instance *ti, @@ -424,8 +474,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, ovs_flow_mask_key(&masked_key, unmasked, mask); hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { - if (flow->mask == mask && flow->hash == hash && + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { + if (flow->mask == mask && flow->flow_table.hash == hash && flow_cmp_masked_key(flow, &masked_key, &mask->range)) return flow; } @@ -468,7 +518,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, /* Always called under ovs-mutex. */ list_for_each_entry(mask, &tbl->mask_list, list) { flow = masked_flow_lookup(ti, match->key, mask); - if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */ + if (flow && ovs_identifier_is_key(&flow->id) && + ovs_flow_cmp_unmasked_key(flow, match)) + return flow; + } + return NULL; +} + +static u32 ufid_hash(const struct sw_flow_id *sfid) +{ + return jhash(sfid->ufid, sfid->ufid_len, 0); +} + +static bool ovs_flow_cmp_ufid(const struct sw_flow *flow, + const struct sw_flow_id *sfid) +{ + if (flow->id.ufid_len != sfid->ufid_len) + return false; + + return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len); +} + +bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return flow_cmp_masked_key(flow, match->key, &match->range); + + return ovs_flow_cmp_unmasked_key(flow, match); +} + +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, + const struct sw_flow_id *ufid) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti); + struct sw_flow *flow; + struct hlist_head *head; + u32 hash; + + hash = ufid_hash(ufid); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { + if (flow->ufid_table.hash == hash && + ovs_flow_cmp_ufid(flow, ufid)) return flow; } return NULL; @@ -485,9 +576,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) return num; } -static struct table_instance *table_instance_expand(struct table_instance *ti) +static struct table_instance *table_instance_expand(struct table_instance *ti, + bool ufid) { - return table_instance_rehash(ti, ti->n_buckets * 2); + return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } /* Remove 'mask' from the mask list, if it is not needed any more. 
*/ @@ -512,10 +604,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); + struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[ti->node_ver]); + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); table->count--; + if (ovs_identifier_is_ufid(&flow->id)) { + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); + table->ufid_count--; + } /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be * accessible as long as the RCU read lock is held. @@ -589,24 +686,46 @@ static void flow_key_insert(struct flow_table *table, struct sw_flow *flow) struct table_instance *new_ti = NULL; struct table_instance *ti; - flow->hash = flow_hash(&flow->key, &flow->mask->range); + flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range); ti = ovsl_dereference(table->ti); table_instance_insert(ti, flow); table->count++; /* Expand table, if necessary, to make room. */ if (table->count > ti->n_buckets) - new_ti = table_instance_expand(ti); + new_ti = table_instance_expand(ti, false); else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) - new_ti = table_instance_rehash(ti, ti->n_buckets); + new_ti = table_instance_rehash(ti, ti->n_buckets, false); if (new_ti) { rcu_assign_pointer(table->ti, new_ti); - table_instance_destroy(ti, true); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); table->last_rehash = jiffies; } } +/* Must be called with OVS mutex held. */ +static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct table_instance *ti; + + flow->ufid_table.hash = ufid_hash(&flow->id); + ti = ovsl_dereference(table->ufid_ti); + ufid_table_instance_insert(ti, flow); + table->ufid_count++; + + /* Expand table, if necessary, to make room. */ + if (table->ufid_count > ti->n_buckets) { + struct table_instance *new_ti; + + new_ti = table_instance_expand(ti, true); + if (new_ti) { + rcu_assign_pointer(table->ufid_ti, new_ti); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + } + } +} + /* Must be called with OVS mutex held. 
*/ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, const struct sw_flow_mask *mask) @@ -617,6 +736,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, if (err) return err; flow_key_insert(table, flow); + if (ovs_identifier_is_ufid(&flow->id)) + flow_ufid_insert(table, flow); return 0; } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 309fa6415689..616eda10d955 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -47,9 +47,11 @@ struct table_instance { struct flow_table { struct table_instance __rcu *ti; + struct table_instance __rcu *ufid_ti; struct list_head mask_list; unsigned long last_rehash; unsigned int count; + unsigned int ufid_count; }; extern struct kmem_cache *flow_stats_cache; @@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, const struct sw_flow_match *match); -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match); +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *, + const struct sw_flow_id *); + +bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); -- cgit v1.2.3 From 7913ecf69e24bd7575e0d0325eda3b43c8cfa749 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 22 Jan 2015 10:41:01 +0100 Subject: net: cls_bpf: fix size mismatch on filter preparation In cls_bpf_modify_existing(), we read out the number of filter blocks, do some sanity checks, allocate a block on that size, and copy over the BPF instruction blob from user space, then pass everything through the classic BPF checker prior to installation of the classifier. We should reject mismatches here, there are 2 scenarios: the number of filter blocks could be smaller than the provided instruction blob, so we do a partial copy of the BPF program, and thus the instructions will either be rejected from the verifier or a valid BPF program will be run; in the other case, we'll end up copying more than we're supposed to, and most likely the trailing garbage will be rejected by the verifier as well (i.e. we need to fit instruction pattern, ret {A,K} needs to be last instruction, load/stores must be correct, etc); in case not, we would leak memory when dumping back instruction patterns. The code should have only used nla_len() as Dave noted to avoid this from the beginning. Anyway, lets fix it by rejecting such load attempts. Fixes: 7d1d65cb84e1 ("net: sched: cls_bpf: add BPF-based classifier") Signed-off-by: Daniel Borkmann Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/cls_bpf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 84c8219c3e1c..49e5fa8795ae 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -180,6 +180,11 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, } bpf_size = bpf_len * sizeof(*bpf_ops); + if (bpf_size != nla_len(tb[TCA_BPF_OPS])) { + ret = -EINVAL; + goto errout; + } + bpf_ops = kzalloc(bpf_size, GFP_KERNEL); if (bpf_ops == NULL) { ret = -ENOMEM; -- cgit v1.2.3 From 3f2ab135946dcd4eb6af92a53d6d4bd35e7526ca Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 22 Jan 2015 10:41:02 +0100 Subject: net: cls_bpf: fix auto generation of per list handles When creating a bpf classifier in tc with priority collisions and invoking automatic unique handle assignment, cls_bpf_grab_new_handle() will return a wrong handle id which in fact is non-unique. Usually altering of specific filters is being addressed over major id, but in case of collisions we result in a filter chain, where handle ids address individual cls_bpf_progs inside the classifier. Issue is, in cls_bpf_grab_new_handle() we probe for head->hgen handle in cls_bpf_get() and in case we found a free handle, we're supposed to use exactly head->hgen. In case of insufficient numbers of handles, we bail out later as handle id 0 is not allowed. Fixes: 7d1d65cb84e1 ("net: sched: cls_bpf: add BPF-based classifier") Signed-off-by: Daniel Borkmann Acked-by: Jiri Pirko Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 49e5fa8795ae..f59adf8a4cd7 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -220,15 +220,21 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, struct cls_bpf_head *head) { unsigned int i = 0x80000000; + u32 handle; do { if (++head->hgen == 0x7FFFFFFF) head->hgen = 1; } while (--i > 0 && cls_bpf_get(tp, head->hgen)); - if (i == 0) + + if (unlikely(i == 0)) { pr_err("Insufficient number of handles\n"); + handle = 0; + } else { + handle = head->hgen; + } - return i; + return handle; } static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, -- cgit v1.2.3 From 1c1bc6bdb7f529eb3383b6e34a0ea327d7e9f615 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 22 Jan 2015 10:58:18 +0100 Subject: net: cls_basic: return from walking on match in basic_get As soon as we've found a matching handle in basic_get(), we can return it. There's no need to continue walking until the end of a filter chain, since they are unique anyway. Signed-off-by: Daniel Borkmann Acked-by: Jiri Pirko Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- net/sched/cls_basic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 5aed341406c2..fc399db86f11 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -65,9 +65,12 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) if (head == NULL) return 0UL; - list_for_each_entry(f, &head->flist, link) - if (f->handle == handle) + list_for_each_entry(f, &head->flist, link) { + if (f->handle == handle) { l = (unsigned long) f; + break; + } + } return l; } -- cgit v1.2.3 From fd3e646c87ab3f2ba98aa25394581af27cc78dc5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 22 Jan 2015 10:58:19 +0100 Subject: net: act_bpf: fix size mismatch on filter preparation Similarly as in cls_bpf, also this code needs to reject mismatches. Reference: http://article.gmane.org/gmane.linux.network/347406 Fixes: d23b8ad8ab23 ("tc: add BPF based action") Signed-off-by: Daniel Borkmann Acked-by: Jiri Pirko Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1bd257e473a9..82c5d7fc1988 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -122,6 +122,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return -EINVAL; bpf_size = bpf_num_ops * sizeof(*bpf_ops); + if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) + return -EINVAL; + bpf_ops = kzalloc(bpf_size, GFP_KERNEL); if (!bpf_ops) return -ENOMEM; -- cgit v1.2.3 From 3fa9cacd697eb26d99c59a8479d8a1b3d6311182 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 22 Jan 2015 17:10:31 +0100 Subject: tipc: fix excessive network event logging If a large number of namespaces is spawned on a node and TIPC is enabled in each of these, the excessive printk tracing of network events will cause the system to grind down to a near halt. The traces are still of debug value, so instead of removing them completely we fix it by changing the link state and node availability logging debug traces. Signed-off-by: Erik Hugne Signed-off-by: David S. 
Miller --- net/tipc/link.c | 20 ++++++++++---------- net/tipc/node.c | 22 +++++++++++----------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 193bc1560677..2846ad802e43 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -538,8 +538,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv / 4); break; case RESET_MSG: - pr_info("%s<%s>, requested by peer\n", link_rst_msg, - l_ptr->name); + pr_debug("%s<%s>, requested by peer\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -549,7 +549,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; default: - pr_err("%s%u in WW state\n", link_unk_evt, event); + pr_debug("%s%u in WW state\n", link_unk_evt, event); } break; case WORKING_UNKNOWN: @@ -561,8 +561,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; case RESET_MSG: - pr_info("%s<%s>, requested by peer while probing\n", - link_rst_msg, l_ptr->name); + pr_debug("%s<%s>, requested by peer while probing\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -588,8 +588,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ - pr_warn("%s<%s>, peer not responding\n", - link_rst_msg, l_ptr->name); + pr_debug("%s<%s>, peer not responding\n", + link_rst_msg, l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; @@ -1568,9 +1568,9 @@ static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, if (msg_linkprio(msg) && (msg_linkprio(msg) != l_ptr->priority)) { - pr_warn("%s<%s>, priority change %u->%u\n", - link_rst_msg, l_ptr->name, l_ptr->priority, - msg_linkprio(msg)); + pr_debug("%s<%s>, priority change %u->%u\n", + link_rst_msg, l_ptr->name, + l_ptr->priority, msg_linkprio(msg)); l_ptr->priority = msg_linkprio(msg); tipc_link_reset(l_ptr); /* Enforce change to take effect */ break; diff --git a/net/tipc/node.c b/net/tipc/node.c index b1eb0927bac8..ee5d33cfcf80 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -230,8 +230,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP; n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; - pr_info("Established link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Established link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); if (!active[0]) { active[0] = active[1] = l_ptr; @@ -239,7 +239,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) goto exit; } if (l_ptr->priority < active[0]->priority) { - pr_info("New link <%s> becomes standby\n", l_ptr->name); + pr_debug("New link <%s> becomes standby\n", l_ptr->name); goto exit; } tipc_link_dup_queue_xmit(active[0], l_ptr); @@ -247,9 +247,9 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) active[0] = l_ptr; goto exit; } - pr_info("Old link <%s> becomes standby\n", active[0]->name); + pr_debug("Old link <%s> becomes standby\n", active[0]->name); if (active[1] != active[0]) - pr_info("Old link <%s> becomes standby\n", active[1]->name); + pr_debug("Old link <%s> becomes standby\n", 
active[1]->name); active[0] = active[1] = l_ptr; exit: /* Leave room for changeover header when returning 'mtu' to users: */ @@ -297,12 +297,12 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; if (!tipc_link_is_active(l_ptr)) { - pr_info("Lost standby link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Lost standby link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); return; } - pr_info("Lost link <%s> on network plane %c\n", - l_ptr->name, l_ptr->net_plane); + pr_debug("Lost link <%s> on network plane %c\n", + l_ptr->name, l_ptr->net_plane); active = &n_ptr->active_links[0]; if (active[0] == l_ptr) @@ -380,8 +380,8 @@ static void node_lost_contact(struct tipc_node *n_ptr) char addr_string[16]; u32 i; - pr_info("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + pr_debug("Lost contact with %s\n", + tipc_addr_string_fill(addr_string, n_ptr->addr)); /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { -- cgit v1.2.3 From 08bfc9cb76e26d2489c401fb1d2de58d06fd1ded Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 22 Jan 2015 17:10:32 +0100 Subject: flow_dissector: add tipc support The flows are hashed on the sending node address, which allows us to spread out the TIPC link processing to RPS enabled cores. There is no point to include the destination address in the hash as that will always be the same for all inbound links. We have experimented with a 3-tuple hash over [srcnode, sport, dport], but this showed to give slightly lower performance because of increased lock contention when the same link was handled by multiple cores. Signed-off-by: Ying Xue Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 45084938c403..beb83d1ac1c6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -178,6 +178,20 @@ ipv6: return false; } } + case htons(ETH_P_TIPC): { + struct { + __be32 pre[3]; + __be32 srcnode; + } *hdr, _hdr; + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); + if (!hdr) + return false; + flow->src = hdr->srcnode; + flow->dst = 0; + flow->n_proto = proto; + flow->thoff = (u16)nhoff; + return true; + } case htons(ETH_P_FCOE): flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ -- cgit v1.2.3 From 600ddd6825543962fb807884169e57b580dba208 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 22 Jan 2015 18:26:54 +0100 Subject: net: sctp: fix slab corruption from use after free on INIT collisions When hitting an INIT collision case during the 4WHS with AUTH enabled, as already described in detail in commit 1be9a950c646 ("net: sctp: inherit auth_capable on INIT collisions"), it can happen that we occasionally still remotely trigger the following panic on server side which seems to have been uncovered after the fix from commit 1be9a950c646 ... [ 533.876389] BUG: unable to handle kernel paging request at 00000000ffffffff [ 533.913657] IP: [] __kmalloc+0x95/0x230 [ 533.940559] PGD 5030f2067 PUD 0 [ 533.957104] Oops: 0000 [#1] SMP [ 533.974283] Modules linked in: sctp mlx4_en [...] [ 534.939704] Call Trace: [ 534.951833] [] ? 
crypto_init_shash_ops+0x60/0xf0 [ 534.984213] [] crypto_init_shash_ops+0x60/0xf0 [ 535.015025] [] __crypto_alloc_tfm+0x6d/0x170 [ 535.045661] [] crypto_alloc_base+0x4c/0xb0 [ 535.074593] [] ? _raw_spin_lock_bh+0x12/0x50 [ 535.105239] [] sctp_inet_listen+0x161/0x1e0 [sctp] [ 535.138606] [] SyS_listen+0x9d/0xb0 [ 535.166848] [] system_call_fastpath+0x16/0x1b ... or depending on the the application, for example this one: [ 1370.026490] BUG: unable to handle kernel paging request at 00000000ffffffff [ 1370.026506] IP: [] kmem_cache_alloc+0x75/0x1d0 [ 1370.054568] PGD 633c94067 PUD 0 [ 1370.070446] Oops: 0000 [#1] SMP [ 1370.085010] Modules linked in: sctp kvm_amd kvm [...] [ 1370.963431] Call Trace: [ 1370.974632] [] ? SyS_epoll_ctl+0x53f/0x960 [ 1371.000863] [] SyS_epoll_ctl+0x53f/0x960 [ 1371.027154] [] ? anon_inode_getfile+0xd3/0x170 [ 1371.054679] [] ? __alloc_fd+0xa7/0x130 [ 1371.080183] [] system_call_fastpath+0x16/0x1b With slab debugging enabled, we can see that the poison has been overwritten: [ 669.826368] BUG kmalloc-128 (Tainted: G W ): Poison overwritten [ 669.826385] INFO: 0xffff880228b32e50-0xffff880228b32e50. First byte 0x6a instead of 0x6b [ 669.826414] INFO: Allocated in sctp_auth_create_key+0x23/0x50 [sctp] age=3 cpu=0 pid=18494 [ 669.826424] __slab_alloc+0x4bf/0x566 [ 669.826433] __kmalloc+0x280/0x310 [ 669.826453] sctp_auth_create_key+0x23/0x50 [sctp] [ 669.826471] sctp_auth_asoc_create_secret+0xcb/0x1e0 [sctp] [ 669.826488] sctp_auth_asoc_init_active_key+0x68/0xa0 [sctp] [ 669.826505] sctp_do_sm+0x29d/0x17c0 [sctp] [...] [ 669.826629] INFO: Freed in kzfree+0x31/0x40 age=1 cpu=0 pid=18494 [ 669.826635] __slab_free+0x39/0x2a8 [ 669.826643] kfree+0x1d6/0x230 [ 669.826650] kzfree+0x31/0x40 [ 669.826666] sctp_auth_key_put+0x19/0x20 [sctp] [ 669.826681] sctp_assoc_update+0x1ee/0x2d0 [sctp] [ 669.826695] sctp_do_sm+0x674/0x17c0 [sctp] Since this only triggers in some collision-cases with AUTH, the problem at heart is that sctp_auth_key_put() on asoc->asoc_shared_key is called twice when having refcnt 1, once directly in sctp_assoc_update() and yet again from within sctp_auth_asoc_init_active_key() via sctp_assoc_update() on the already kzfree'd memory, which is also consistent with the observation of the poison decrease from 0x6b to 0x6a (note: the overwrite is detected at a later point in time when poison is checked on new allocation). Reference counting of auth keys revisited: Shared keys for AUTH chunks are being stored in endpoints and associations in endpoint_shared_keys list. On endpoint creation, a null key is being added; on association creation, all endpoint shared keys are being cached and thus cloned over to the association. struct sctp_shared_key only holds a pointer to the actual key bytes, that is, struct sctp_auth_bytes which keeps track of users internally through refcounting. Naturally, on assoc or enpoint destruction, sctp_shared_key are being destroyed directly and the reference on sctp_auth_bytes dropped. User space can add keys to either list via setsockopt(2) through struct sctp_authkey and by passing that to sctp_auth_set_key() which replaces or adds a new auth key. There, sctp_auth_create_key() creates a new sctp_auth_bytes with refcount 1 and in case of replacement drops the reference on the old sctp_auth_bytes. A key can be set active from user space through setsockopt() on the id via sctp_auth_set_active_key(), which iterates through either endpoint_shared_keys and in case of an assoc, invokes (one of various places) sctp_auth_asoc_init_active_key(). 
sctp_auth_asoc_init_active_key() computes the actual secret from local's and peer's random, hmac and shared key parameters and returns a new key directly as sctp_auth_bytes, that is asoc->asoc_shared_key, plus drops the reference if there was a previous one. The secret, which where we eventually double drop the ref comes from sctp_auth_asoc_set_secret() with intitial refcount of 1, which also stays unchanged eventually in sctp_assoc_update(). This key is later being used for crypto layer to set the key for the hash in crypto_hash_setkey() from sctp_auth_calculate_hmac(). To close the loop: asoc->asoc_shared_key is freshly allocated secret material and independant of the sctp_shared_key management keeping track of only shared keys in endpoints and assocs. Hence, also commit 4184b2a79a76 ("net: sctp: fix memory leak in auth key management") is independant of this bug here since it concerns a different layer (though same structures being used eventually). asoc->asoc_shared_key is reference dropped correctly on assoc destruction in sctp_association_free() and when active keys are being replaced in sctp_auth_asoc_init_active_key(), it always has a refcount of 1. Hence, it's freed prematurely in sctp_assoc_update(). Simple fix is to remove that sctp_auth_key_put() from there which fixes these panics. Fixes: 730fc3d05cd4 ("[SCTP]: Implete SCTP-AUTH parameter processing") Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/associola.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index f791edd64d6c..26d06dbcc1c8 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1182,7 +1182,6 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.peer_hmacs = new->peer.peer_hmacs; new->peer.peer_hmacs = NULL; - sctp_auth_key_put(asoc->asoc_shared_key); sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); } -- cgit v1.2.3 From df4d92549f23e1c037e83323aff58a21b3de7fe0 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 23 Jan 2015 12:01:26 +0100 Subject: ipv4: try to cache dst_entries which would cause a redirect Not caching dst_entries which cause redirects could be exploited by hosts on the same subnet, causing a severe DoS attack. This effect aggravated since commit f88649721268999 ("ipv4: fix dst race in sk_dst_get()"). Lookups causing redirects will be allocated with DST_NOCACHE set which will force dst_release to free them via RCU. Unfortunately waiting for RCU grace period just takes too long, we can end up with >1M dst_entries waiting to be released and the system will run OOM. rcuos threads cannot catch up under high softirq load. Attaching the flag to emit a redirect later on to the specific skb allows us to cache those dst_entries thus reducing the pressure on allocation and deallocation. This issue was discovered by Marcelo Leitner. Cc: Julian Anastasov Signed-off-by: Marcelo Leitner Signed-off-by: Florian Westphal Signed-off-by: Hannes Frederic Sowa Signed-off-by: Julian Anastasov Signed-off-by: David S. 
Miller --- include/net/ip.h | 11 ++++++----- net/ipv4/ip_forward.c | 3 ++- net/ipv4/route.c | 9 +++++---- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index 0bb620702929..f7cbd703d15d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -39,11 +39,12 @@ struct inet_skb_parm { struct ip_options opt; /* Compiled IP options */ unsigned char flags; -#define IPSKB_FORWARDED 1 -#define IPSKB_XFRM_TUNNEL_SIZE 2 -#define IPSKB_XFRM_TRANSFORMED 4 -#define IPSKB_FRAG_COMPLETE 8 -#define IPSKB_REROUTED 16 +#define IPSKB_FORWARDED BIT(0) +#define IPSKB_XFRM_TUNNEL_SIZE BIT(1) +#define IPSKB_XFRM_TRANSFORMED BIT(2) +#define IPSKB_FRAG_COMPLETE BIT(3) +#define IPSKB_REROUTED BIT(4) +#define IPSKB_DOREDIRECT BIT(5) u16 frag_max_size; }; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 3a83ce5efa80..787b3c294ce6 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -129,7 +129,8 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) + if (IPCB(skb)->flags & IPSKB_DOREDIRECT && !opt->srr && + !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a2155b02602..d58dd0ec3e53 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1554,11 +1554,10 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && + skb->protocol == htons(ETH_P_IP) && (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { - flags |= RTCF_DOREDIRECT; - do_cache = false; - } + inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) + IPCB(skb)->flags |= IPSKB_DOREDIRECT; if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -2303,6 +2302,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; + if (IPCB(skb)->flags & IPSKB_DOREDIRECT) + r->rtm_flags |= RTCF_DOREDIRECT; if (nla_put_be32(skb, RTA_DST, dst)) goto nla_put_failure; -- cgit v1.2.3 From 86f3cddbc3037882414c7308973530167906b7e9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 24 Jan 2015 08:02:40 +1100 Subject: udp_diag: Fix socket skipping within chain While working on rhashtable walking I noticed that the UDP diag dumping code is buggy. In particular, the socket skipping within a chain never happens, even though we record the number of sockets that should be skipped. As this code was supposedly copied from TCP, this patch does what TCP does and resets num before we walk a chain. Signed-off-by: Herbert Xu Acked-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv4/udp_diag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 7927db0a9279..4a000f1dd757 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -99,11 +99,13 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin s_slot = cb->args[0]; num = s_num = cb->args[1]; - for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) { + for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) { struct sock *sk; struct hlist_nulls_node *node; struct udp_hslot *hslot = &table->hash[slot]; + num = 0; + if (hlist_nulls_empty(&hslot->head)) continue; -- cgit v1.2.3 From fc752f1f43c1c038a2c6ae58cc739ebb5953ccb0 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Fri, 23 Jan 2015 22:26:02 +0000 Subject: ping: Fix race in free in receive path An exception is seen in ICMP ping receive path where the skb destructor sock_rfree() tries to access a freed socket. This happens because ping_rcv() releases socket reference with sock_put() and this internally frees up the socket. Later icmp_rcv() will try to free the skb and as part of this, skb destructor is called and which leads to a kernel panic as the socket is freed already in ping_rcv(). -->|exception -007|sk_mem_uncharge -007|sock_rfree -008|skb_release_head_state -009|skb_release_all -009|__kfree_skb -010|kfree_skb -011|icmp_rcv -012|ip_local_deliver_finish Fix this incorrect free by cloning this skb and processing this cloned skb instead. This patch was suggested by Eric Dumazet Signed-off-by: Subash Abhinov Kasiviswanathan Cc: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ping.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index c0d82f78d364..2a3720fb5a5f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -966,8 +966,11 @@ bool ping_rcv(struct sk_buff *skb) sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk != NULL) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + pr_debug("rcv on socket %p\n", sk); - ping_queue_rcv_skb(sk, skb_get(skb)); + if (skb2) + ping_queue_rcv_skb(sk, skb2); sock_put(sk); return true; } -- cgit v1.2.3 From 6e9e16e6143b725662e47026a1d0f270721cdd24 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 26 Jan 2015 15:11:17 +0100 Subject: ipv6: replacing a rt6_info needs to purge possible propagated rt6_infos too Lubomir Rintel reported that during replacing a route the interface reference counter isn't correctly decremented. To quote bug : | [root@rhel7-5 lkundrak]# sh -x lal | + ip link add dev0 type dummy | + ip link set dev0 up | + ip link add dev1 type dummy | + ip link set dev1 up | + ip addr add 2001:db8:8086::2/64 dev dev0 | + ip route add 2001:db8:8086::/48 dev dev0 proto static metric 20 | + ip route add 2001:db8:8088::/48 dev dev1 proto static metric 10 | + ip route replace 2001:db8:8086::/48 dev dev1 proto static metric 20 | + ip link del dev0 type dummy | Message from syslogd@rhel7-5 at Jan 23 10:54:41 ... | kernel:unregister_netdevice: waiting for dev0 to become free. Usage count = 2 | | Message from syslogd@rhel7-5 at Jan 23 10:54:51 ... | kernel:unregister_netdevice: waiting for dev0 to become free. Usage count = 2 During replacement of a rt6_info we must walk all parent nodes and check if the to be replaced rt6_info got propagated. If so, replace it with an alive one. 
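The migration that fib6_purge_rt() performs can be pictured with a toy userspace model (simplified structs and plain integer refcounts instead of fib6_node/rt6_info and atomics; a single surviving route stands in for fib6_find_prefix(); all names here are made up): every ascendant node that still holds the dying route as a dummy leaf is repointed to a surviving route before the final reference is dropped.

#include <assert.h>
#include <stdio.h>

/* Toy stand-ins for rt6_info/fib6_node: plain ints instead of atomics. */
struct toy_rt {
	int ref;
};

struct toy_node {
	struct toy_node *parent;
	struct toy_rt *leaf;	/* dummy address holder in split nodes */
	int has_rtinfo;		/* RTN_RTINFO-like flag */
};

static void toy_purge_rt(struct toy_rt *rt, struct toy_node *fn,
			 struct toy_rt *replacement)
{
	/* Mirror of the ascendant walk: swap dummy references to 'rt'
	 * for references to a route that stays alive.
	 */
	while (fn) {
		if (!fn->has_rtinfo && fn->leaf == rt) {
			fn->leaf = replacement;
			replacement->ref++;
			rt->ref--;	/* rt6_release() equivalent */
		}
		fn = fn->parent;
	}
}

int main(void)
{
	struct toy_rt dying = { .ref = 3 };	/* 1 owner + 2 dummy holders */
	struct toy_rt alive = { .ref = 1 };
	struct toy_node root = { .parent = NULL,  .leaf = &dying, .has_rtinfo = 0 };
	struct toy_node mid  = { .parent = &root, .leaf = &dying, .has_rtinfo = 0 };
	struct toy_node fn   = { .parent = &mid,  .leaf = NULL,   .has_rtinfo = 1 };

	toy_purge_rt(&dying, &fn, &alive);

	/* Only the owner's reference remains; it can now be dropped safely. */
	assert(dying.ref == 1);
	assert(alive.ref == 3);
	printf("dying.ref=%d alive.ref=%d\n", dying.ref, alive.ref);
	return 0;
}
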
Fixes: 4a287eba2de3957 ("IPv6 routing, NLM_F_* flag support: REPLACE and EXCL flags support, warn about missing CREATE flag") Reported-by: Lubomir Rintel Signed-off-by: Hannes Frederic Sowa Tested-by: Lubomir Rintel Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b2d1838897c9..f1c6d5e98322 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -659,6 +659,29 @@ static int fib6_commit_metrics(struct dst_entry *dst, return 0; } +static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, + struct net *net) +{ + if (atomic_read(&rt->rt6i_ref) != 1) { + /* This route is used as dummy address holder in some split + * nodes. It is not leaked, but it still holds other resources, + * which must be released in time. So, scan ascendant nodes + * and replace dummy references to this route with references + * to still alive ones. + */ + while (fn) { + if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { + fn->leaf = fib6_find_prefix(net, fn); + atomic_inc(&fn->leaf->rt6i_ref); + rt6_release(rt); + } + fn = fn->parent; + } + /* No more references are possible at this point. */ + BUG_ON(atomic_read(&rt->rt6i_ref) != 1); + } +} + /* * Insert routing information in a node. */ @@ -807,11 +830,12 @@ add: rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info); - rt6_release(iter); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } + fib6_purge_rt(iter, fn, info->nl_net); + rt6_release(iter); } return 0; @@ -1322,24 +1346,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, fn = fib6_repair_tree(net, fn); } - if (atomic_read(&rt->rt6i_ref) != 1) { - /* This route is used as dummy address holder in some split - * nodes. It is not leaked, but it still holds other resources, - * which must be released in time. So, scan ascendant nodes - * and replace dummy references to this route with references - * to still alive ones. - */ - while (fn) { - if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { - fn->leaf = fib6_find_prefix(net, fn); - atomic_inc(&fn->leaf->rt6i_ref); - rt6_release(rt); - } - fn = fn->parent; - } - /* No more references are possible at this point. */ - BUG_ON(atomic_read(&rt->rt6i_ref) != 1); - } + fib6_purge_rt(rt, fn, net); inet6_rt_notify(RTM_DELROUTE, rt, info); rt6_release(rt); -- cgit v1.2.3 From 8ea65f4a2dfaaf494ef42a16cbf2fea39b07450f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 26 Jan 2015 14:02:56 +1100 Subject: netlink: Kill redundant net argument in netlink_insert The socket already carries the net namespace with it so there is no need to be passing another net around. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7a94185bde6b..d77b3467b1d4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -994,11 +994,10 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, &netlink_compare, &arg); } -static bool __netlink_insert(struct netlink_table *table, struct sock *sk, - struct net *net) +static bool __netlink_insert(struct netlink_table *table, struct sock *sk) { struct netlink_compare_arg arg = { - .net = net, + .net = sock_net(sk), .portid = nlk_sk(sk)->portid, }; @@ -1047,7 +1046,7 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, struct net *net, u32 portid) +static int netlink_insert(struct sock *sk, u32 portid) { struct netlink_table *table = &nl_table[sk->sk_protocol]; int err; @@ -1067,7 +1066,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) sock_hold(sk); err = 0; - if (!__netlink_insert(table, sk, net)) { + if (!__netlink_insert(table, sk)) { err = -EADDRINUSE; sock_put(sk); } @@ -1289,7 +1288,7 @@ retry: } rcu_read_unlock(); - err = netlink_insert(sk, net, portid); + err = netlink_insert(sk, portid); if (err == -EADDRINUSE) goto retry; @@ -1477,7 +1476,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (!nlk->portid) { err = nladdr->nl_pid ? - netlink_insert(sk, net, nladdr->nl_pid) : + netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { netlink_undo_bind(nlk->ngroups, groups, sk); @@ -2483,7 +2482,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (cfg && cfg->input) nlk_sk(sk)->netlink_rcv = cfg->input; - if (netlink_insert(sk, net, 0)) + if (netlink_insert(sk, 0)) goto out_sock_release; nlk = nlk_sk(sk); -- cgit v1.2.3 From 9120d94e8f9abd3eb9f00a5aaa6eca85cdf4f439 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Sat, 24 Jan 2015 10:30:02 +0200 Subject: mac80211: handle potential race between suspend and scan completion If suspend starts while ieee80211_scan_completed() is running, between the point where SCAN_COMPLETED is set and the work is queued, ieee80211_scan_cancel() will not catch the work and we may finish suspending before the work is actually executed, leaving the scan running while suspended. To fix this race, queue the scan work during resume if the SCAN_COMPLETED flag is set and flush it immediately. 
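The queue-then-flush idea can be sketched with a toy model (plain pthreads instead of the kernel workqueue; all names are made up): the resume path never runs the handler inline, it only queues it and then waits for the queued instance to finish, the idea being that a concurrent scheduler of the same work cannot cause a duplicate or concurrent run.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy single-shot "workqueue" built on a mutex/condvar pair, just to
 * illustrate queue-then-flush semantics; not the kernel workqueue API.
 */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool pending, done;
static bool scan_completed = true;	/* completion was missed during suspend */

static void scan_work(void)
{
	printf("scan_work: cleaning up completed scan\n");
	scan_completed = false;
}

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!pending)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);

	scan_work();	/* runs in the worker, as it normally would */

	pthread_mutex_lock(&lock);
	done = true;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void queue_work(void)	/* stand-in for ieee80211_queue_delayed_work(.., 0) */
{
	pthread_mutex_lock(&lock);
	pending = true;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}

static void flush_work(void)	/* stand-in for flush_delayed_work() */
{
	pthread_mutex_lock(&lock);
	while (!done)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);

	/* Resume path: don't call scan_work() directly (another context may
	 * schedule the same work); queue it and wait for it to finish.
	 */
	if (scan_completed) {
		queue_work();
		flush_work();
	}
	printf("resume continues with clean scan state\n");

	pthread_join(t, NULL);
	return 0;
}
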
Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/util.c | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index bdb3e3bc7503..3afe36824703 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1752,7 +1752,8 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), + WARN(test_bit(SCAN_HW_SCANNING, &local->scanning) && + !test_bit(SCAN_COMPLETED, &local->scanning), "%s: resume with hardware scan still in progress\n", wiphy_name(hw->wiphy)); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c65d03f3c167..8428f4a95479 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2060,6 +2060,18 @@ int ieee80211_reconfig(struct ieee80211_local *local) mb(); local->resuming = false; + /* It's possible that we don't handle the scan completion in + * time during suspend, so if it's still marked as completed + * here, queue the work and flush it to clean things up. + * Instead of calling the worker function directly here, we + * really queue it to avoid potential races with other flows + * scheduling the same work. + */ + if (test_bit(SCAN_COMPLETED, &local->scanning)) { + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); + flush_delayed_work(&local->scan_work); + } + if (local->open_count && !reconfig_due_to_wowlan) drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_SUSPEND); -- cgit v1.2.3 From 37720569cc89205c23011d6b52f8c63ea2057c3d Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:04 +0200 Subject: cfg80211: Fix BIP (AES-CMAC) cipher validation This cipher can be used only as a group management frame cipher and as such, there is no point in validating that it is not used with non-zero key-index. Instead, verify that it is not used as a pairwise cipher regardless of the key index. 
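A minimal userspace sketch of the rule the new switch enforces (the suite selector constants are the standard 802.11 values; the helper name key_params_ok() is made up for illustration): TKIP/CCMP may be pairwise but only with key index 0, while BIP (AES-CMAC) is rejected as a pairwise cipher regardless of index.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define WLAN_CIPHER_SUITE_TKIP     0x000FAC02
#define WLAN_CIPHER_SUITE_CCMP     0x000FAC04
#define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06

/* Hypothetical predicate mirroring the validation switch. */
static bool key_params_ok(uint32_t cipher, bool pairwise, int key_idx)
{
	switch (cipher) {
	case WLAN_CIPHER_SUITE_TKIP:
	case WLAN_CIPHER_SUITE_CCMP:
		return !(pairwise && key_idx);
	case WLAN_CIPHER_SUITE_AES_CMAC:
		return !pairwise;
	default:
		return true;	/* vendor ciphers validated elsewhere */
	}
}

int main(void)
{
	assert(key_params_ok(WLAN_CIPHER_SUITE_CCMP, true, 0));
	assert(!key_params_ok(WLAN_CIPHER_SUITE_CCMP, true, 1));
	assert(key_params_ok(WLAN_CIPHER_SUITE_AES_CMAC, false, 4));
	assert(!key_params_ok(WLAN_CIPHER_SUITE_AES_CMAC, true, 0));
	return 0;
}
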
Signed-off-by: Jouni Malinen [change code to use switch statement which is easier to extend] Signed-off-by: Johannes Berg --- net/wireless/util.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 3535e8ade48f..08f136ad2ea5 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -227,18 +227,26 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (pairwise && !mac_addr) return -EINVAL; - /* - * Disallow pairwise keys with non-zero index unless it's WEP - * or a vendor specific cipher (because current deployments use - * pairwise WEP keys with non-zero indices and for vendor specific - * ciphers this should be validated in the driver or hardware level - * - but 802.11i clearly specifies to use zero) - */ - if (pairwise && key_idx && - ((params->cipher == WLAN_CIPHER_SUITE_TKIP) || - (params->cipher == WLAN_CIPHER_SUITE_CCMP) || - (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC))) - return -EINVAL; + switch (params->cipher) { + case WLAN_CIPHER_SUITE_TKIP: + case WLAN_CIPHER_SUITE_CCMP: + /* Disallow pairwise keys with non-zero index unless it's WEP + * or a vendor specific cipher (because current deployments use + * pairwise WEP keys with non-zero indices and for vendor + * specific ciphers this should be validated in the driver or + * hardware level - but 802.11i clearly specifies to use zero) + */ + if (pairwise && key_idx) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + /* Disallow BIP (group-only) cipher as pairwise cipher */ + if (pairwise) + return -EINVAL; + break; + default: + break; + } switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: -- cgit v1.2.3 From cfcf1682c4ca8f601a4702255958e0b1c9aa12cc Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:05 +0200 Subject: cfg80211: Add new GCMP, CCMP-256, BIP-GMAC, BIP-CMAC-256 ciphers This makes cfg80211 aware of the GCMP, GCMP-256, CCMP-256, BIP-GMAC-128, BIP-GMAC-256, and BIP-CMAC-256 cipher suites. These new cipher suites were defined in IEEE Std 802.11ac-2013. 
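For reference, the suite selectors and key lengths this patch introduces can be summarized in a small table; a stand-alone sketch (values taken from the ieee80211.h and util.c hunks below) prints what cfg80211_validate_key_settings() will expect for each new cipher.

#include <stddef.h>
#include <stdio.h>

/* Suite selectors and key lengths added by this patch (IEEE 802.11ac-2013). */
struct cipher_info {
	unsigned int suite;
	const char *name;
	unsigned int key_len;	/* octets, matches WLAN_KEY_LEN_* */
};

static const struct cipher_info new_ciphers[] = {
	{ 0x000FAC08, "GCMP",         16 },
	{ 0x000FAC09, "GCMP-256",     32 },
	{ 0x000FAC0A, "CCMP-256",     32 },
	{ 0x000FAC0B, "BIP-GMAC-128", 16 },
	{ 0x000FAC0C, "BIP-GMAC-256", 32 },
	{ 0x000FAC0D, "BIP-CMAC-256", 32 },
};

int main(void)
{
	/* cfg80211_validate_key_settings() rejects a key whose length does
	 * not match the entry for its suite.
	 */
	for (size_t i = 0; i < sizeof(new_ciphers) / sizeof(new_ciphers[0]); i++)
		printf("%-12s (%08x): %u-byte key\n",
		       new_ciphers[i].name, new_ciphers[i].suite,
		       new_ciphers[i].key_len);
	return 0;
}
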
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 ++++++++++++++++++ net/wireless/util.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 4f4eea8a6288..dbf417bf25bf 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1994,9 +1994,15 @@ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, WLAN_KEY_LEN_WEP104 = 13, WLAN_KEY_LEN_CCMP = 16, + WLAN_KEY_LEN_CCMP_256 = 32, WLAN_KEY_LEN_TKIP = 32, WLAN_KEY_LEN_AES_CMAC = 16, WLAN_KEY_LEN_SMS4 = 32, + WLAN_KEY_LEN_GCMP = 16, + WLAN_KEY_LEN_GCMP_256 = 32, + WLAN_KEY_LEN_BIP_CMAC_256 = 32, + WLAN_KEY_LEN_BIP_GMAC_128 = 16, + WLAN_KEY_LEN_BIP_GMAC_256 = 32, }; #define IEEE80211_WEP_IV_LEN 4 @@ -2004,9 +2010,16 @@ enum ieee80211_key_len { #define IEEE80211_CCMP_HDR_LEN 8 #define IEEE80211_CCMP_MIC_LEN 8 #define IEEE80211_CCMP_PN_LEN 6 +#define IEEE80211_CCMP_256_HDR_LEN 8 +#define IEEE80211_CCMP_256_MIC_LEN 16 +#define IEEE80211_CCMP_256_PN_LEN 6 #define IEEE80211_TKIP_IV_LEN 8 #define IEEE80211_TKIP_ICV_LEN 4 #define IEEE80211_CMAC_PN_LEN 6 +#define IEEE80211_GMAC_PN_LEN 6 +#define IEEE80211_GCMP_HDR_LEN 8 +#define IEEE80211_GCMP_MIC_LEN 16 +#define IEEE80211_GCMP_PN_LEN 6 /* Public action codes */ enum ieee80211_pub_actioncode { @@ -2230,6 +2243,11 @@ enum ieee80211_sa_query_action { #define WLAN_CIPHER_SUITE_WEP104 0x000FAC05 #define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06 #define WLAN_CIPHER_SUITE_GCMP 0x000FAC08 +#define WLAN_CIPHER_SUITE_GCMP_256 0x000FAC09 +#define WLAN_CIPHER_SUITE_CCMP_256 0x000FAC0A +#define WLAN_CIPHER_SUITE_BIP_GMAC_128 0x000FAC0B +#define WLAN_CIPHER_SUITE_BIP_GMAC_256 0x000FAC0C +#define WLAN_CIPHER_SUITE_BIP_CMAC_256 0x000FAC0D #define WLAN_CIPHER_SUITE_SMS4 0x00147201 diff --git a/net/wireless/util.c b/net/wireless/util.c index 08f136ad2ea5..919fee807dd9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -230,6 +230,9 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, switch (params->cipher) { case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: /* Disallow pairwise keys with non-zero index unless it's WEP * or a vendor specific cipher (because current deployments use * pairwise WEP keys with non-zero indices and for vendor @@ -240,6 +243,9 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, return -EINVAL; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: /* Disallow BIP (group-only) cipher as pairwise cipher */ if (pairwise) return -EINVAL; @@ -261,6 +267,18 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (params->key_len != WLAN_KEY_LEN_CCMP) return -EINVAL; break; + case WLAN_CIPHER_SUITE_CCMP_256: + if (params->key_len != WLAN_KEY_LEN_CCMP_256) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_GCMP: + if (params->key_len != WLAN_KEY_LEN_GCMP) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_GCMP_256: + if (params->key_len != WLAN_KEY_LEN_GCMP_256) + return -EINVAL; + break; case WLAN_CIPHER_SUITE_WEP104: if (params->key_len != WLAN_KEY_LEN_WEP104) return -EINVAL; @@ -269,6 +287,18 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (params->key_len != WLAN_KEY_LEN_AES_CMAC) return -EINVAL; break; + 
case WLAN_CIPHER_SUITE_BIP_CMAC_256: + if (params->key_len != WLAN_KEY_LEN_BIP_CMAC_256) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + if (params->key_len != WLAN_KEY_LEN_BIP_GMAC_128) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (params->key_len != WLAN_KEY_LEN_BIP_GMAC_256) + return -EINVAL; + break; default: /* * We don't know anything about this algorithm, @@ -288,7 +318,13 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, return -EINVAL; case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (params->seq_len != 6) return -EINVAL; break; -- cgit v1.2.3 From 00b9cfa3ff38401bd70c34b250ca13e5ea347b4a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:06 +0200 Subject: mac80111: Add GCMP and GCMP-256 ciphers This allows mac80211 to configure GCMP and GCMP-256 to the driver and also use software-implementation within mac80211 when the driver does not support this with hardware accelaration. Signed-off-by: Jouni Malinen [remove a spurious newline] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 ++- net/mac80211/Kconfig | 1 + net/mac80211/Makefile | 1 + net/mac80211/aes_gcm.c | 95 +++++++++++++++++++ net/mac80211/aes_gcm.h | 22 +++++ net/mac80211/cfg.c | 13 +++ net/mac80211/debugfs_key.c | 22 +++++ net/mac80211/key.c | 74 ++++++++++++++- net/mac80211/key.h | 11 +++ net/mac80211/main.c | 9 +- net/mac80211/rx.c | 4 + net/mac80211/tx.c | 5 + net/mac80211/wpa.c | 224 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/wpa.h | 5 + 14 files changed, 492 insertions(+), 9 deletions(-) create mode 100644 net/mac80211/aes_gcm.c create mode 100644 net/mac80211/aes_gcm.h (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 866073e27ea2..ae6638436112 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1294,8 +1294,8 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * @IEEE80211_KEY_FLAG_PAIRWISE: Set by mac80211, this flag indicates * that the key is pairwise rather then a shared key. * @IEEE80211_KEY_FLAG_SW_MGMT_TX: This flag should be set by the driver for a - * CCMP key if it requires CCMP encryption of management frames (MFP) to - * be done in software. + * CCMP/GCMP key if it requires CCMP/GCMP encryption of management frames + * (MFP) to be done in software. * @IEEE80211_KEY_FLAG_PUT_IV_SPACE: This flag should be set by the driver * if space should be prepared for the IV, but the IV * itself should not be generated. Do not set together with @@ -1310,7 +1310,7 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * RX, if your crypto engine can't deal with TX you can also set the * %IEEE80211_KEY_FLAG_SW_MGMT_TX flag to encrypt such frames in SW. * @IEEE80211_KEY_FLAG_GENERATE_IV_MGMT: This flag should be set by the - * driver for a CCMP key to indicate that is requires IV generation + * driver for a CCMP/GCMP key to indicate that is requires IV generation * only for managment frames (MFP). 
* @IEEE80211_KEY_FLAG_RESERVE_TAILROOM: This flag should be set by the * driver for a key to indicate that sufficient tailroom must always @@ -4098,6 +4098,8 @@ void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, * reverse order than in packet) * @aes_cmac: PN data, most significant byte first (big endian, * reverse order than in packet) + * @gcmp: PN data, most significant byte first (big endian, + * reverse order than in packet) */ struct ieee80211_key_seq { union { @@ -4111,6 +4113,9 @@ struct ieee80211_key_seq { struct { u8 pn[6]; } aes_cmac; + struct { + u8 pn[6]; + } gcmp; }; }; @@ -4135,7 +4140,7 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, * ieee80211_get_key_rx_seq - get key RX sequence counter * * @keyconf: the parameter passed with the set key - * @tid: The TID, or -1 for the management frame value (CCMP only); + * @tid: The TID, or -1 for the management frame value (CCMP/GCMP only); * the value on TID 0 is also used for non-QoS frames. For * CMAC, only TID 0 is valid. * @seq: buffer to receive the sequence data @@ -4171,7 +4176,7 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, * ieee80211_set_key_rx_seq - set key RX sequence counter * * @keyconf: the parameter passed with the set key - * @tid: The TID, or -1 for the management frame value (CCMP only); + * @tid: The TID, or -1 for the management frame value (CCMP/GCMP only); * the value on TID 0 is also used for non-QoS frames. For * CMAC, only TID 0 is valid. * @seq: new sequence data diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 75cc6801a431..64a012a0c6e5 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -5,6 +5,7 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRYPTO_CCM + select CRYPTO_GCM select CRC32 select AVERAGE ---help--- diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index e53671b1105e..0cbf93618433 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -15,6 +15,7 @@ mac80211-y := \ michael.o \ tkip.o \ aes_ccm.o \ + aes_gcm.o \ aes_cmac.o \ cfg.o \ ethtool.o \ diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c new file mode 100644 index 000000000000..c2bf6698d738 --- /dev/null +++ b/net/mac80211/aes_gcm.c @@ -0,0 +1,95 @@ +/* + * Copyright 2014-2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include "key.h" +#include "aes_gcm.h" + +void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist assoc, pt, ct[2]; + + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + + memset(aead_req, 0, sizeof(aead_req_data)); + + sg_init_one(&pt, data, data_len); + sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); + sg_init_table(ct, 2); + sg_set_buf(&ct[0], data, data_len); + sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, &pt, ct, data_len, j_0); + + crypto_aead_encrypt(aead_req); +} + +int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist assoc, pt, ct[2]; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + + if (data_len == 0) + return -EINVAL; + + memset(aead_req, 0, sizeof(aead_req_data)); + + sg_init_one(&pt, data, data_len); + sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); + sg_init_table(ct, 2); + sg_set_buf(&ct[0], data, data_len); + sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, ct, &pt, + data_len + IEEE80211_GCMP_MIC_LEN, j_0); + + return crypto_aead_decrypt(aead_req); +} + +struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], + size_t key_len) +{ + struct crypto_aead *tfm; + int err; + + tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return tfm; + + err = crypto_aead_setkey(tfm, key, key_len); + if (!err) + err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN); + if (!err) + return tfm; + + crypto_free_aead(tfm); + return ERR_PTR(err); +} + +void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm) +{ + crypto_free_aead(tfm); +} diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h new file mode 100644 index 000000000000..1347fda6b76a --- /dev/null +++ b/net/mac80211/aes_gcm.h @@ -0,0 +1,22 @@ +/* + * Copyright 2014-2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef AES_GCM_H +#define AES_GCM_H + +#include + +void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic); +int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic); +struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], + size_t key_len); +void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm); + +#endif /* AES_GCM_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a777114d663b..1c1d061cff56 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -164,6 +164,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: break; default: cs = ieee80211_cs_get(local, params->cipher, sdata->vif.type); @@ -369,6 +370,18 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + params.seq = seq; + params.seq_len = 6; + break; } params.key = key->conf.key; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 5523b94c7c90..0e223e602296 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -105,6 +105,13 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_read(&key->u.gcmp.tx_pn); + len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); + break; default: return 0; } @@ -151,6 +158,17 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, rpn[3], rpn[4], rpn[5]); len = p - buf; break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { + rpn = key->u.gcmp.rx_pn[i]; + p += scnprintf(p, sizeof(buf)+buf-p, + "%02x%02x%02x%02x%02x%02x\n", + rpn[0], rpn[1], rpn[2], + rpn[3], rpn[4], rpn[5]); + } + len = p - buf; + break; default: return 0; } @@ -173,6 +191,10 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + len = scnprintf(buf, sizeof(buf), "%u\n", key->u.gcmp.replays); + break; default: return 0; } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 5167c53aa15f..cbee2f5180ce 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -24,6 +24,7 @@ #include "debugfs_key.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gcm.h" /** @@ -163,6 +164,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: /* all of these we can do in software - if driver can */ if (ret == 1) return 0; @@ -412,6 +415,25 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return ERR_PTR(err); } break; + case WLAN_CIPHER_SUITE_GCMP: + case 
WLAN_CIPHER_SUITE_GCMP_256: + key->conf.iv_len = IEEE80211_GCMP_HDR_LEN; + key->conf.icv_len = IEEE80211_GCMP_MIC_LEN; + for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++) + for (j = 0; j < IEEE80211_GCMP_PN_LEN; j++) + key->u.gcmp.rx_pn[i][j] = + seq[IEEE80211_GCMP_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.gcmp.tfm = ieee80211_aes_gcm_key_setup_encrypt(key_data, + key_len); + if (IS_ERR(key->u.gcmp.tfm)) { + err = PTR_ERR(key->u.gcmp.tfm); + kfree(key); + return ERR_PTR(err); + } + break; default: if (cs) { size_t len = (seq_len > MAX_PN_LEN) ? @@ -433,10 +455,18 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, static void ieee80211_key_free_common(struct ieee80211_key *key) { - if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) + break; + case WLAN_CIPHER_SUITE_AES_CMAC: ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + ieee80211_aes_gcm_key_free(key->u.gcmp.tfm); + break; + } kzfree(key); } @@ -760,6 +790,16 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[1] = pn64 >> 32; seq->ccmp.pn[0] = pn64 >> 40; break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq->gcmp.pn[5] = pn64; + seq->gcmp.pn[4] = pn64 >> 8; + seq->gcmp.pn[3] = pn64 >> 16; + seq->gcmp.pn[2] = pn64 >> 24; + seq->gcmp.pn[1] = pn64 >> 32; + seq->gcmp.pn[0] = pn64 >> 40; + break; default: WARN_ON(1); } @@ -796,6 +836,16 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.aes_cmac.rx_pn; memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) + return; + if (tid < 0) + pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; + else + pn = key->u.gcmp.rx_pn[tid]; + memcpy(seq->gcmp.pn, pn, IEEE80211_GCMP_PN_LEN); + break; } } EXPORT_SYMBOL(ieee80211_get_key_rx_seq); @@ -831,6 +881,16 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, ((u64)seq->aes_cmac.pn[0] << 40); atomic64_set(&key->u.aes_cmac.tx_pn, pn64); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = (u64)seq->gcmp.pn[5] | + ((u64)seq->gcmp.pn[4] << 8) | + ((u64)seq->gcmp.pn[3] << 16) | + ((u64)seq->gcmp.pn[2] << 24) | + ((u64)seq->gcmp.pn[1] << 32) | + ((u64)seq->gcmp.pn[0] << 40); + atomic64_set(&key->u.gcmp.tx_pn, pn64); + break; default: WARN_ON(1); break; @@ -868,6 +928,16 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.aes_cmac.rx_pn; memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) + return; + if (tid < 0) + pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; + else + pn = key->u.gcmp.rx_pn[tid]; + memcpy(pn, seq->gcmp.pn, IEEE80211_GCMP_PN_LEN); + break; default: WARN_ON(1); break; diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 19db68663d75..27580da851c8 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -94,6 +94,17 @@ struct ieee80211_key { u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; + struct { + 
atomic64_t tx_pn; + /* Last received packet number. The first + * IEEE80211_NUM_TIDS counters are used with Data + * frames and the last counter is used with Robust + * Management frames. + */ + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_GCMP_PN_LEN]; + struct crypto_aead *tfm; + u32 replays; /* dot11RSNAStatsGCMPReplays */ + } gcmp; struct { /* generic cipher scheme */ u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN]; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ea6b82ac4f0b..7223b4e16752 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -666,6 +666,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, + WLAN_CIPHER_SUITE_GCMP, + WLAN_CIPHER_SUITE_GCMP_256, /* keep last -- depends on hw flags! */ WLAN_CIPHER_SUITE_AES_CMAC @@ -724,9 +726,10 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) /* Driver specifies cipher schemes only (but not cipher suites * including the schemes) * - * We start counting ciphers defined by schemes, TKIP and CCMP + * We start counting ciphers defined by schemes, TKIP, CCMP, + * GCMP, and GCMP-256 */ - n_suites = local->hw.n_cipher_schemes + 2; + n_suites = local->hw.n_cipher_schemes + 4; /* check if we have WEP40 and WEP104 */ if (have_wep) @@ -742,6 +745,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) suites[w++] = WLAN_CIPHER_SUITE_CCMP; suites[w++] = WLAN_CIPHER_SUITE_TKIP; + suites[w++] = WLAN_CIPHER_SUITE_GCMP; + suites[w++] = WLAN_CIPHER_SUITE_GCMP_256; if (have_wep) { suites[w++] = WLAN_CIPHER_SUITE_WEP40; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ed516ae80a3b..a11d2518c365 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1655,6 +1655,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + result = ieee80211_crypto_gcmp_decrypt(rx); + break; default: result = ieee80211_crypto_hw_decrypt(rx); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 02ed6f60629a..e4c6fbc4bf7a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -626,6 +626,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: if (!ieee80211_is_data_present(hdr->frame_control) && !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) @@ -1014,6 +1016,9 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) return ieee80211_crypto_ccmp_encrypt(tx); case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + return ieee80211_crypto_gcmp_encrypt(tx); default: return ieee80211_crypto_hw_encrypt(tx); } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 12398fde02e8..96b65c240109 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -22,6 +22,7 @@ #include "tkip.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gcm.h" #include "wpa.h" ieee80211_tx_result @@ -546,6 +547,229 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad) +{ + __le16 mask_fc; + u8 qos_tid; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + memcpy(j_0, hdr->addr2, ETH_ALEN); + memcpy(&j_0[ETH_ALEN], pn, 
IEEE80211_GCMP_PN_LEN); + j_0[13] = 0; + j_0[14] = 0; + j_0[AES_BLOCK_SIZE - 1] = 0x01; + + /* AAD (extra authenticate-only data) / masked 802.11 header + * FC | A1 | A2 | A3 | SC | [A4] | [QC] + */ + put_unaligned_be16(ieee80211_hdrlen(hdr->frame_control) - 2, &aad[0]); + /* Mask FC: zero subtype b4 b5 b6 (if not mgmt) + * Retry, PwrMgt, MoreData; set Protected + */ + mask_fc = hdr->frame_control; + mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY | + IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); + if (!ieee80211_is_mgmt(hdr->frame_control)) + mask_fc &= ~cpu_to_le16(0x0070); + mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + + put_unaligned(mask_fc, (__le16 *)&aad[2]); + memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN); + + /* Mask Seq#, leave Frag# */ + aad[22] = *((u8 *)&hdr->seq_ctrl) & 0x0f; + aad[23] = 0; + + if (ieee80211_is_data_qos(hdr->frame_control)) + qos_tid = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_TID_MASK; + else + qos_tid = 0; + + if (ieee80211_has_a4(hdr->frame_control)) { + memcpy(&aad[24], hdr->addr4, ETH_ALEN); + aad[30] = qos_tid; + aad[31] = 0; + } else { + memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN); + aad[24] = qos_tid; + } +} + +static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id) +{ + hdr[0] = pn[5]; + hdr[1] = pn[4]; + hdr[2] = 0; + hdr[3] = 0x20 | (key_id << 6); + hdr[4] = pn[3]; + hdr[5] = pn[2]; + hdr[6] = pn[1]; + hdr[7] = pn[0]; +} + +static inline void gcmp_hdr2pn(u8 *pn, const u8 *hdr) +{ + pn[0] = hdr[7]; + pn[1] = hdr[6]; + pn[2] = hdr[5]; + pn[3] = hdr[4]; + pn[4] = hdr[1]; + pn[5] = hdr[0]; +} + +static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_key *key = tx->key; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int hdrlen, len, tail; + u8 *pos; + u8 pn[6]; + u64 pn64; + u8 aad[2 * AES_BLOCK_SIZE]; + u8 j_0[AES_BLOCK_SIZE]; + + if (info->control.hw_key && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && + !((info->control.hw_key->flags & + IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && + ieee80211_is_mgmt(hdr->frame_control))) { + /* hwaccel has no need for preallocated room for GCMP + * header or MIC fields + */ + return 0; + } + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + len = skb->len - hdrlen; + + if (info->control.hw_key) + tail = 0; + else + tail = IEEE80211_GCMP_MIC_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < IEEE80211_GCMP_HDR_LEN)) + return -1; + + pos = skb_push(skb, IEEE80211_GCMP_HDR_LEN); + memmove(pos, pos + IEEE80211_GCMP_HDR_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_GCMP_HDR_LEN); + + /* the HW only needs room for the IV, but not the actual IV */ + if (info->control.hw_key && + (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) + return 0; + + hdr = (struct ieee80211_hdr *)pos; + pos += hdrlen; + + pn64 = atomic64_inc_return(&key->u.gcmp.tx_pn); + + pn[5] = pn64; + pn[4] = pn64 >> 8; + pn[3] = pn64 >> 16; + pn[2] = pn64 >> 24; + pn[1] = pn64 >> 32; + pn[0] = pn64 >> 40; + + gcmp_pn2hdr(pos, pn, key->conf.keyidx); + + /* hwaccel - with software GCMP header */ + if (info->control.hw_key) + return 0; + + pos += IEEE80211_GCMP_HDR_LEN; + gcmp_special_blocks(skb, pn, j_0, aad); + ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, + skb_put(skb, IEEE80211_GCMP_MIC_LEN)); + + return 0; +} + 
+ieee80211_tx_result +ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + + ieee80211_tx_set_protected(tx); + + skb_queue_walk(&tx->skbs, skb) { + if (gcmp_encrypt_skb(tx, skb) < 0) + return TX_DROP; + } + + return TX_CONTINUE; +} + +ieee80211_rx_result +ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + int hdrlen; + struct ieee80211_key *key = rx->key; + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + u8 pn[IEEE80211_GCMP_PN_LEN]; + int data_len; + int queue; + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_robust_mgmt_frame(skb)) + return RX_CONTINUE; + + data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - + IEEE80211_GCMP_MIC_LEN; + if (!rx->sta || data_len < 0) + return RX_DROP_UNUSABLE; + + if (status->flag & RX_FLAG_DECRYPTED) { + if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN)) + return RX_DROP_UNUSABLE; + } else { + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + } + + gcmp_hdr2pn(pn, skb->data + hdrlen); + + queue = rx->security_idx; + + if (memcmp(pn, key->u.gcmp.rx_pn[queue], IEEE80211_GCMP_PN_LEN) <= 0) { + key->u.gcmp.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + u8 aad[2 * AES_BLOCK_SIZE]; + u8 j_0[AES_BLOCK_SIZE]; + /* hardware didn't decrypt/verify MIC */ + gcmp_special_blocks(skb, pn, j_0, aad); + + if (ieee80211_aes_gcm_decrypt( + key->u.gcmp.tfm, j_0, aad, + skb->data + hdrlen + IEEE80211_GCMP_HDR_LEN, + data_len, + skb->data + skb->len - IEEE80211_GCMP_MIC_LEN)) + return RX_DROP_UNUSABLE; + } + + memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN); + + /* Remove GCMP header and MIC */ + if (pskb_trim(skb, skb->len - IEEE80211_GCMP_MIC_LEN)) + return RX_DROP_UNUSABLE; + memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_GCMP_HDR_LEN); + + return RX_CONTINUE; +} + static ieee80211_tx_result ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx, struct sk_buff *skb) diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 62e5a12dfe0a..ea955f278351 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -37,4 +37,9 @@ ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx); +ieee80211_tx_result +ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx); + #endif /* WPA_H */ -- cgit v1.2.3 From 2b2ba0db1c820d04d5143452d70012cd44d7b578 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:07 +0200 Subject: mac80111: Add CCMP-256 cipher This allows mac80211 to configure CCMP-256 to the driver and also use software-implementation within mac80211 when the driver does not support this with hardware accelaration. 
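On the frame level, the only difference between CCMP-128 and CCMP-256 is the MIC length now threaded through as mic_len (the key grows from 16 to 32 bytes, but the 8-byte PN header is unchanged). A small stand-alone sketch, using the IEEE80211_CCMP*_HDR/MIC_LEN values from the earlier header patch (names shortened here), shows the resulting per-frame overhead.

#include <stdio.h>

/* Header/MIC sizes from include/linux/ieee80211.h in this series. */
#define CCMP_HDR_LEN      8
#define CCMP_MIC_LEN      8
#define CCMP_256_HDR_LEN  8
#define CCMP_256_MIC_LEN 16

/* mic_len, as passed to ieee80211_crypto_ccmp_{en,de}crypt(), is the only
 * per-frame variable: same 8-byte PN header, bigger MIC for CCMP-256.
 */
static unsigned int ccmp_overhead(unsigned int mic_len)
{
	return CCMP_HDR_LEN + mic_len;
}

int main(void)
{
	printf("CCMP-128 per-frame overhead: %u bytes\n",
	       ccmp_overhead(CCMP_MIC_LEN));		/* 16 */
	printf("CCMP-256 per-frame overhead: %u bytes\n",
	       ccmp_overhead(CCMP_256_MIC_LEN));	/* 24 */
	return 0;
}
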
Signed-off-by: Jouni Malinen [squash ccmp256 -> mic_len argument change] Signed-off-by: Johannes Berg --- net/mac80211/aes_ccm.c | 21 ++++++++++++--------- net/mac80211/aes_ccm.h | 10 +++++++--- net/mac80211/cfg.c | 2 ++ net/mac80211/debugfs_key.c | 3 +++ net/mac80211/key.c | 27 ++++++++++++++++++++++++++- net/mac80211/main.c | 6 ++++-- net/mac80211/rx.c | 15 ++++++++++++--- net/mac80211/tx.c | 7 ++++++- net/mac80211/wpa.c | 22 ++++++++++++---------- net/mac80211/wpa.h | 6 ++++-- 10 files changed, 88 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 09d9caaec591..7869bb40acaa 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -20,7 +20,8 @@ #include "aes_ccm.h" void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) + u8 *data, size_t data_len, u8 *mic, + size_t mic_len) { struct scatterlist assoc, pt, ct[2]; @@ -35,7 +36,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); sg_init_table(ct, 2); sg_set_buf(&ct[0], data, data_len); - sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); + sg_set_buf(&ct[1], mic, mic_len); aead_request_set_tfm(aead_req, tfm); aead_request_set_assoc(aead_req, &assoc, assoc.length); @@ -45,7 +46,8 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, } int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) + u8 *data, size_t data_len, u8 *mic, + size_t mic_len) { struct scatterlist assoc, pt, ct[2]; char aead_req_data[sizeof(struct aead_request) + @@ -62,17 +64,18 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); sg_init_table(ct, 2); sg_set_buf(&ct[0], data, data_len); - sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN); + sg_set_buf(&ct[1], mic, mic_len); aead_request_set_tfm(aead_req, tfm); aead_request_set_assoc(aead_req, &assoc, assoc.length); - aead_request_set_crypt(aead_req, ct, &pt, - data_len + IEEE80211_CCMP_MIC_LEN, b_0); + aead_request_set_crypt(aead_req, ct, &pt, data_len + mic_len, b_0); return crypto_aead_decrypt(aead_req); } -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]) +struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], + size_t key_len, + size_t mic_len) { struct crypto_aead *tfm; int err; @@ -81,9 +84,9 @@ struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]) if (IS_ERR(tfm)) return tfm; - err = crypto_aead_setkey(tfm, key, WLAN_KEY_LEN_CCMP); + err = crypto_aead_setkey(tfm, key, key_len); if (!err) - err = crypto_aead_setauthsize(tfm, IEEE80211_CCMP_MIC_LEN); + err = crypto_aead_setauthsize(tfm, mic_len); if (!err) return tfm; diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index 2c7ab1948a2e..6a73d1e4d186 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -12,11 +12,15 @@ #include -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[]); +struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], + size_t key_len, + size_t mic_len); void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); + u8 *data, size_t data_len, u8 *mic, + size_t mic_len); int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); + u8 *data, size_t data_len, u8 *mic, + size_t 
mic_len); void ieee80211_aes_key_free(struct crypto_aead *tfm); #endif /* AES_CCM_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1c1d061cff56..ef84441c119c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -162,6 +162,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: @@ -349,6 +350,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq_len = 6; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn64 = atomic64_read(&key->u.ccmp.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 0e223e602296..64de07b16092 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -94,6 +94,7 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.tx.iv16); break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn = atomic64_read(&key->u.ccmp.tx_pn); len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), @@ -141,6 +142,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, len = p - buf; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { rpn = key->u.ccmp.rx_pn[i]; p += scnprintf(p, sizeof(buf)+buf-p, @@ -185,6 +187,7 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); break; case WLAN_CIPHER_SUITE_AES_CMAC: diff --git a/net/mac80211/key.c b/net/mac80211/key.c index cbee2f5180ce..83c61085c3f0 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -163,6 +163,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_WEP104: case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: @@ -389,7 +390,26 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. */ - key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); + key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( + key_data, key_len, IEEE80211_CCMP_MIC_LEN); + if (IS_ERR(key->u.ccmp.tfm)) { + err = PTR_ERR(key->u.ccmp.tfm); + kfree(key); + return ERR_PTR(err); + } + break; + case WLAN_CIPHER_SUITE_CCMP_256: + key->conf.iv_len = IEEE80211_CCMP_256_HDR_LEN; + key->conf.icv_len = IEEE80211_CCMP_256_MIC_LEN; + for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++) + for (j = 0; j < IEEE80211_CCMP_256_PN_LEN; j++) + key->u.ccmp.rx_pn[i][j] = + seq[IEEE80211_CCMP_256_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. 
+ */ + key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( + key_data, key_len, IEEE80211_CCMP_256_MIC_LEN); if (IS_ERR(key->u.ccmp.tfm)) { err = PTR_ERR(key->u.ccmp.tfm); kfree(key); @@ -457,6 +477,7 @@ static void ieee80211_key_free_common(struct ieee80211_key *key) { switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: ieee80211_aes_key_free(key->u.ccmp.tfm); break; case WLAN_CIPHER_SUITE_AES_CMAC: @@ -773,6 +794,7 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->tkip.iv16 = key->u.tkip.tx.iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn64 = atomic64_read(&key->u.ccmp.tx_pn); seq->ccmp.pn[5] = pn64; seq->ccmp.pn[4] = pn64 >> 8; @@ -822,6 +844,7 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, seq->tkip.iv16 = key->u.tkip.rx[tid].iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) @@ -864,6 +887,7 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, key->u.tkip.tx.iv16 = seq->tkip.iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: pn64 = (u64)seq->ccmp.pn[5] | ((u64)seq->ccmp.pn[4] << 8) | ((u64)seq->ccmp.pn[3] << 16) | @@ -914,6 +938,7 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, key->u.tkip.rx[tid].iv16 = seq->tkip.iv16; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) return; if (tid < 0) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7223b4e16752..a5ad2d5bb29b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -666,6 +666,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, + WLAN_CIPHER_SUITE_CCMP_256, WLAN_CIPHER_SUITE_GCMP, WLAN_CIPHER_SUITE_GCMP_256, @@ -727,9 +728,9 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) * including the schemes) * * We start counting ciphers defined by schemes, TKIP, CCMP, - * GCMP, and GCMP-256 + * CCMP-256, GCMP, and GCMP-256 */ - n_suites = local->hw.n_cipher_schemes + 4; + n_suites = local->hw.n_cipher_schemes + 5; /* check if we have WEP40 and WEP104 */ if (have_wep) @@ -744,6 +745,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) return -ENOMEM; suites[w++] = WLAN_CIPHER_SUITE_CCMP; + suites[w++] = WLAN_CIPHER_SUITE_CCMP_256; suites[w++] = WLAN_CIPHER_SUITE_TKIP; suites[w++] = WLAN_CIPHER_SUITE_GCMP; suites[w++] = WLAN_CIPHER_SUITE_GCMP_256; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a11d2518c365..e8c6ba5ce70b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1650,7 +1650,12 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_tkip_decrypt(rx); break; case WLAN_CIPHER_SUITE_CCMP: - result = ieee80211_crypto_ccmp_decrypt(rx); + result = ieee80211_crypto_ccmp_decrypt( + rx, IEEE80211_CCMP_MIC_LEN); + break; + case WLAN_CIPHER_SUITE_CCMP_256: + result = ieee80211_crypto_ccmp_decrypt( + rx, IEEE80211_CCMP_256_MIC_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); @@ -1785,7 +1790,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is the first fragment of a new frame. 
*/ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->seqno_idx, &(rx->skb)); - if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP && + if (rx->key && + (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) && ieee80211_has_protected(fc)) { int queue = rx->security_idx; /* Store CCMP PN so that we can verify that the next @@ -1814,7 +1821,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) int i; u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; - if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) + if (!rx->key || + (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256)) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e4c6fbc4bf7a..be57e0afd019 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -626,6 +626,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (!ieee80211_is_data_present(hdr->frame_control) && @@ -1013,7 +1014,11 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) case WLAN_CIPHER_SUITE_TKIP: return ieee80211_crypto_tkip_encrypt(tx); case WLAN_CIPHER_SUITE_CCMP: - return ieee80211_crypto_ccmp_encrypt(tx); + return ieee80211_crypto_ccmp_encrypt( + tx, IEEE80211_CCMP_MIC_LEN); + case WLAN_CIPHER_SUITE_CCMP_256: + return ieee80211_crypto_ccmp_encrypt( + tx, IEEE80211_CCMP_256_MIC_LEN); case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); case WLAN_CIPHER_SUITE_GCMP: diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 96b65c240109..ae654de9782a 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -394,7 +394,8 @@ static inline void ccmp_hdr2pn(u8 *pn, u8 *hdr) } -static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) +static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, + unsigned int mic_len) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; @@ -425,7 +426,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = IEEE80211_CCMP_MIC_LEN; + tail = mic_len; if (WARN_ON(skb_tailroom(skb) < tail || skb_headroom(skb) < IEEE80211_CCMP_HDR_LEN)) @@ -460,21 +461,22 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad); ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, - skb_put(skb, IEEE80211_CCMP_MIC_LEN)); + skb_put(skb, mic_len), mic_len); return 0; } ieee80211_tx_result -ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) +ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx, + unsigned int mic_len) { struct sk_buff *skb; ieee80211_tx_set_protected(tx); skb_queue_walk(&tx->skbs, skb) { - if (ccmp_encrypt_skb(tx, skb) < 0) + if (ccmp_encrypt_skb(tx, skb, mic_len) < 0) return TX_DROP; } @@ -483,7 +485,8 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) ieee80211_rx_result -ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) +ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, + unsigned int mic_len) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; 
@@ -500,8 +503,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) !ieee80211_is_robust_mgmt_frame(skb)) return RX_CONTINUE; - data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - - IEEE80211_CCMP_MIC_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; @@ -532,14 +534,14 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) key->u.ccmp.tfm, b_0, aad, skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, data_len, - skb->data + skb->len - IEEE80211_CCMP_MIC_LEN)) + skb->data + skb->len - mic_len, mic_len)) return RX_DROP_UNUSABLE; } memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); /* Remove CCMP header and MIC */ - if (pskb_trim(skb, skb->len - IEEE80211_CCMP_MIC_LEN)) + if (pskb_trim(skb, skb->len - mic_len)) return RX_DROP_UNUSABLE; memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_CCMP_HDR_LEN); diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index ea955f278351..43e109f27a89 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -24,9 +24,11 @@ ieee80211_rx_result ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result -ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx); +ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx, + unsigned int mic_len); ieee80211_rx_result -ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx); +ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, + unsigned int mic_len); ieee80211_tx_result ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx); -- cgit v1.2.3 From 56c52da2d554f081e8fce58ecbcf6a40c605b95b Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:08 +0200 Subject: mac80111: Add BIP-CMAC-256 cipher This allows mac80211 to configure BIP-CMAC-256 to the driver and also use software-implementation within mac80211 when the driver does not support this with hardware accelaration. 
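The larger MIC also needs a new management MIC information element layout: the patch adds struct ieee80211_mmie_16 next to the existing 8-octet-MIC struct ieee80211_mmie. A stand-alone size check (field layout copied from the ieee80211.h hunk in this patch; uint16_t and GCC's __attribute__((packed)) stand in for the kernel's __le16 and __packed) shows the on-air MMIE growing from 18 to 26 octets.

#include <stdint.h>
#include <stdio.h>

struct mmie {		/* original, 8-octet MIC (BIP-CMAC-128) */
	uint8_t  element_id;
	uint8_t  length;
	uint16_t key_id;
	uint8_t  sequence_number[6];
	uint8_t  mic[8];
} __attribute__((packed));

struct mmie_16 {	/* new, 16-octet MIC (BIP-CMAC-256, BIP-GMAC) */
	uint8_t  element_id;
	uint8_t  length;
	uint16_t key_id;
	uint8_t  sequence_number[6];
	uint8_t  mic[16];
} __attribute__((packed));

int main(void)
{
	/* 1 + 1 + 2 + 6 + MIC octets */
	printf("MMIE (8-octet MIC):  %zu octets\n", sizeof(struct mmie));
	printf("MMIE (16-octet MIC): %zu octets\n", sizeof(struct mmie_16));
	return 0;
}
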
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++ net/mac80211/aes_cmac.c | 34 +++++++++++++---- net/mac80211/aes_cmac.h | 5 ++- net/mac80211/cfg.c | 2 + net/mac80211/debugfs_key.c | 4 ++ net/mac80211/key.c | 14 ++++++- net/mac80211/main.c | 13 ++++--- net/mac80211/rx.c | 21 ++++++++--- net/mac80211/tx.c | 3 ++ net/mac80211/wpa.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/wpa.h | 4 ++ 11 files changed, 180 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index dbf417bf25bf..b9c7897dc566 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1017,6 +1017,15 @@ struct ieee80211_mmie { u8 mic[8]; } __packed; +/* Management MIC information element (IEEE 802.11w) for GMAC and CMAC-256 */ +struct ieee80211_mmie_16 { + u8 element_id; + u8 length; + __le16 key_id; + u8 sequence_number[6]; + u8 mic[16]; +} __packed; + struct ieee80211_vendor_ie { u8 element_id; u8 len; diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 9b9009f99551..4192806be3d3 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -18,8 +18,8 @@ #include "key.h" #include "aes_cmac.h" -#define AES_CMAC_KEY_LEN 16 #define CMAC_TLEN 8 /* CMAC TLen = 64 bits (8 octets) */ +#define CMAC_TLEN_256 16 /* CMAC TLen = 128 bits (16 octets) */ #define AAD_LEN 20 @@ -35,9 +35,9 @@ static void gf_mulx(u8 *pad) pad[AES_BLOCK_SIZE - 1] ^= 0x87; } - -static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, - const u8 *addr[], const size_t *len, u8 *mac) +static void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, + const u8 *addr[], const size_t *len, u8 *mac, + size_t mac_len) { u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; const u8 *pos, *end; @@ -88,7 +88,7 @@ static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, for (i = 0; i < AES_BLOCK_SIZE; i++) pad[i] ^= cbc[i]; crypto_cipher_encrypt_one(tfm, pad, pad); - memcpy(mac, pad, CMAC_TLEN); + memcpy(mac, pad, mac_len); } @@ -107,17 +107,35 @@ void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, addr[2] = zero; len[2] = CMAC_TLEN; - aes_128_cmac_vector(tfm, 3, addr, len, mic); + aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN); } +void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic) +{ + const u8 *addr[3]; + size_t len[3]; + u8 zero[CMAC_TLEN_256]; + + memset(zero, 0, CMAC_TLEN_256); + addr[0] = aad; + len[0] = AAD_LEN; + addr[1] = data; + len[1] = data_len - CMAC_TLEN_256; + addr[2] = zero; + len[2] = CMAC_TLEN_256; + + aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN_256); +} -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]) +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], + size_t key_len) { struct crypto_cipher *tfm; tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (!IS_ERR(tfm)) - crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN); + crypto_cipher_setkey(tfm, key, key_len); return tfm; } diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index 0ce6487af795..3702041f44fd 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -11,9 +11,12 @@ #include -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]); +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], + size_t key_len); void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic); 
+void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic); void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm); #endif /* AES_CMAC_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ef84441c119c..b7e528bbecce 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -164,6 +164,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: break; @@ -362,6 +363,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq_len = 6; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 64de07b16092..d1b60eb014a8 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -101,6 +101,7 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn = atomic64_read(&key->u.aes_cmac.tx_pn); len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), @@ -153,6 +154,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, len = p - buf; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: rpn = key->u.aes_cmac.rx_pn; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", @@ -191,6 +193,7 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; @@ -214,6 +217,7 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 83c61085c3f0..7ceea9d9fcd2 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -165,6 +165,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: /* all of these we can do in software - if driver can */ @@ -417,8 +418,12 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, } break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: key->conf.iv_len = 0; - key->conf.icv_len = sizeof(struct ieee80211_mmie); + if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) + key->conf.icv_len = sizeof(struct ieee80211_mmie); + else + key->conf.icv_len = sizeof(struct ieee80211_mmie_16); if (seq) for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++) key->u.aes_cmac.rx_pn[j] = @@ -428,7 +433,7 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, * it does not need to be initialized for every packet. 
*/ key->u.aes_cmac.tfm = - ieee80211_aes_cmac_key_setup(key_data); + ieee80211_aes_cmac_key_setup(key_data, key_len); if (IS_ERR(key->u.aes_cmac.tfm)) { err = PTR_ERR(key->u.aes_cmac.tfm); kfree(key); @@ -481,6 +486,7 @@ static void ieee80211_key_free_common(struct ieee80211_key *key) ieee80211_aes_key_free(key->u.ccmp.tfm); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); break; case WLAN_CIPHER_SUITE_GCMP: @@ -804,6 +810,7 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[0] = pn64 >> 40; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); seq->ccmp.pn[5] = pn64; seq->ccmp.pn[4] = pn64 >> 8; @@ -854,6 +861,7 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; @@ -897,6 +905,7 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, atomic64_set(&key->u.ccmp.tx_pn, pn64); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = (u64)seq->aes_cmac.pn[5] | ((u64)seq->aes_cmac.pn[4] << 8) | ((u64)seq->aes_cmac.pn[3] << 16) | @@ -948,6 +957,7 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, memcpy(pn, seq->ccmp.pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a5ad2d5bb29b..053a17c5023a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -671,7 +671,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_GCMP_256, /* keep last -- depends on hw flags! 
*/ - WLAN_CIPHER_SUITE_AES_CMAC + WLAN_CIPHER_SUITE_AES_CMAC, + WLAN_CIPHER_SUITE_BIP_CMAC_256, }; if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || @@ -710,7 +711,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); if (!have_mfp) - local->hw.wiphy->n_cipher_suites--; + local->hw.wiphy->n_cipher_suites -= 2; if (!have_wep) { local->hw.wiphy->cipher_suites += 2; @@ -736,9 +737,9 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) if (have_wep) n_suites += 2; - /* check if we have AES_CMAC */ + /* check if we have AES_CMAC, BIP-CMAC-256 */ if (have_mfp) - n_suites++; + n_suites += 2; suites = kmalloc(sizeof(u32) * n_suites, GFP_KERNEL); if (!suites) @@ -755,8 +756,10 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) suites[w++] = WLAN_CIPHER_SUITE_WEP104; } - if (have_mfp) + if (have_mfp) { suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC; + suites[w++] = WLAN_CIPHER_SUITE_BIP_CMAC_256; + } for (r = 0; r < local->hw.n_cipher_schemes; r++) suites[w++] = cs[r].cipher; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e8c6ba5ce70b..93ebc9525478 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -647,6 +647,7 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) { struct ieee80211_mgmt *hdr = (struct ieee80211_mgmt *) skb->data; struct ieee80211_mmie *mmie; + struct ieee80211_mmie_16 *mmie16; if (skb->len < 24 + sizeof(*mmie) || !is_multicast_ether_addr(hdr->da)) return -1; @@ -656,11 +657,18 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) mmie = (struct ieee80211_mmie *) (skb->data + skb->len - sizeof(*mmie)); - if (mmie->element_id != WLAN_EID_MMIE || - mmie->length != sizeof(*mmie) - 2) - return -1; - - return le16_to_cpu(mmie->key_id); + if (mmie->element_id == WLAN_EID_MMIE && + mmie->length == sizeof(*mmie) - 2) + return le16_to_cpu(mmie->key_id); + + mmie16 = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie16)); + if (skb->len >= 24 + sizeof(*mmie16) && + mmie16->element_id == WLAN_EID_MMIE && + mmie16->length == sizeof(*mmie16) - 2) + return le16_to_cpu(mmie16->key_id); + + return -1; } static int iwl80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, @@ -1660,6 +1668,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + result = ieee80211_crypto_aes_cmac_256_decrypt(rx); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: result = ieee80211_crypto_gcmp_decrypt(rx); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index be57e0afd019..909c27be1fdc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -639,6 +639,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) ieee80211_is_mgmt(hdr->frame_control); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -1021,6 +1022,8 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) tx, IEEE80211_CCMP_256_MIC_LEN); case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + return ieee80211_crypto_aes_cmac_256_encrypt(tx); case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: return ieee80211_crypto_gcmp_encrypt(tx); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index ae654de9782a..549af118de9f 100644 --- a/net/mac80211/wpa.c 
+++ b/net/mac80211/wpa.c @@ -955,6 +955,48 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +ieee80211_tx_result +ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + struct ieee80211_tx_info *info; + struct ieee80211_key *key = tx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20]; + u64 pn64; + + if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) + return TX_DROP; + + skb = skb_peek(&tx->skbs); + + info = IEEE80211_SKB_CB(skb); + + if (info->control.hw_key) + return TX_CONTINUE; + + if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) + return TX_DROP; + + mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie)); + mmie->element_id = WLAN_EID_MMIE; + mmie->length = sizeof(*mmie) - 2; + mmie->key_id = cpu_to_le16(key->conf.keyidx); + + /* PN = PN + 1 */ + pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn); + + bip_ipn_set64(mmie->sequence_number, pn64); + + bip_aad(skb, aad); + + /* MIC = AES-256-CMAC(IGTK, AAD || Management Frame Body || MMIE, 128) + */ + ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, + skb->data + 24, skb->len - 24, mmie->mic); + + return TX_CONTINUE; +} ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) @@ -1006,6 +1048,56 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +ieee80211_rx_result +ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_key *key = rx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20], mic[16], ipn[6]; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return RX_CONTINUE; + + /* management frames are already linear */ + + if (skb->len < 24 + sizeof(*mmie)) + return RX_DROP_UNUSABLE; + + mmie = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return RX_DROP_UNUSABLE; /* Invalid MMIE */ + + bip_ipn_swap(ipn, mmie->sequence_number); + + if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) { + key->u.aes_cmac.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ + bip_aad(skb, aad); + ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, + skb->data + 24, skb->len - 24, mic); + if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + key->u.aes_cmac.icverrors++; + return RX_DROP_UNUSABLE; + } + } + + memcpy(key->u.aes_cmac.rx_pn, ipn, 6); + + /* Remove MMIE */ + skb_trim(skb, skb->len - sizeof(*mmie)); + + return RX_CONTINUE; +} + ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) { diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 43e109f27a89..06b7f167a176 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -32,8 +32,12 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, ieee80211_tx_result ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx); +ieee80211_tx_result +ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx); +ieee80211_rx_result +ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result -- cgit v1.2.3 From 8ade538bf39b1ee53418528fdacd36b8e65621b9 Mon 
Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:09 +0200 Subject: mac80111: Add BIP-GMAC-128 and BIP-GMAC-256 ciphers This allows mac80211 to configure BIP-GMAC-128 and BIP-GMAC-256 to the driver and also use software-implementation within mac80211 when the driver does not support this with hardware accelaration. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++ net/mac80211/Makefile | 1 + net/mac80211/aes_gmac.c | 84 ++++++++++++++++++++++++++++++++++++ net/mac80211/aes_gmac.h | 20 +++++++++ net/mac80211/cfg.c | 14 ++++++ net/mac80211/debugfs_key.c | 26 +++++++++++ net/mac80211/key.c | 60 ++++++++++++++++++++++++++ net/mac80211/key.h | 7 +++ net/mac80211/main.c | 12 ++++-- net/mac80211/rx.c | 4 ++ net/mac80211/tx.c | 5 +++ net/mac80211/wpa.c | 105 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/wpa.h | 4 ++ 13 files changed, 344 insertions(+), 3 deletions(-) create mode 100644 net/mac80211/aes_gmac.c create mode 100644 net/mac80211/aes_gmac.h (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ae6638436112..d52914b75331 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4098,6 +4098,8 @@ void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, * reverse order than in packet) * @aes_cmac: PN data, most significant byte first (big endian, * reverse order than in packet) + * @aes_gmac: PN data, most significant byte first (big endian, + * reverse order than in packet) * @gcmp: PN data, most significant byte first (big endian, * reverse order than in packet) */ @@ -4113,6 +4115,9 @@ struct ieee80211_key_seq { struct { u8 pn[6]; } aes_cmac; + struct { + u8 pn[6]; + } aes_gmac; struct { u8 pn[6]; } gcmp; diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 0cbf93618433..3275f01881be 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -17,6 +17,7 @@ mac80211-y := \ aes_ccm.o \ aes_gcm.o \ aes_cmac.o \ + aes_gmac.o \ cfg.o \ ethtool.o \ rx.o \ diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c new file mode 100644 index 000000000000..1c72edcb0083 --- /dev/null +++ b/net/mac80211/aes_gmac.c @@ -0,0 +1,84 @@ +/* + * AES-GMAC for IEEE 802.11 BIP-GMAC-128 and BIP-GMAC-256 + * Copyright 2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include "key.h" +#include "aes_gmac.h" + +#define GMAC_MIC_LEN 16 +#define GMAC_NONCE_LEN 12 +#define AAD_LEN 20 + +int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, + const u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist sg[3], ct[1]; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + u8 zero[GMAC_MIC_LEN], iv[AES_BLOCK_SIZE]; + + if (data_len < GMAC_MIC_LEN) + return -EINVAL; + + memset(aead_req, 0, sizeof(aead_req_data)); + + memset(zero, 0, GMAC_MIC_LEN); + sg_init_table(sg, 3); + sg_set_buf(&sg[0], aad, AAD_LEN); + sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); + sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); + + memcpy(iv, nonce, GMAC_NONCE_LEN); + memset(iv + GMAC_NONCE_LEN, 0, sizeof(iv) - GMAC_NONCE_LEN); + iv[AES_BLOCK_SIZE - 1] = 0x01; + + sg_init_table(ct, 1); + sg_set_buf(&ct[0], mic, GMAC_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, sg, AAD_LEN + data_len); + aead_request_set_crypt(aead_req, NULL, ct, 0, iv); + + crypto_aead_encrypt(aead_req); + + return 0; +} + +struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], + size_t key_len) +{ + struct crypto_aead *tfm; + int err; + + tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return tfm; + + err = crypto_aead_setkey(tfm, key, key_len); + if (!err) + return tfm; + if (!err) + err = crypto_aead_setauthsize(tfm, GMAC_MIC_LEN); + + crypto_free_aead(tfm); + return ERR_PTR(err); +} + +void ieee80211_aes_gmac_key_free(struct crypto_aead *tfm) +{ + crypto_free_aead(tfm); +} diff --git a/net/mac80211/aes_gmac.h b/net/mac80211/aes_gmac.h new file mode 100644 index 000000000000..d328204d73a8 --- /dev/null +++ b/net/mac80211/aes_gmac.h @@ -0,0 +1,20 @@ +/* + * Copyright 2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef AES_GMAC_H +#define AES_GMAC_H + +#include + +struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], + size_t key_len); +int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, + const u8 *data, size_t data_len, u8 *mic); +void ieee80211_aes_gmac_key_free(struct crypto_aead *tfm); + +#endif /* AES_GMAC_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b7e528bbecce..dd4ff36c557a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -165,6 +165,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: break; @@ -374,6 +376,18 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + params.seq = seq; + params.seq_len = 6; + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: pn64 = atomic64_read(&key->u.gcmp.tx_pn); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index d1b60eb014a8..71ac1b5f4da5 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -107,6 +107,13 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn = atomic64_read(&key->u.aes_gmac.tx_pn); + len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: pn = atomic64_read(&key->u.gcmp.tx_pn); @@ -162,6 +169,15 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, rpn[3], rpn[4], rpn[5]); len = p - buf; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + rpn = key->u.aes_gmac.rx_pn; + p += scnprintf(p, sizeof(buf)+buf-p, + "%02x%02x%02x%02x%02x%02x\n", + rpn[0], rpn[1], rpn[2], + rpn[3], rpn[4], rpn[5]); + len = p - buf; + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { @@ -197,6 +213,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_gmac.replays); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.gcmp.replays); @@ -221,6 +242,11 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_gmac.icverrors); + break; default: return 0; } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 7ceea9d9fcd2..0825d76edcfc 
100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -24,6 +24,7 @@ #include "debugfs_key.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gmac.h" #include "aes_gcm.h" @@ -166,6 +167,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: /* all of these we can do in software - if driver can */ @@ -440,6 +443,25 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return ERR_PTR(err); } break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + key->conf.iv_len = 0; + key->conf.icv_len = sizeof(struct ieee80211_mmie_16); + if (seq) + for (j = 0; j < IEEE80211_GMAC_PN_LEN; j++) + key->u.aes_gmac.rx_pn[j] = + seq[IEEE80211_GMAC_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.aes_gmac.tfm = + ieee80211_aes_gmac_key_setup(key_data, key_len); + if (IS_ERR(key->u.aes_gmac.tfm)) { + err = PTR_ERR(key->u.aes_gmac.tfm); + kfree(key); + return ERR_PTR(err); + } + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: key->conf.iv_len = IEEE80211_GCMP_HDR_LEN; @@ -489,6 +511,10 @@ static void ieee80211_key_free_common(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_BIP_CMAC_256: ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + ieee80211_aes_gmac_key_free(key->u.aes_gmac.tfm); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: ieee80211_aes_gcm_key_free(key->u.gcmp.tfm); @@ -819,6 +845,16 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[1] = pn64 >> 32; seq->ccmp.pn[0] = pn64 >> 40; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq->ccmp.pn[5] = pn64; + seq->ccmp.pn[4] = pn64 >> 8; + seq->ccmp.pn[3] = pn64 >> 16; + seq->ccmp.pn[2] = pn64 >> 24; + seq->ccmp.pn[1] = pn64 >> 32; + seq->ccmp.pn[0] = pn64 >> 40; + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: pn64 = atomic64_read(&key->u.gcmp.tx_pn); @@ -867,6 +903,13 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.aes_cmac.rx_pn; memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (WARN_ON(tid != 0)) + return; + pn = key->u.aes_gmac.rx_pn; + memcpy(seq->aes_gmac.pn, pn, IEEE80211_GMAC_PN_LEN); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) @@ -914,6 +957,16 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, ((u64)seq->aes_cmac.pn[0] << 40); atomic64_set(&key->u.aes_cmac.tx_pn, pn64); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = (u64)seq->aes_gmac.pn[5] | + ((u64)seq->aes_gmac.pn[4] << 8) | + ((u64)seq->aes_gmac.pn[3] << 16) | + ((u64)seq->aes_gmac.pn[2] << 24) | + ((u64)seq->aes_gmac.pn[1] << 32) | + ((u64)seq->aes_gmac.pn[0] << 40); + atomic64_set(&key->u.aes_gmac.tx_pn, pn64); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: pn64 = (u64)seq->gcmp.pn[5] | @@ -963,6 +1016,13 @@ void 
ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.aes_cmac.rx_pn; memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (WARN_ON(tid != 0)) + return; + pn = key->u.aes_gmac.rx_pn; + memcpy(pn, seq->aes_gmac.pn, IEEE80211_GMAC_PN_LEN); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 27580da851c8..d57a9915494f 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -94,6 +94,13 @@ struct ieee80211_key { u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; + struct { + atomic64_t tx_pn; + u8 rx_pn[IEEE80211_GMAC_PN_LEN]; + struct crypto_aead *tfm; + u32 replays; /* dot11RSNAStatsCMACReplays */ + u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ + } aes_gmac; struct { atomic64_t tx_pn; /* Last received packet number. The first diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 053a17c5023a..5e09d354c5a5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -673,6 +673,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) /* keep last -- depends on hw flags! */ WLAN_CIPHER_SUITE_AES_CMAC, WLAN_CIPHER_SUITE_BIP_CMAC_256, + WLAN_CIPHER_SUITE_BIP_GMAC_128, + WLAN_CIPHER_SUITE_BIP_GMAC_256, }; if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || @@ -711,7 +713,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); if (!have_mfp) - local->hw.wiphy->n_cipher_suites -= 2; + local->hw.wiphy->n_cipher_suites -= 4; if (!have_wep) { local->hw.wiphy->cipher_suites += 2; @@ -737,9 +739,11 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) if (have_wep) n_suites += 2; - /* check if we have AES_CMAC, BIP-CMAC-256 */ + /* check if we have AES_CMAC, BIP-CMAC-256, BIP-GMAC-128, + * BIP-GMAC-256 + */ if (have_mfp) - n_suites += 2; + n_suites += 4; suites = kmalloc(sizeof(u32) * n_suites, GFP_KERNEL); if (!suites) @@ -759,6 +763,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) if (have_mfp) { suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC; suites[w++] = WLAN_CIPHER_SUITE_BIP_CMAC_256; + suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_128; + suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256; } for (r = 0; r < local->hw.n_cipher_schemes; r++) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 93ebc9525478..ed38d8302659 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1671,6 +1671,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) case WLAN_CIPHER_SUITE_BIP_CMAC_256: result = ieee80211_crypto_aes_cmac_256_decrypt(rx); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + result = ieee80211_crypto_aes_gmac_decrypt(rx); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: result = ieee80211_crypto_gcmp_decrypt(rx); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 909c27be1fdc..88a18ffe2975 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -640,6 +640,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -1024,6 +1026,9 @@ ieee80211_tx_h_encrypt(struct 
ieee80211_tx_data *tx) return ieee80211_crypto_aes_cmac_encrypt(tx); case WLAN_CIPHER_SUITE_BIP_CMAC_256: return ieee80211_crypto_aes_cmac_256_encrypt(tx); + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + return ieee80211_crypto_aes_gmac_encrypt(tx); case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: return ieee80211_crypto_gcmp_encrypt(tx); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 549af118de9f..75de6fac40d1 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -22,6 +22,7 @@ #include "tkip.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gmac.h" #include "aes_gcm.h" #include "wpa.h" @@ -1098,6 +1099,110 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +ieee80211_tx_result +ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + struct ieee80211_tx_info *info; + struct ieee80211_key *key = tx->key; + struct ieee80211_mmie_16 *mmie; + struct ieee80211_hdr *hdr; + u8 aad[20]; + u64 pn64; + u8 nonce[12]; + + if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) + return TX_DROP; + + skb = skb_peek(&tx->skbs); + + info = IEEE80211_SKB_CB(skb); + + if (info->control.hw_key) + return TX_CONTINUE; + + if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) + return TX_DROP; + + mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie)); + mmie->element_id = WLAN_EID_MMIE; + mmie->length = sizeof(*mmie) - 2; + mmie->key_id = cpu_to_le16(key->conf.keyidx); + + /* PN = PN + 1 */ + pn64 = atomic64_inc_return(&key->u.aes_gmac.tx_pn); + + bip_ipn_set64(mmie->sequence_number, pn64); + + bip_aad(skb, aad); + + hdr = (struct ieee80211_hdr *)skb->data; + memcpy(nonce, hdr->addr2, ETH_ALEN); + bip_ipn_swap(nonce + ETH_ALEN, mmie->sequence_number); + + /* MIC = AES-GMAC(IGTK, AAD || Management Frame Body || MMIE, 128) */ + if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, + skb->data + 24, skb->len - 24, mmie->mic) < 0) + return TX_DROP; + + return TX_CONTINUE; +} + +ieee80211_rx_result +ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_key *key = rx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20], mic[16], ipn[6], nonce[12]; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return RX_CONTINUE; + + /* management frames are already linear */ + + if (skb->len < 24 + sizeof(*mmie)) + return RX_DROP_UNUSABLE; + + mmie = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return RX_DROP_UNUSABLE; /* Invalid MMIE */ + + bip_ipn_swap(ipn, mmie->sequence_number); + + if (memcmp(ipn, key->u.aes_gmac.rx_pn, 6) <= 0) { + key->u.aes_gmac.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ + bip_aad(skb, aad); + + memcpy(nonce, hdr->addr2, ETH_ALEN); + memcpy(nonce + ETH_ALEN, ipn, 6); + + if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, + skb->data + 24, skb->len - 24, + mic) < 0 || + memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + key->u.aes_gmac.icverrors++; + return RX_DROP_UNUSABLE; + } + } + + memcpy(key->u.aes_gmac.rx_pn, ipn, 6); + + /* Remove MMIE */ + skb_trim(skb, skb->len - sizeof(*mmie)); + + return RX_CONTINUE; +} + ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct 
ieee80211_tx_data *tx) { diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 06b7f167a176..d98011ee8f55 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -39,6 +39,10 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx); ieee80211_rx_result ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result +ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx); +ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx); -- cgit v1.2.3 From 06539d3071067ff146a9bffd1c801fa56d290909 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Jan 2015 12:25:33 -0800 Subject: net: don't OOPS on socket aio Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/socket.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index a2c33a4dc7ba..418795caa897 100644 --- a/net/socket.c +++ b/net/socket.c @@ -869,9 +869,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, struct sock_iocb *siocb) { - if (!is_sync_kiocb(iocb)) - BUG(); - siocb->kiocb = iocb; iocb->private = siocb; return siocb; -- cgit v1.2.3 From fda7a49cb991e9da15f5955d1ea292f8ec74f27a Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:11 +0100 Subject: NFC: hci: Change event_received handler gate parameter to pipe Several pipes may point to the same CLF gate, so getting the gate ID as an input is not enough. For example dual secure element may have 2 pipes (1 for uicc and 1 for eSE) pointing to the connectivity gate. As resolving gate and host IDs can be done from a pipe, we now pass the pipe ID to the event received handler. 
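As a rough illustration of why the pipe ID is the more useful handle, the sketch below is plain stand-alone C with made-up table sizes, gate and host numbers, not the kernel structures. It resolves both the gate and the originating host from a per-pipe table, something a bare gate ID cannot express when, as in the dual secure element case described above, two pipes terminate on the same connectivity gate.

#include <stdint.h>
#include <stdio.h>

#define MAX_PIPES    127
#define INVALID_GATE 0xff
#define INVALID_HOST 0x80

struct pipe_info {
    uint8_t gate;       /* CLF gate this pipe is bound to */
    uint8_t dest_host;  /* host on the other end of the pipe */
};

static struct pipe_info pipes[MAX_PIPES];

static void event_received(uint8_t pipe, uint8_t event)
{
    uint8_t gate = pipes[pipe].gate;
    uint8_t host = pipes[pipe].dest_host;

    if (gate == INVALID_GATE) {
        printf("event %#x on unbound pipe %#x, discarded\n", event, pipe);
        return;
    }
    printf("event %#x: pipe %#x -> gate %#x, host %#x\n",
           event, pipe, gate, host);
}

int main(void)
{
    for (int i = 0; i < MAX_PIPES; i++)
        pipes[i] = (struct pipe_info){ INVALID_GATE, INVALID_HOST };

    /* two pipes bound to the same (illustrative) gate, different hosts */
    pipes[0x21] = (struct pipe_info){ 0x41, 0x02 };  /* e.g. UICC */
    pipes[0x23] = (struct pipe_info){ 0x41, 0xc0 };  /* e.g. eSE  */

    event_received(0x21, 0x10);
    event_received(0x23, 0x10);
    event_received(0x30, 0x10);  /* never created: discarded */
    return 0;
}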
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 2 +- net/nfc/hci/core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 14bd0e1c47fa..031c0be9fb32 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -51,7 +51,7 @@ struct nfc_hci_ops { int (*tm_send)(struct nfc_hci_dev *hdev, struct sk_buff *skb); int (*check_presence)(struct nfc_hci_dev *hdev, struct nfc_target *target); - int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event, + int (*event_received)(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb); int (*fw_download)(struct nfc_hci_dev *hdev, const char *firmware_name); int (*discover_se)(struct nfc_hci_dev *dev); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index ef50e7716c4a..12a9a4b956d2 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -338,7 +338,7 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, } if (hdev->ops->event_received) { - r = hdev->ops->event_received(hdev, gate, event, skb); + r = hdev->ops->event_received(hdev, pipe, event, skb); if (r <= 0) goto exit_noskb; } -- cgit v1.2.3 From 118278f20aa89efe45fa1e2b1829f198d557f8fe Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:12 +0100 Subject: NFC: hci: Add pipes table to reference them with a tuple {gate, host} In order to keep host source information on specific hci event (such as evt_connectivity or evt_transaction) and because 2 pipes can be connected to the same gate, it is necessary to add a table referencing every pipe with a {gate, host} tuple. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 13 ++++++++++++- net/nfc/hci/command.c | 6 ++++-- net/nfc/hci/core.c | 36 +++++++++++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 031c0be9fb32..5570f4a316d1 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -63,8 +63,10 @@ struct nfc_hci_ops { }; /* Pipes */ -#define NFC_HCI_INVALID_PIPE 0x80 #define NFC_HCI_DO_NOT_CREATE_PIPE 0x81 +#define NFC_HCI_INVALID_PIPE 0x80 +#define NFC_HCI_INVALID_GATE 0xFF +#define NFC_HCI_INVALID_HOST 0x80 #define NFC_HCI_LINK_MGMT_PIPE 0x00 #define NFC_HCI_ADMIN_PIPE 0x01 @@ -73,7 +75,13 @@ struct nfc_hci_gate { u8 pipe; }; +struct nfc_hci_pipe { + u8 gate; + u8 dest_host; +}; + #define NFC_HCI_MAX_CUSTOM_GATES 50 +#define NFC_HCI_MAX_PIPES 127 struct nfc_hci_init_data { u8 gate_count; struct nfc_hci_gate gates[NFC_HCI_MAX_CUSTOM_GATES]; @@ -125,6 +133,7 @@ struct nfc_hci_dev { void *clientdata; u8 gate2pipe[NFC_HCI_MAX_GATES]; + struct nfc_hci_pipe pipes[NFC_HCI_MAX_PIPES]; u8 sw_romlib; u8 sw_patch; @@ -167,6 +176,8 @@ void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev); void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err); int nfc_hci_result_to_errno(u8 result); +void nfc_hci_reset_pipes(struct nfc_hci_dev *dev); +void nfc_hci_reset_pipes_per_host(struct nfc_hci_dev *hdev, u8 host); /* Host IDs */ #define NFC_HCI_HOST_CONTROLLER_ID 0x00 diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 91df487aa0a9..9acf586c98d4 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -331,7 +331,7 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) if (r < 0) return r; - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + 
nfc_hci_reset_pipes(hdev); return 0; } @@ -345,7 +345,7 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, pr_debug("\n"); - if (hdev->gate2pipe[dest_gate] == NFC_HCI_DO_NOT_CREATE_PIPE) + if (pipe == NFC_HCI_DO_NOT_CREATE_PIPE) return 0; if (hdev->gate2pipe[dest_gate] != NFC_HCI_INVALID_PIPE) @@ -380,6 +380,8 @@ open_pipe: return r; } + hdev->pipes[pipe].gate = dest_gate; + hdev->pipes[pipe].dest_host = dest_host; hdev->gate2pipe[dest_gate] = pipe; return 0; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 12a9a4b956d2..8f8abfed7f65 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -46,6 +46,32 @@ int nfc_hci_result_to_errno(u8 result) } EXPORT_SYMBOL(nfc_hci_result_to_errno); +void nfc_hci_reset_pipes(struct nfc_hci_dev *hdev) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } + memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); +} +EXPORT_SYMBOL(nfc_hci_reset_pipes); + +void nfc_hci_reset_pipes_per_host(struct nfc_hci_dev *hdev, u8 host) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + if (hdev->pipes[i].dest_host != host) + continue; + + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } +} +EXPORT_SYMBOL(nfc_hci_reset_pipes_per_host); + static void nfc_hci_msg_tx_work(struct work_struct *work) { struct nfc_hci_dev *hdev = container_of(work, struct nfc_hci_dev, @@ -168,7 +194,7 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, pipe); + u8 gate = hdev->pipes[pipe].gate; u8 local_gate, new_pipe; u8 gate_opened = 0x00; @@ -330,9 +356,9 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, pipe); + u8 gate = hdev->pipes[pipe].gate; - if (gate == 0xff) { + if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; } @@ -573,7 +599,7 @@ static int hci_dev_down(struct nfc_dev *nfc_dev) if (hdev->ops->close) hdev->ops->close(hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); return 0; } @@ -932,7 +958,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, nfc_set_drvdata(hdev->ndev, hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); hdev->quirks = quirks; -- cgit v1.2.3 From af77522320aa0e5b4b52dce615ad067d92e15fbf Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:13 +0100 Subject: NFC: hci: Change nfc_hci_send_response gate parameter to pipe As there can be several pipes connected to the same gate, we need to know which pipe ID to use when sending an HCI response. A gate ID is not enough. Instead of changing the nfc_hci_send_response() API to something not aligned with the rest of the HCI API, we call nfc_hci_hcp_message_tx directly. 
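The sketch below illustrates the ambiguity being removed; it is simplified stand-alone C with invented pipe and gate numbers, not the HCI core code. A reply keyed by gate has to go through a single gate-to-pipe mapping and can pick the wrong pipe when two pipes share the gate, while a reply keyed by the pipe the command arrived on is exact.

#include <stdint.h>
#include <stdio.h>

#define MAX_GATES    256
#define INVALID_PIPE 0x80

static uint8_t gate2pipe[MAX_GATES];  /* one entry per gate: loses information */

static void send_response_on_pipe(uint8_t pipe, uint8_t result)
{
    printf("HCP response %#x sent on pipe %#x\n", result, pipe);
}

/* Old style: only the gate is known, so the pipe must be guessed. */
static int send_response_by_gate(uint8_t gate, uint8_t result)
{
    uint8_t pipe = gate2pipe[gate];

    if (pipe == INVALID_PIPE)
        return -1;
    send_response_on_pipe(pipe, result);  /* may be the wrong pipe */
    return 0;
}

int main(void)
{
    for (int i = 0; i < MAX_GATES; i++)
        gate2pipe[i] = INVALID_PIPE;

    gate2pipe[0x41] = 0x23;             /* last pipe bound to the gate wins */

    /* the command actually arrived on pipe 0x21, bound to the same gate */
    send_response_by_gate(0x41, 0x00);  /* replies on 0x23: wrong pipe */
    send_response_on_pipe(0x21, 0x00);  /* pipe-keyed reply: correct */
    return 0;
}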
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 2 -- net/nfc/hci/command.c | 17 ----------------- net/nfc/hci/core.c | 12 ++++++++---- 3 files changed, 8 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 5570f4a316d1..1d1fd2b98f1e 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -260,8 +260,6 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, const u8 *param, size_t param_len, data_exchange_cb_t cb, void *cb_context); -int nfc_hci_send_response(struct nfc_hci_dev *hdev, u8 gate, u8 response, - const u8 *param, size_t param_len); int nfc_hci_send_event(struct nfc_hci_dev *hdev, u8 gate, u8 event, const u8 *param, size_t param_len); int nfc_hci_target_discovered(struct nfc_hci_dev *hdev, u8 gate); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 9acf586c98d4..844673cb7c18 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -116,23 +116,6 @@ int nfc_hci_send_event(struct nfc_hci_dev *hdev, u8 gate, u8 event, } EXPORT_SYMBOL(nfc_hci_send_event); -int nfc_hci_send_response(struct nfc_hci_dev *hdev, u8 gate, u8 response, - const u8 *param, size_t param_len) -{ - u8 pipe; - - pr_debug("\n"); - - pipe = hdev->gate2pipe[gate]; - if (pipe == NFC_HCI_INVALID_PIPE) - return -EADDRNOTAVAIL; - - return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, - response, param, param_len, NULL, NULL, - 0); -} -EXPORT_SYMBOL(nfc_hci_send_response); - /* * Execute an hci command sent to gate. * skb will contain response data if success. skb can be NULL if you are not diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 8f8abfed7f65..e351e94f8d40 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -209,7 +209,8 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, local_gate = skb->data[3]; new_pipe = skb->data[4]; - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + NFC_HCI_ANY_OK, NULL, 0, NULL, NULL, 0); /* save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -223,11 +224,14 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, * open it */ if (gate != 0xff) - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, - &gate_opened, 1); + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + NFC_HCI_ANY_OK, &gate_opened, 1, + NULL, NULL, 0); break; case NFC_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + NFC_HCI_ANY_OK, NULL, 0, NULL, NULL, 0); + break; default: pr_info("Discarded unknown cmd %x to gate %x\n", cmd, gate); -- cgit v1.2.3 From 615b524aca0bff52ce6654ddf26546546eb02e93 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:14 +0100 Subject: NFC: hci: Reference every pipe information according to notification We update the tracked pipes status when receiving HCI commands. Also we forward HCI errors and we reply to any HCI command, even though we don't support it. 
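A condensed sketch of that bookkeeping is shown below. It is stand-alone C with placeholder opcode and status values (the real NFC_HCI_* constants are not reproduced here) and it only covers the three administrative notifications: pipe created, pipe deleted and all pipes cleared. Malformed payloads are answered with an error status instead of being silently dropped, and every command, known or not, gets a response.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_PIPES    127
#define INVALID_GATE 0xff
#define INVALID_HOST 0x80

/* placeholder opcodes and status codes, not the ETSI HCI values */
#define ADM_NOTIFY_PIPE_CREATED     0x01
#define ADM_NOTIFY_PIPE_DELETED     0x02
#define ADM_NOTIFY_ALL_PIPE_CLEARED 0x03
#define ANY_OK                      0x00
#define ANY_E_NOK                   0x80

struct pipe_info { uint8_t gate, dest_host; };
static struct pipe_info pipes[MAX_PIPES];

static void send_response(uint8_t pipe, uint8_t status)
{
    printf("response %#x on pipe %#x\n", status, pipe);
}

static void cmd_received(uint8_t pipe, uint8_t cmd,
                         const uint8_t *data, size_t len)
{
    uint8_t status = ANY_OK;

    switch (cmd) {
    case ADM_NOTIFY_PIPE_CREATED:
        if (len != 5) { status = ANY_E_NOK; break; }
        /* payload: src_host, src_gate, dest_host, dest_gate, pipe */
        pipes[data[4]].gate = data[3];
        pipes[data[4]].dest_host = data[0];
        break;
    case ADM_NOTIFY_PIPE_DELETED:
        if (len != 1) { status = ANY_E_NOK; break; }
        pipes[data[0]] = (struct pipe_info){ INVALID_GATE, INVALID_HOST };
        break;
    case ADM_NOTIFY_ALL_PIPE_CLEARED:
        if (len != 1) { status = ANY_E_NOK; break; }
        for (int i = 0; i < MAX_PIPES; i++)
            if (pipes[i].dest_host == data[0])
                pipes[i] = (struct pipe_info){ INVALID_GATE, INVALID_HOST };
        break;
    default:
        break;  /* unknown commands are still answered */
    }
    send_response(pipe, status);
}

int main(void)
{
    for (int i = 0; i < MAX_PIPES; i++)
        pipes[i] = (struct pipe_info){ INVALID_GATE, INVALID_HOST };

    uint8_t created[5] = { 0x02, 0x41, 0x01, 0x41, 0x21 };
    uint8_t cleared[1] = { 0x02 };

    cmd_received(0x01, ADM_NOTIFY_PIPE_CREATED, created, sizeof(created));
    cmd_received(0x01, ADM_NOTIFY_ALL_PIPE_CLEARED, cleared, sizeof(cleared));
    return 0;
}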
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- net/nfc/hci/core.c | 58 +++++++++++++++++++++++++++++++++--------------------- net/nfc/hci/hci.h | 8 ++++++++ 2 files changed, 44 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index e351e94f8d40..a664a67dff1c 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -193,52 +193,66 @@ exit: void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { - int r = 0; u8 gate = hdev->pipes[pipe].gate; - u8 local_gate, new_pipe; - u8 gate_opened = 0x00; + u8 status = NFC_HCI_ANY_OK; + struct hci_create_pipe_resp *create_info; + struct hci_delete_pipe_noti *delete_info; + struct hci_all_pipe_cleared_noti *cleared_info; pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: if (skb->len != 5) { - r = -EPROTO; - break; + status = NFC_HCI_ANY_E_NOK; + goto exit; } + create_info = (struct hci_create_pipe_resp *)skb->data; - local_gate = skb->data[3]; - new_pipe = skb->data[4]; - nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, - NFC_HCI_ANY_OK, NULL, 0, NULL, NULL, 0); - - /* save the new created pipe and bind with local gate, + /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we * are the destination and it is our local gate */ - hdev->gate2pipe[local_gate] = new_pipe; + hdev->gate2pipe[create_info->dest_gate] = create_info->pipe; + hdev->pipes[create_info->pipe].gate = create_info->dest_gate; + hdev->pipes[create_info->pipe].dest_host = + create_info->src_host; break; case NFC_HCI_ANY_OPEN_PIPE: - /* if the pipe is already created, we allow remote host to - * open it - */ - if (gate != 0xff) - nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, - NFC_HCI_ANY_OK, &gate_opened, 1, - NULL, NULL, 0); + if (gate == NFC_HCI_INVALID_GATE) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + break; + case NFC_HCI_ADM_NOTIFY_PIPE_DELETED: + if (skb->len != 1) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + delete_info = (struct hci_delete_pipe_noti *)skb->data; + + hdev->pipes[delete_info->pipe].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[delete_info->pipe].dest_host = NFC_HCI_INVALID_HOST; break; case NFC_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: - nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, - NFC_HCI_ANY_OK, NULL, 0, NULL, NULL, 0); + if (skb->len != 1) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + cleared_info = (struct hci_all_pipe_cleared_noti *)skb->data; + nfc_hci_reset_pipes_per_host(hdev, cleared_info->host); break; default: pr_info("Discarded unknown cmd %x to gate %x\n", cmd, gate); - r = -EINVAL; break; } +exit: + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + status, NULL, 0, NULL, NULL, 0); + kfree_skb(skb); } diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index c3d2e2c1394c..c1278bd0fc5e 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -65,6 +65,14 @@ struct hci_create_pipe_resp { u8 pipe; } __packed; +struct hci_delete_pipe_noti { + u8 pipe; +} __packed; + +struct hci_all_pipe_cleared_noti { + u8 host; +} __packed; + #define NFC_HCI_FRAGMENT 0x7f #define HCP_HEADER(type, instr) ((((type) & 0x03) << 6) | ((instr) & 0x3f)) -- cgit v1.2.3 From 8409e4283c1ca62ce107564de7ff93b4dd476d41 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:15 +0100 Subject: NFC: hci: Add cmd_received handler When a 
command is received, it is sometime needed to let the CLF driver do some additional operations. (ex: count remaining pipe notification...) Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 8 ++++++++ net/nfc/hci/core.c | 3 +++ 2 files changed, 11 insertions(+) (limited to 'net') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 1d1fd2b98f1e..ab672b537dd4 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -53,6 +53,8 @@ struct nfc_hci_ops { struct nfc_target *target); int (*event_received)(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb); + void (*cmd_received)(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, + struct sk_buff *skb); int (*fw_download)(struct nfc_hci_dev *hdev, const char *firmware_name); int (*discover_se)(struct nfc_hci_dev *dev); int (*enable_se)(struct nfc_hci_dev *dev, u32 se_idx); @@ -230,6 +232,12 @@ void nfc_hci_reset_pipes_per_host(struct nfc_hci_dev *hdev, u8 host); #define NFC_HCI_EVT_POST_DATA 0x02 #define NFC_HCI_EVT_HOT_PLUG 0x03 +/* Generic commands */ +#define NFC_HCI_ANY_SET_PARAMETER 0x01 +#define NFC_HCI_ANY_GET_PARAMETER 0x02 +#define NFC_HCI_ANY_OPEN_PIPE 0x03 +#define NFC_HCI_ANY_CLOSE_PIPE 0x04 + /* Reader RF gates events */ #define NFC_HCI_EVT_READER_REQUESTED 0x10 #define NFC_HCI_EVT_END_OPERATION 0x11 diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index a664a67dff1c..6e061da2258a 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -249,6 +249,9 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, break; } + if (hdev->ops->cmd_received) + hdev->ops->cmd_received(hdev, pipe, cmd, skb); + exit: nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, status, NULL, 0, NULL, NULL, 0); -- cgit v1.2.3 From ec14b6c93c2f804f302b8ea1736539d39b9c544b Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:18 +0100 Subject: NFC: hci: Remove nfc_hci_pipe2gate function With the newly introduced pipes table hci_dev fields, the nfc_hci_pipe2gate routine is no longer needed. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- net/nfc/hci/hci.h | 2 -- net/nfc/hci/hcp.c | 11 ----------- 2 files changed, 13 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index c1278bd0fc5e..ab4c8e80b1ad 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -85,8 +85,6 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, data_exchange_cb_t cb, void *cb_context, unsigned long completion_delay); -u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe); - void nfc_hci_hcp_message_rx(struct nfc_hci_dev *hdev, u8 pipe, u8 type, u8 instruction, struct sk_buff *skb); diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index e9de1514656e..1fe725d66085 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -124,17 +124,6 @@ out_skb_err: return err; } -u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe) -{ - int gate; - - for (gate = 0; gate < NFC_HCI_MAX_GATES; gate++) - if (hdev->gate2pipe[gate] == pipe) - return gate; - - return 0xff; -} - /* * Receive hcp message for pipe, with type and cmd. * skb contains optional message data only. -- cgit v1.2.3 From d25b78e2ed60ea33dc1008d0f50b3410b73212a0 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 27 Jan 2015 12:55:52 +0200 Subject: Bluetooth: Enforce zero-valued hash/rand192 for LE OOB Until legacy SMP OOB pairing is implemented user space should be given a clear error when trying to use it. 
This patch adds a corresponding check to the Add Remote OOB Data handler function which returns "invalid parameters" if non-zero Rand192 or Hash192 parameters were given for an LE address. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 25e40e82b9a2..8417ab387d1a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -131,6 +131,9 @@ static const u16 mgmt_events[] = { #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) +#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ + "\x00\x00\x00\x00\x00\x00\x00\x00" + struct pending_cmd { struct list_head list; u16 opcode; @@ -3673,6 +3676,18 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, u8 status; if (bdaddr_type_is_le(cp->addr.type)) { + /* Enforce zero-valued 192-bit parameters as + * long as legacy SMP OOB isn't implemented. + */ + if (memcmp(cp->rand192, ZERO_KEY, 16) || + memcmp(cp->hash192, ZERO_KEY, 16)) { + err = cmd_complete(sk, hdev->id, + MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_INVALID_PARAMS, + addr, sizeof(*addr)); + goto unlock; + } + rand192 = NULL; hash192 = NULL; } else { -- cgit v1.2.3 From 6665d057fba3376351346abe7bd1ebcaafc48a34 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 27 Jan 2015 16:04:31 -0800 Subject: Bluetooth: Clear P-192 values for OOB when in Secure Connections Only mode When Secure Connections Only mode has been enabled and remote OOB data is requested, then only provide P-256 hash and randomizer vaulues. The fields for P-192 hash and randomizer should be set to zero. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_event.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e2b81adc232f..0850cad8c368 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4163,33 +4163,39 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev, goto unlock; data = hci_find_remote_oob_data(hdev, &ev->bdaddr, BDADDR_BREDR); - if (data) { - if (bredr_sc_enabled(hdev)) { - struct hci_cp_remote_oob_ext_data_reply cp; - - bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.hash192, data->hash192, sizeof(cp.hash192)); - memcpy(cp.rand192, data->rand192, sizeof(cp.rand192)); - memcpy(cp.hash256, data->hash256, sizeof(cp.hash256)); - memcpy(cp.rand256, data->rand256, sizeof(cp.rand256)); + if (!data) { + struct hci_cp_remote_oob_data_neg_reply cp; - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_EXT_DATA_REPLY, - sizeof(cp), &cp); - } else { - struct hci_cp_remote_oob_data_reply cp; + bacpy(&cp.bdaddr, &ev->bdaddr); + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, + sizeof(cp), &cp); + goto unlock; + } - bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.hash, data->hash192, sizeof(cp.hash)); - memcpy(cp.rand, data->rand192, sizeof(cp.rand)); + if (bredr_sc_enabled(hdev)) { + struct hci_cp_remote_oob_ext_data_reply cp; - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, - sizeof(cp), &cp); + bacpy(&cp.bdaddr, &ev->bdaddr); + if (test_bit(HCI_SC_ONLY, &hdev->dev_flags)) { + memset(cp.hash192, 0, sizeof(cp.hash192)); + memset(cp.rand192, 0, sizeof(cp.rand192)); + } else { + memcpy(cp.hash192, data->hash192, sizeof(cp.hash192)); + memcpy(cp.rand192, data->rand192, sizeof(cp.rand192)); } + memcpy(cp.hash256, data->hash256, 
sizeof(cp.hash256)); + memcpy(cp.rand256, data->rand256, sizeof(cp.rand256)); + + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_EXT_DATA_REPLY, + sizeof(cp), &cp); } else { - struct hci_cp_remote_oob_data_neg_reply cp; + struct hci_cp_remote_oob_data_reply cp; bacpy(&cp.bdaddr, &ev->bdaddr); - hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_NEG_REPLY, + memcpy(cp.hash, data->hash192, sizeof(cp.hash)); + memcpy(cp.rand, data->rand192, sizeof(cp.rand)); + + hci_send_cmd(hdev, HCI_OP_REMOTE_OOB_DATA_REPLY, sizeof(cp), &cp); } -- cgit v1.2.3 From a83ed81ef5881445d96120bea2032f72884fe038 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 27 Jan 2015 16:04:32 -0800 Subject: Bluetooth: Use helper function to determine BR/EDR OOB data present When replying to the IO capability request for Secure Simple Pairing and Secure Connections, the OOB data present fields needs to set. Instead of making the calculation inline, split this into a separate helper function. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_event.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0850cad8c368..8ed8516b18c7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3855,6 +3855,21 @@ static u8 hci_get_auth_req(struct hci_conn *conn) return (conn->remote_auth & ~0x01) | (conn->auth_type & 0x01); } +static u8 bredr_oob_data_present(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct oob_data *data; + + data = hci_find_remote_oob_data(hdev, &conn->dst, BDADDR_BREDR); + if (!data) + return 0x00; + + if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) + return 0x01; + + return 0x00; +} + static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_io_capa_request *ev = (void *) skb->data; @@ -3906,12 +3921,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->auth_type &= HCI_AT_NO_BONDING_MITM; cp.authentication = conn->auth_type; - - if (hci_find_remote_oob_data(hdev, &conn->dst, BDADDR_BREDR) && - (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags))) - cp.oob_data = 0x01; - else - cp.oob_data = 0x00; + cp.oob_data = bredr_oob_data_present(conn); hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_REPLY, sizeof(cp), &cp); -- cgit v1.2.3 From aa5b03456500b57ab30313affa822d4cd90e2ab2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 27 Jan 2015 16:04:33 -0800 Subject: Bluetooth: Check for P-256 OOB values in Secure Connections Only mode If Secure Connections Only mode has been enabled, the it is important to check that OOB data for P-256 values is provided. In case it is not, then tell the remote side that no OOB data is present. 
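For illustration only, the all-zero test that this check relies on can be sketched in plain C as follows (zero16 and oob_value_present are hypothetical names, not the kernel API):

#include <stdbool.h>
#include <string.h>

/* Illustrative only: a 16-byte OOB value that was never populated stays
 * all zero, so presence can be tested with a memcmp against zeros. */
static const unsigned char zero16[16];

static bool oob_value_present(const unsigned char value[16])
{
	return memcmp(value, zero16, 16) != 0;
}

The diff below applies the same idea with its ZERO_KEY constant: in Secure Connections Only mode, if either rand256 or hash256 is still all zero, bredr_oob_data_present() reports 0x00.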
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_event.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8ed8516b18c7..00c160634e7b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -36,6 +36,9 @@ #include "amp.h" #include "smp.h" +#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ + "\x00\x00\x00\x00\x00\x00\x00\x00" + /* Handle HCI Event packets */ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) @@ -3864,6 +3867,16 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) if (!data) return 0x00; + /* When Secure Connections Only mode is enabled, then the P-256 + * values are required. If they are not available, then do not + * declare that OOB data is present. + */ + if (bredr_sc_enabled(hdev) && + test_bit(HCI_SC_ONLY, &hdev->dev_flags) && + (!memcmp(data->rand256, ZERO_KEY, 16) || + !memcmp(data->hash256, ZERO_KEY, 16))) + return 0x00; + if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) return 0x01; -- cgit v1.2.3 From 592002863a03bd172a19d04854c5ed415693911f Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 28 Jan 2015 19:56:00 +0200 Subject: Bluetooth: Fix check for SSP when enabling SC There's a check in set_secure_conn() that's supposed to ensure that SSP is enabled before we try to request the controller to enable SC (since SSP is a pre-requisite for it). However, this check only makes sense for controllers actually supporting BR/EDR SC. If we have a 4.0 controller we're only interested in the LE part of SC and should therefore not be requiring SSP to be enabled. This patch adds an additional condition to check for lmp_sc_capable(hdev) before requiring SSP to be enabled. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8417ab387d1a..78939e0ed1f4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4818,6 +4818,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_NOT_SUPPORTED); if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) && + lmp_sc_capable(hdev) && !test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_REJECTED); -- cgit v1.2.3 From 39c5d970d4d1302d3407e0471c55520dd5537069 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 28 Jan 2015 19:56:01 +0200 Subject: Bluetooth: Fix notifying discovery state upon reset When HCI_Reset is issued the discovery state is assumed to be stopped. The hci_cc_reset() handler was trying to set the state but it was doing it without using the hci_discovery_set_state() function. Because of this e.g. the mgmt Discovering event could go without being sent. This patch fixes the code to use the hci_discovery_set_state() function instead of just blindly setting the state value. 
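For illustration, the benefit of funnelling every state change through one setter can be sketched outside the kernel like this (adapter, discovery_set_state and notify_discovering are hypothetical names, not the hci_* API):

#include <stdbool.h>
#include <stdio.h>

enum discovery_state { DISCOVERY_STOPPED, DISCOVERY_STARTING, DISCOVERY_FINDING };

struct adapter {
	enum discovery_state discovery;
};

/* Stand-in for the mgmt "Discovering" notification. */
static void notify_discovering(bool discovering)
{
	printf("discovering: %s\n", discovering ? "yes" : "no");
}

/* Every writer goes through the setter, so the state value and the
 * notification cannot drift apart the way a bare assignment allows. */
static void discovery_set_state(struct adapter *a, enum discovery_state state)
{
	if (a->discovery == state)
		return;
	a->discovery = state;
	notify_discovering(state != DISCOVERY_STOPPED);
}

The diff that follows makes hci_cc_reset() use the kernel's equivalent of such a setter instead of writing the field directly.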
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 00c160634e7b..4175470ff48e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -200,7 +200,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) /* Reset all non-persistent flags */ hdev->dev_flags &= ~HCI_PERSISTENT_MASK; - hdev->discovery.state = DISCOVERY_STOPPED; + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hdev->inq_tx_power = HCI_TX_POWER_INVALID; hdev->adv_tx_power = HCI_TX_POWER_INVALID; -- cgit v1.2.3 From 8f502f847a67d00412382aeda0a2fa412b04e2a7 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 28 Jan 2015 19:56:02 +0200 Subject: Bluetooth: Fix notifying discovery state when powering off The discovery state should be set to stopped when the HCI device is powered off. This patch adds the appropriate call to the hci_discovery_set_state() function from hci_dev_do_close() which is responsible for the power-off procedure. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bb831d678868..46aa702c189d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1628,6 +1628,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { if (hdev->dev_type == HCI_BREDR) mgmt_powered(hdev, 0); -- cgit v1.2.3 From 5c912495b7a8ab6adae877979abfffba4340e06c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 28 Jan 2015 11:53:05 -0800 Subject: Bluetooth: Introduce hci_dev_do_reset helper function Split the hci_dev_reset ioctl handling into using hci_dev_do_reset helper function. Similar to what has been done with hci_dev_do_open and hci_dev_do_close. 
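As a rough sketch of the split being introduced (generic names, not the hci_* API), the user-facing entry point keeps the device lookup and precondition checks, while the do_-style helper holds the actual work and stays callable from internal paths such as error recovery:

#include <errno.h>
#include <stdbool.h>

struct dev {
	bool up;
};

static struct dev *dev_get(unsigned int index) { static struct dev d = { .up = true }; (void)index; return &d; }
static void dev_put(struct dev *d) { (void)d; }

/* The helper only performs the work; callers decide when it is legal. */
static int dev_do_reset(struct dev *d)
{
	(void)d;	/* drop queues, clear state, issue the reset request */
	return 0;
}

int dev_reset(unsigned int index)
{
	struct dev *d = dev_get(index);
	int err;

	if (!d)
		return -ENODEV;

	if (!d->up) {
		err = -ENETDOWN;
		goto done;
	}

	err = dev_do_reset(d);
done:
	dev_put(d);
	return err;
}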
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 57 +++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 46aa702c189d..d4c9152474a9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1719,32 +1719,14 @@ done: return err; } -int hci_dev_reset(__u16 dev) +static int hci_dev_do_reset(struct hci_dev *hdev) { - struct hci_dev *hdev; - int ret = 0; + int ret; - hdev = hci_dev_get(dev); - if (!hdev) - return -ENODEV; + BT_DBG("%s %p", hdev->name, hdev); hci_req_lock(hdev); - if (!test_bit(HCI_UP, &hdev->flags)) { - ret = -ENETDOWN; - goto done; - } - - if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { - ret = -EBUSY; - goto done; - } - - if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { - ret = -EOPNOTSUPP; - goto done; - } - /* Drop queues */ skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); @@ -1767,12 +1749,41 @@ int hci_dev_reset(__u16 dev) ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); -done: hci_req_unlock(hdev); - hci_dev_put(hdev); return ret; } +int hci_dev_reset(__u16 dev) +{ + struct hci_dev *hdev; + int err; + + hdev = hci_dev_get(dev); + if (!hdev) + return -ENODEV; + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = -ENETDOWN; + goto done; + } + + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = -EBUSY; + goto done; + } + + if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) { + err = -EOPNOTSUPP; + goto done; + } + + err = hci_dev_do_reset(hdev); + +done: + hci_dev_put(hdev); + return err; +} + int hci_dev_reset_stat(__u16 dev) { struct hci_dev *hdev; -- cgit v1.2.3 From c7741d16a57cbf97eebe53f27e8216b1ff20e20c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 28 Jan 2015 11:09:55 -0800 Subject: Bluetooth: Perform a power cycle when receiving hardware error event When receiving a HCI Hardware Error event, the controller should be assumed to be non-functional until issuing a HCI Reset command. The Bluetooth hardware errors are vendor specific and so add a new hdev->hw_error callback that drivers can provide to run extra code to handle the hardware error. After completing the vendor specific error handling perform a full reset of the Bluetooth stack by closing and re-opening the transport. 
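The dispatch that the following diff adds can be pictured, outside the kernel, roughly as below (controller, handle_hw_error and the open/close stubs are hypothetical names standing in for the hci_dev machinery):

#include <stdio.h>

struct controller {
	const char *name;
	unsigned char hw_error_code;
	/* optional vendor hook; NULL means no driver-specific handling */
	void (*hw_error)(struct controller *c, unsigned char code);
};

static void controller_close(struct controller *c) { (void)c; }
static void controller_open(struct controller *c)  { (void)c; }

static void handle_hw_error(struct controller *c)
{
	if (c->hw_error)
		c->hw_error(c, c->hw_error_code);
	else
		fprintf(stderr, "%s hardware error 0x%2.2x\n",
			c->name, c->hw_error_code);

	/* power cycle: close the transport, then bring it back up */
	controller_close(c);
	controller_open(c);
}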
Based-on-patch-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 3 +++ net/bluetooth/hci_core.c | 21 +++++++++++++++++++++ net/bluetooth/hci_event.c | 4 +++- 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0f5e59f1e3cb..1780f1681ecf 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -232,6 +232,7 @@ struct hci_dev { __u16 conn_info_min_age; __u16 conn_info_max_age; __u8 ssp_debug_mode; + __u8 hw_error_code; __u32 clock; __u16 devid_source; @@ -293,6 +294,7 @@ struct hci_dev { struct work_struct power_on; struct delayed_work power_off; + struct work_struct error_reset; __u16 discov_timeout; struct delayed_work discov_off; @@ -369,6 +371,7 @@ struct hci_dev { int (*setup)(struct hci_dev *hdev); int (*send)(struct hci_dev *hdev, struct sk_buff *skb); void (*notify)(struct hci_dev *hdev, unsigned int evt); + void (*hw_error)(struct hci_dev *hdev, u8 code); int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr); }; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d4c9152474a9..79693a9ef4eb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2151,6 +2151,26 @@ static void hci_power_off(struct work_struct *work) smp_unregister(hdev); } +static void hci_error_reset(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + + BT_DBG("%s", hdev->name); + + if (hdev->hw_error) + hdev->hw_error(hdev, hdev->hw_error_code); + else + BT_ERR("%s hardware error 0x%2.2x", hdev->name, + hdev->hw_error_code); + + if (hci_dev_do_close(hdev)) + return; + + smp_unregister(hdev); + + hci_dev_do_open(hdev); +} + static void hci_discov_off(struct work_struct *work) { struct hci_dev *hdev; @@ -2943,6 +2963,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); + INIT_WORK(&hdev->error_reset, hci_error_reset); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4175470ff48e..a72a5f50728d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3100,7 +3100,9 @@ static void hci_hardware_error_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_hardware_error *ev = (void *) skb->data; - BT_ERR("%s hardware error 0x%2.2x", hdev->name, ev->code); + hdev->hw_error_code = ev->code; + + queue_work(hdev->req_workqueue, &hdev->error_reset); } static void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) -- cgit v1.2.3 From 64dae967cac7e5e9a685aa79eee470b77a80acd0 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 28 Jan 2015 14:10:28 -0800 Subject: Bluetooth: Move smp_unregister() into hci_dev_do_close() function The smp_unregister() function needs to be called every time the controller is powered down. There are multiple entry points when this can happen. One is "hciconfig hci0 reset" which will throw a WARN_ON when LE support has been enabled. 
[ 78.564620] WARNING: CPU: 0 PID: 148 at net/bluetooth/smp.c:3075 smp_register+0xf1/0x170() [ 78.564622] Modules linked in: [ 78.564628] CPU: 0 PID: 148 Comm: kworker/u3:1 Not tainted 3.19.0-rc4-devel+ #404 [ 78.564629] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS [ 78.564635] Workqueue: hci0 hci_rx_work [ 78.564638] ffffffff81b4a7a2 ffff88001cb2fb38 ffffffff8161d881 0000000080000000 [ 78.564642] 0000000000000000 ffff88001cb2fb78 ffffffff8103b870 696e55206e6f6f6d [ 78.564645] ffff88001d965000 0000000000000000 0000000000000000 ffff88001d965000 [ 78.564648] Call Trace: [ 78.564655] [] dump_stack+0x4f/0x7b [ 78.564662] [] warn_slowpath_common+0x80/0xc0 [ 78.564667] [] ? add_uuid+0x1f0/0x1f0 [ 78.564671] [] warn_slowpath_null+0x15/0x20 [ 78.564674] [] smp_register+0xf1/0x170 [ 78.564680] [] ? lock_timer_base.isra.30+0x26/0x50 [ 78.564683] [] powered_complete+0xf0/0x120 [ 78.564688] [] hci_req_cmd_complete+0x82/0x260 [ 78.564692] [] hci_cmd_complete_evt+0x6cf/0x2e20 [ 78.564697] [] ? _raw_spin_unlock_irqrestore+0x13/0x30 [ 78.564701] [] ? __wake_up_sync_key+0x4f/0x60 [ 78.564705] [] hci_event_packet+0xbcb/0x2e70 [ 78.564709] [] ? skb_release_all+0x23/0x30 [ 78.564711] [] ? kfree_skb+0x29/0x40 [ 78.564715] [] hci_rx_work+0x1c8/0x3f0 [ 78.564719] [] ? get_parent_ip+0x11/0x50 [ 78.564722] [] ? preempt_count_add+0x55/0xb0 [ 78.564727] [] process_one_work+0x12f/0x360 [ 78.564731] [] worker_thread+0x6b/0x4b0 [ 78.564735] [] ? cancel_delayed_work_sync+0x10/0x10 [ 78.564738] [] kthread+0xea/0x100 [ 78.564742] [] ? __schedule+0x3e0/0x980 [ 78.564745] [] ? kthread_create_on_node+0x180/0x180 [ 78.564749] [] ret_from_fork+0x7c/0xb0 [ 78.564752] [] ? kthread_create_on_node+0x180/0x180 [ 78.564755] ---[ end trace 8b0d943af76d3736 ]--- This warning is not critical and has only been placed in the code to actually catch this exact situation. To avoid triggering it move the smp_unregister() into hci_dev_do_close() which will now also take care of remove the SMP channel. It is safe to call this function since it only remove the channel if it has been previously registered. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 79693a9ef4eb..5d4ac3fbbc08 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1640,6 +1640,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); + smp_unregister(hdev); + hci_notify(hdev, HCI_DEV_DOWN); if (hdev->flush) @@ -2147,8 +2149,6 @@ static void hci_power_off(struct work_struct *work) BT_DBG("%s", hdev->name); hci_dev_do_close(hdev); - - smp_unregister(hdev); } static void hci_error_reset(struct work_struct *work) @@ -2166,8 +2166,6 @@ static void hci_error_reset(struct work_struct *work) if (hci_dev_do_close(hdev)) return; - smp_unregister(hdev); - hci_dev_do_open(hdev); } @@ -3137,8 +3135,6 @@ void hci_unregister_dev(struct hci_dev *hdev) rfkill_destroy(hdev->rfkill); } - smp_unregister(hdev); - device_del(&hdev->dev); debugfs_remove_recursive(hdev->debugfs); -- cgit v1.2.3 From e73ebb0881ea5534ce606c1d71b4ac44db5c6930 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 28 Jan 2015 20:01:35 -0500 Subject: tcp: stretch ACK fixes prep LRO, GRO, delayed ACKs, and middleboxes can cause "stretch ACKs" that cover more than the RFC-specified maximum of 2 packets. 
These stretch ACKs can cause serious performance shortfalls in common congestion control algorithms that were designed and tuned years ago with receiver hosts that were not using LRO or GRO, and were instead politely ACKing every other packet. This patch series fixes Reno and CUBIC to handle stretch ACKs. This patch prepares for the upcoming stretch ACK bug fix patches. It adds an "acked" parameter to tcp_cong_avoid_ai() to allow for future fixes to tcp_cong_avoid_ai() to correctly handle stretch ACKs, and changes all congestion control algorithms to pass in 1 for the ACKed count. It also changes tcp_slow_start() to return the number of packet ACK "credits" that were not processed in slow start mode, and can be processed by the congestion control module in additive increase mode. In future patches we will fix tcp_cong_avoid_ai() to handle stretch ACKs, and fix Reno and CUBIC handling of stretch ACKs in slow start and additive increase mode. Reported-by: Eyal Perry Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++-- net/ipv4/tcp_bic.c | 2 +- net/ipv4/tcp_cong.c | 11 +++++++---- net/ipv4/tcp_cubic.c | 2 +- net/ipv4/tcp_scalable.c | 3 ++- net/ipv4/tcp_veno.c | 2 +- net/ipv4/tcp_yeah.c | 2 +- 7 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index f50f29faf76f..9d9111ef43ae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -834,8 +834,8 @@ void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); int tcp_set_congestion_control(struct sock *sk, const char *name); -void tcp_slow_start(struct tcp_sock *tp, u32 acked); -void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w); +u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); +void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); u32 tcp_reno_ssthresh(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index bb395d46a389..c037644eafb7 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) tcp_slow_start(tp, acked); else { bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt); + tcp_cong_avoid_ai(tp, ca->cnt, 1); } } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 27ead0dd16bc..6826017c12d1 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -291,25 +291,28 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and * returns the leftover acks to adjust cwnd in congestion avoidance mode. 
*/ -void tcp_slow_start(struct tcp_sock *tp, u32 acked) +u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { u32 cwnd = tp->snd_cwnd + acked; if (cwnd > tp->snd_ssthresh) cwnd = tp->snd_ssthresh + 1; + acked -= cwnd - tp->snd_cwnd; tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); + + return acked; } EXPORT_SYMBOL_GPL(tcp_slow_start); /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ -void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w) +void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { if (tp->snd_cwnd_cnt >= w) { if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; tp->snd_cwnd_cnt = 0; } else { - tp->snd_cwnd_cnt++; + tp->snd_cwnd_cnt += acked; } } EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); @@ -333,7 +336,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) tcp_slow_start(tp, acked); /* In dangerous area, increase slowly. */ else - tcp_cong_avoid_ai(tp, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 6b6002416a73..df4bc4d87e58 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -320,7 +320,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) tcp_slow_start(tp, acked); } else { bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt); + tcp_cong_avoid_ai(tp, ca->cnt, 1); } } diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 6824afb65d93..333bcb2415ff 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -25,7 +25,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp, acked); else - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + 1); } static u32 tcp_scalable_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index a4d2d2d88dca..112151eeee45 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -159,7 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* In the "non-congestive state", increase cwnd * every rtt. */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); } else { /* In the "congestive state", increase cwnd * every other rtt. diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index cd7273218598..17d35662930d 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -92,7 +92,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) } else { /* Reno */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. -- cgit v1.2.3 From 814d488c61260521b1b3cc97063700a5a6667c8f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 28 Jan 2015 20:01:36 -0500 Subject: tcp: fix the timid additive increase on stretch ACKs tcp_cong_avoid_ai() was too timid (snd_cwnd increased too slowly) on "stretch ACKs" -- cases where the receiver ACKed more than 1 packet in a single ACK. For example, suppose w is 10 and we get a stretch ACK for 20 packets, so acked is 20. We ought to increase snd_cwnd by 2 (since acked/w = 20/10 = 2), but instead we were only increasing cwnd by 1. This patch fixes that behavior. Reported-by: Eyal Perry Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_cong.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 6826017c12d1..faaee5338bea 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -304,16 +304,19 @@ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) } EXPORT_SYMBOL_GPL(tcp_slow_start); -/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ +/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w), + * for every packet that was ACKed. + */ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { + tp->snd_cwnd_cnt += acked; if (tp->snd_cwnd_cnt >= w) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } else { - tp->snd_cwnd_cnt += acked; + u32 delta = tp->snd_cwnd_cnt / w; + + tp->snd_cwnd_cnt -= delta * w; + tp->snd_cwnd += delta; } + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); } EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); -- cgit v1.2.3 From c22bdca94782f05b9337d8548bde51b2f38ef17f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 28 Jan 2015 20:01:37 -0500 Subject: tcp: fix stretch ACK bugs in Reno Change Reno to properly handle stretch ACKs in additive increase mode by passing in the count of ACKed packets to tcp_cong_avoid_ai(). In addition, if snd_cwnd crosses snd_ssthresh during slow start processing, and we then exit slow start mode, we need to carry over any remaining "credit" for packets ACKed and apply that to additive increase by passing this remaining "acked" count to tcp_cong_avoid_ai(). Reported-by: Eyal Perry Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index faaee5338bea..8670e68e2ce6 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -335,11 +335,13 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; /* In "safe" area, increase. */ - if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp, acked); + if (tp->snd_cwnd <= tp->snd_ssthresh) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } /* In dangerous area, increase slowly. */ - else - tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); -- cgit v1.2.3 From 9cd981dcf174d26805a032aefa791436da709bee Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 28 Jan 2015 20:01:38 -0500 Subject: tcp: fix stretch ACK bugs in CUBIC Change CUBIC to properly handle stretch ACKs in additive increase mode by passing in the count of ACKed packets to tcp_cong_avoid_ai(). In addition, because we are now precisely accounting for stretch ACKs, including delayed ACKs, we can now remove the delayed ACK tracking and estimation code that tracked recent delayed ACK behavior in ca->delayed_ack. Reported-by: Eyal Perry Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_cubic.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index df4bc4d87e58..ffc045da2fd5 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -93,9 +93,7 @@ struct bictcp { u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ -#define ACK_RATIO_SHIFT 4 -#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT) - u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ + u16 unused; u8 sample_cnt; /* number of samples to decide curr_rtt */ u8 found; /* the exit point is found? */ u32 round_start; /* beginning of each round */ @@ -114,7 +112,6 @@ static inline void bictcp_reset(struct bictcp *ca) ca->bic_K = 0; ca->delay_min = 0; ca->epoch_start = 0; - ca->delayed_ack = 2 << ACK_RATIO_SHIFT; ca->ack_cnt = 0; ca->tcp_cwnd = 0; ca->found = 0; @@ -205,12 +202,12 @@ static u32 cubic_root(u64 a) /* * Compute congestion window to use. */ -static inline void bictcp_update(struct bictcp *ca, u32 cwnd) +static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) { u32 delta, bic_target, max_cnt; u64 offs, t; - ca->ack_cnt++; /* count the number of ACKs */ + ca->ack_cnt += acked; /* count the number of ACKed packets */ if (ca->last_cwnd == cwnd && (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) @@ -221,7 +218,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) if (ca->epoch_start == 0) { ca->epoch_start = tcp_time_stamp; /* record beginning */ - ca->ack_cnt = 1; /* start counting */ + ca->ack_cnt = acked; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ if (ca->last_max_cwnd <= cwnd) { @@ -301,7 +298,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) } } - ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack; if (ca->cnt == 0) /* cannot be zero */ ca->cnt = 1; } @@ -317,11 +313,12 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (tp->snd_cwnd <= tp->snd_ssthresh) { if (hystart && after(ack, ca->end_seq)) bictcp_hystart_reset(sk); - tcp_slow_start(tp, acked); - } else { - bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt, 1); + acked = tcp_slow_start(tp, acked); + if (!acked) + return; } + bictcp_update(ca, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, ca->cnt, acked); } static u32 bictcp_recalc_ssthresh(struct sock *sk) @@ -411,20 +408,10 @@ static void hystart_update(struct sock *sk, u32 delay) */ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) { - const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 delay; - if (icsk->icsk_ca_state == TCP_CA_Open) { - u32 ratio = ca->delayed_ack; - - ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; - ratio += cnt; - - ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT); - } - /* Some calls are for duplicates without timetamps */ if (rtt_us < 0) return; -- cgit v1.2.3 From d6b1a8a92a1417f8859a6937d2e6ffe2dfab4e6d Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 28 Jan 2015 20:01:39 -0500 Subject: tcp: fix timing issue in CUBIC slope calculation This patch fixes a bug in CUBIC that causes cwnd to increase slightly too slowly when multiple ACKs arrive in the same jiffy. If cwnd is supposed to increase at a rate of more than once per jiffy, then CUBIC was sometimes too slow. 
Because the bic_target is calculated for a future point in time, calculated with time in jiffies, the cwnd can increase over the course of the jiffy while the bic_target calculated as the proper CUBIC cwnd at time t=tcp_time_stamp+rtt does not increase, because tcp_time_stamp only increases on jiffy tick boundaries. So since the cnt is set to: ca->cnt = cwnd / (bic_target - cwnd); as cwnd increases but bic_target does not increase due to jiffy granularity, the cnt becomes too large, causing cwnd to increase too slowly. For example: - suppose at the beginning of a jiffy, cwnd=40, bic_target=44 - so CUBIC sets: ca->cnt = cwnd / (bic_target - cwnd) = 40 / (44 - 40) = 40/4 = 10 - suppose we get 10 acks, each for 1 segment, so tcp_cong_avoid_ai() increases cwnd to 41 - so CUBIC sets: ca->cnt = cwnd / (bic_target - cwnd) = 41 / (44 - 41) = 41 / 3 = 13 So now CUBIC will wait for 13 packets to be ACKed before increasing cwnd to 42, insted of 10 as it should. The fix is to avoid adjusting the slope (determined by ca->cnt) multiple times within a jiffy, and instead skip to compute the Reno cwnd, the "TCP friendliness" code path. Reported-by: Eyal Perry Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ffc045da2fd5..4b276d1ed980 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -213,6 +213,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) return; + /* The CUBIC function can update ca->cnt at most once per jiffy. + * On all cwnd reduction events, ca->epoch_start is set to 0, + * which will force a recalculation of ca->cnt. + */ + if (ca->epoch_start && tcp_time_stamp == ca->last_time) + goto tcp_friendliness; + ca->last_cwnd = cwnd; ca->last_time = tcp_time_stamp; @@ -280,6 +287,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) if (ca->last_max_cwnd == 0 && ca->cnt > 20) ca->cnt = 20; /* increase cwnd 5% per RTT */ +tcp_friendliness: /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; -- cgit v1.2.3 From 59ccaaaa49b5b096cdc1f16706a9f931416b2332 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 28 Jan 2015 16:23:11 -0800 Subject: bridge: dont send notification when skb->len == 0 in rtnl_bridge_notify Reported in: https://bugzilla.kernel.org/show_bug.cgi?id=92081 This patch avoids calling rtnl_notify if the device ndo_bridge_getlink handler does not return any bytes in the skb. Alternately, the skb->len check can be moved inside rtnl_notify. For the bridge vlan case described in 92081, there is also a fix needed in bridge driver to generate a proper notification. Will fix that in subsequent patch. v2: rebase patch on net tree Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9cf6fe9ddc0c..446cbaf81185 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2895,12 +2895,16 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) goto errout; } + if (!skb->len) + goto errout; + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; errout: WARN_ON(err == -EMSGSIZE); kfree_skb(skb); - rtnl_set_sk_err(net, RTNLGRP_LINK, err); + if (err) + rtnl_set_sk_err(net, RTNLGRP_LINK, err); return err; } -- cgit v1.2.3 From b8693877ae016ac525d674d5d7a84ea0ea68ba60 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 28 Jan 2015 16:32:46 -0800 Subject: openvswitch: Add support for checksums on UDP tunnels. Currently, it isn't possible to request checksums on the outer UDP header of tunnels - the TUNNEL_CSUM flag is ignored. This adds support for requesting that UDP checksums be computed on transmit and properly reported if they are present on receive. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/net/geneve.h | 2 +- net/ipv4/geneve.c | 6 +++--- net/openvswitch/vport-geneve.c | 2 +- net/openvswitch/vport-vxlan.c | 7 +++++-- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/geneve.h b/include/net/geneve.h index 03aa2adb5bab..14fb8d3390b4 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -90,7 +90,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool xnet); + bool csum, bool xnet); #endif /*ifdef CONFIG_INET */ #endif /*ifdef__NET_GENEVE_H */ diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 93e51199e44b..5a4828ba05ad 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -107,13 +107,13 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool xnet) + bool csum, bool xnet) { struct genevehdr *gnvh; int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); + skb = udp_tunnel_handle_offloads(skb, csum); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -138,7 +138,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, - gs->sock->sk->sk_no_check_tx); + !csum); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 7ca3d454ff3b..bf02fd5808c9 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -212,7 +212,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, tun_key->tun_flags, vni, opts_len, opts, - false); + !!(tun_key->tun_flags & TUNNEL_CSUM), false); if (err < 0) ip_rt_put(rt); return err; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 3cc983bf444a..ff07d4062d60 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be64 key; __be16 flags; - flags = TUNNEL_KEY; + flags = 
TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0); vxlan_port = vxlan_vport(vport); if (vxlan_port->exts & VXLAN_F_GBP) flags |= TUNNEL_VXLAN_OPT; @@ -230,6 +230,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 src_port; __be16 df; int err; + u32 vxflags; if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; @@ -251,11 +252,13 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) src_port = udp_flow_src_port(net, skb, 0, 0, true); md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); md.gbp = vxlan_ext_gbp(skb); + vxflags = vxlan_port->exts | + (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0); err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - &md, false, vxlan_port->exts); + &md, false, vxflags); if (err < 0) ip_rt_put(rt); return err; -- cgit v1.2.3 From 7cc05662682da4b0e0a4fdf3c3f190577803ae81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Jan 2015 18:04:53 +0100 Subject: net: remove sock_iocb The sock_iocb structure is allocate on stack for each read/write-like operation on sockets, and contains various fields of which only the embedded msghdr and sometimes a pointer to the scm_cookie is ever used. Get rid of the sock_iocb and put a msghdr directly on the stack and pass the scm_cookie explicitly to netlink_mmap_sendmsg. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/net/sock.h | 23 --------------- net/netlink/af_netlink.c | 28 +++++++------------ net/socket.c | 45 +++-------------------------- net/unix/af_unix.c | 73 +++++++++++++++++++----------------------------- 4 files changed, 43 insertions(+), 126 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 2210fec65669..15341499786c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1374,29 +1374,6 @@ void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); #define SOCK_BINDADDR_LOCK 4 #define SOCK_BINDPORT_LOCK 8 -/* sock_iocb: used to kick off async processing of socket ios */ -struct sock_iocb { - struct list_head list; - - int flags; - int size; - struct socket *sock; - struct sock *sk; - struct scm_cookie *scm; - struct msghdr *msg, async_msg; - struct kiocb *kiocb; -}; - -static inline struct sock_iocb *kiocb_to_siocb(struct kiocb *iocb) -{ - return (struct sock_iocb *)iocb->private; -} - -static inline struct kiocb *siocb_to_kiocb(struct sock_iocb *si) -{ - return si->kiocb; -} - struct socket_alloc { struct socket socket; struct inode vfs_inode; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2197af00673a..a36777b7cfb6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -695,7 +695,7 @@ static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, u32 dst_portid, u32 dst_group, - struct sock_iocb *siocb) + struct scm_cookie *scm) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; @@ -741,7 +741,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm->creds; err = security_netlink_send(sk, skb); if (err) { @@ -820,7 +820,7 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap #define netlink_poll 
datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 +#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 #endif /* CONFIG_NETLINK_MMAP */ static void netlink_skb_destructor(struct sk_buff *skb) @@ -2259,7 +2259,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); @@ -2273,10 +2272,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) - siocb->scm = &scm; - - err = scm_send(sock, msg, siocb->scm, true); + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; @@ -2305,7 +2301,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (netlink_tx_is_mmaped(sk) && msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - siocb); + &scm); goto out; } @@ -2319,7 +2315,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm.creds; NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; @@ -2341,7 +2337,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -2349,7 +2345,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2412,11 +2407,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (nlk->flags & NETLINK_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); - if (NULL == siocb->scm) { - memset(&scm, 0, sizeof(scm)); - siocb->scm = &scm; - } - siocb->scm->creds = *NETLINK_CREDS(skb); + memset(&scm, 0, sizeof(scm)); + scm.creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) copied = data_skb->len; @@ -2431,7 +2423,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } } - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: netlink_rcv_wake(sk); return err ? : copied; diff --git a/net/socket.c b/net/socket.c index 3acd35f144d6..3326d67482ac 100644 --- a/net/socket.c +++ b/net/socket.c @@ -613,13 +613,6 @@ EXPORT_SYMBOL(__sock_tx_timestamp); static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - return sock->ops->sendmsg(iocb, sock, msg, size); } @@ -635,11 +628,9 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, bool nosec) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = nosec ? 
__sock_sendmsg_nosec(&iocb, sock, msg, size) : __sock_sendmsg(&iocb, sock, msg, size); if (-EIOCBQUEUED == ret) @@ -756,14 +747,6 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - si->flags = flags; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); } @@ -779,11 +762,9 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -795,11 +776,9 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -866,14 +845,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - struct sock_iocb *siocb) -{ - siocb->kiocb = iocb; - iocb->private = siocb; - return siocb; -} - static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, struct file *file, const struct iovec *iov, unsigned long nr_segs) @@ -893,7 +864,7 @@ static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct sock_iocb siocb, *x; + struct msghdr msg; if (pos != 0) return -ESPIPE; @@ -901,11 +872,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + return do_sock_read(&msg, iocb, iocb->ki_filp, iov, nr_segs); } static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, @@ -929,16 +896,12 @@ static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct sock_iocb siocb, *x; + struct msghdr msg; if (pos != 0) return -ESPIPE; - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + return do_sock_write(&msg, iocb, iocb->ki_filp, iov, nr_segs); } /* diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8e1b10274b02..526b6edab018 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1445,7 +1445,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); @@ -1456,14 +1455,12 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, unsigned int hash; struct sk_buff *skb; long timeo; - struct scm_cookie tmp_scm; + struct scm_cookie scm; int max_level; int data_len = 
0; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1507,11 +1504,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - err = unix_scm_to_skb(siocb->scm, skb, true); + err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; max_level = err + 1; - unix_get_secdata(siocb->scm, skb); + unix_get_secdata(&scm, skb); skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1606,7 +1603,7 @@ restart: unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); - scm_destroy(siocb->scm); + scm_destroy(&scm); return len; out_unlock: @@ -1616,7 +1613,7 @@ out_free: out: if (other) sock_put(other); - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -1628,21 +1625,18 @@ out: static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct sock *other = NULL; int err, size; struct sk_buff *skb; int sent = 0; - struct scm_cookie tmp_scm; + struct scm_cookie scm; bool fds_sent = false; int max_level; int data_len; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1683,7 +1677,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out_err; /* Only send the fds in the first buffer */ - err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); + err = unix_scm_to_skb(&scm, skb, !fds_sent); if (err < 0) { kfree_skb(skb); goto out_err; @@ -1715,8 +1709,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, sent += size; } - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent; @@ -1728,8 +1721,7 @@ pipe_err: send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent ? : err; } @@ -1778,8 +1770,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int noblock = flags & MSG_DONTWAIT; @@ -1831,16 +1822,14 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (sock_flag(sk, SOCK_RCVTSTAMP)) __sock_recv_timestamp(msg, sk, skb); - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); - unix_set_secdata(siocb->scm, skb); + memset(&scm, 0, sizeof(scm)); + + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + unix_set_secdata(&scm, skb); if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); sk_peek_offset_bwd(sk, skb->len); } else { @@ -1860,11 +1849,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); } err = (flags & MSG_TRUNC) ? 
skb->len - skip : size; - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out_free: skb_free_datagram(sk, skb); @@ -1915,8 +1904,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); @@ -1943,10 +1931,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, * while sleeps in memcpy_tomsg */ - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } + memset(&scm, 0, sizeof(scm)); err = mutex_lock_interruptible(&u->readlock); if (unlikely(err)) { @@ -2012,13 +1997,13 @@ again: if (check_creds) { /* Never glue messages from different writers */ - if ((UNIXCB(skb).pid != siocb->scm->pid) || - !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || - !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) + if ((UNIXCB(skb).pid != scm.pid) || + !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || + !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } @@ -2045,7 +2030,7 @@ again: sk_peek_offset_bwd(sk, chunk); if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); if (unix_skb_len(skb)) break; @@ -2053,13 +2038,13 @@ again: skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); - if (siocb->scm->fp) + if (scm.fp) break; } else { /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); sk_peek_offset_fwd(sk, chunk); @@ -2068,7 +2053,7 @@ again: } while (size); mutex_unlock(&u->readlock); - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: return copied ? : err; } -- cgit v1.2.3 From 2dbce096ca3ac18711f72e517ed9674d53310b67 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Jan 2015 17:30:09 +0100 Subject: act_connmark: fix dependencies better NET_ACT_CONNMARK fails to build if NF_CONNTRACK_MARK is disabled, and d7924450e14ea4 ("act_connmark: Add missing dependency on NF_CONNTRACK_MARK") fixed that case, but missed the cased where NF_CONNTRACK is a loadable module. This adds the second dependency to ensure that NET_ACT_CONNMARK can only be built-in if NF_CONNTRACK is also part of the kernel rather than a loadable module. Signed-off-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- net/sched/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 7a57f66e654a..899d0319f2b2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -713,7 +713,7 @@ config NET_ACT_BPF config NET_ACT_CONNMARK tristate "Netfilter Connection Mark Retriever" depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES - depends on NF_CONNTRACK_MARK + depends on NF_CONNTRACK && NF_CONNTRACK_MARK ---help--- Say Y here to allow retrieving of conn mark -- cgit v1.2.3 From 86b3bfe914f41c2d47d5882d06e1261cc58fb5e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Jan 2015 06:06:36 -0800 Subject: pkt_sched: fq: remove useless TIME_WAIT check TIME_WAIT sockets are not owning any skb. ip_send_unicast_reply() and tcp_v6_send_response() both use regular sockets. We can safely remove a test in sch_fq and save one cache line miss, as sk_state is far away from sk_pacing_rate. Tested at Google for about one year. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 9b05924cc386..2a50f5c62070 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1,7 +1,7 @@ /* * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) * - * Copyright (C) 2013 Eric Dumazet + * Copyright (C) 2013-2015 Eric Dumazet * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -471,7 +471,7 @@ begin: goto out; rate = q->flow_max_rate; - if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); if (rate != ~0U) { -- cgit v1.2.3 From 811230cd853d62f09ed0addd0ce9a1b9b0e13fb5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Jan 2015 05:47:11 -0800 Subject: tcp: ipv4: initialize unicast_sock sk_pacing_rate When I added sk_pacing_rate field, I forgot to initialize its value in the per cpu unicast_sock used in ip_send_unicast_reply() This means that for sch_fq users, RST packets, or ACK packets sent on behalf of TIME_WAIT sockets might be sent to slowly or even dropped once we reach the per flow limit. Signed-off-by: Eric Dumazet Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing") Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b50861b22b6b..38a20a9cca1a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1517,6 +1517,7 @@ static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { .sk_wmem_alloc = ATOMIC_INIT(1), .sk_allocation = GFP_ATOMIC, .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), + .sk_pacing_rate = ~0U, }, .pmtudisc = IP_PMTUDISC_WANT, .uc_ttl = -1, -- cgit v1.2.3 From ac363cf9eb0b0378165ebfcf41ffbf78295a817e Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 29 Jan 2015 16:36:59 +0100 Subject: Bluetooth: Fix sending Read Remote Extended Features command This command should only be used if remote device reports that it supports extended features. Otherwise command will fail and connection will be dropped. Some devices support SSP but don't support extended features so current check for SSP support is not enought. Instead of checking for SSP support just check if both ends support Extended Feature. 
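A reduced sketch of the gating this patch switches to (generic names, not the lmp_* macros): the request is sent only when both the local controller and the remote device advertise the extended-features capability, rather than inferring it from SSP support:

#include <stdbool.h>

struct features {
	bool ext_features;
};

/* Gate on the capability the command actually depends on. */
static bool can_read_remote_ext_features(const struct features *local,
					 const struct features *remote)
{
	return local->ext_features && remote->ext_features;
}

The trace below shows the failure mode being fixed: the remote reports many features but the Read Remote Extended Features command is rejected with "Command Disallowed".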
< HCI Command: Create Connection (0x01|0x0005) plen 13 Address: D0:9C:30:00:19:6F (Foster Electric Company, Limited) Packet type: 0xcc18 DM1 may be used DH1 may be used DM3 may be used DH3 may be used DM5 may be used DH5 may be used Page scan repetition mode: R1 (0x01) Page scan mode: Mandatory (0x00) Clock offset: 0x94c8 Role switch: Allow slave (0x01) > HCI Event: Command Status (0x0f) plen 4 Create Connection (0x01|0x0005) ncmd 1 Status: Success (0x00) > HCI Event: Connect Complete (0x03) plen 11 Status: Success (0x00) Handle: 5 Address: D0:9C:30:00:19:6F (Foster Electric Company, Limited) Link type: ACL (0x01) Encryption: Disabled (0x00) < HCI Command: Read Remote Supported Features (0x01|0x001b) plen 2 Handle: 5 > HCI Event: Command Status (0x0f) plen 4 Read Remote Supported Features (0x01|0x001b) ncmd 1 Status: Success (0x00) > HCI Event: Page Scan Repetition Mode Change (0x20) plen 7 Address: D0:9C:30:00:19:6F (Foster Electric Company, Limited) Page scan repetition mode: R1 (0x01) > HCI Event: Read Remote Supported Features (0x0b) plen 11 Status: Success (0x00) Handle: 5 Features: 0xff 0xff 0x8f 0xfe 0xdb 0xff 0x5b 0x07 3 slot packets 5 slot packets Encryption Slot offset Timing accuracy Role switch Hold mode Sniff mode Park state Power control requests Channel quality driven data rate (CQDDR) SCO link HV2 packets HV3 packets u-law log synchronous data A-law log synchronous data CVSD synchronous data Paging parameter negotiation Power control Transparent synchronous data Broadcast Encryption Enhanced Data Rate ACL 2 Mbps mode Enhanced Data Rate ACL 3 Mbps mode Enhanced inquiry scan Interlaced inquiry scan Interlaced page scan RSSI with inquiry results Extended SCO link (EV3 packets) EV4 packets EV5 packets AFH capable slave AFH classification slave LE Supported (Controller) 3-slot Enhanced Data Rate ACL packets 5-slot Enhanced Data Rate ACL packets Sniff subrating Pause encryption AFH capable master AFH classification master Enhanced Data Rate eSCO 2 Mbps mode Enhanced Data Rate eSCO 3 Mbps mode 3-slot Enhanced Data Rate eSCO packets Extended Inquiry Response Simultaneous LE and BR/EDR (Controller) Secure Simple Pairing Encapsulated PDU Non-flushable Packet Boundary Flag Link Supervision Timeout Changed Event Inquiry TX Power Level Enhanced Power Control < HCI Command: Read Remote Extended Features (0x01|0x001c) plen 3 Handle: 5 Page: 1 > HCI Event: Command Status (0x0f) plen 4 Read Remote Extended Features (0x01|0x001c) ncmd 1 Status: Command Disallowed (0x0c) < HCI Command: Read Clock Offset (0x01|0x001f) plen 2 Handle: 5 > HCI Event: Command Status (0x0f) plen 4 Read Clock Offset (0x01|0x001f) ncmd 1 Status: Success (0x00) < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 5 Reason: Remote User Terminated Connection (0x13) Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a72a5f50728d..055625b7368f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2671,7 +2671,8 @@ static void hci_remote_features_evt(struct hci_dev *hdev, if (conn->state != BT_CONFIG) goto unlock; - if (!ev->status && lmp_ssp_capable(hdev) && lmp_ssp_capable(conn)) { + if (!ev->status && lmp_ext_feat_capable(hdev) && + lmp_ext_feat_capable(conn)) { struct hci_cp_read_remote_ext_features cp; cp.handle = ev->handle; cp.page = 0x01; -- cgit v1.2.3 From 8997c27ec41127bf57421cc0205413d525421ddc Mon Sep 17 
00:00:00 2001 From: Nicolas Dichtel Date: Mon, 26 Jan 2015 22:28:13 +0100 Subject: caif: remove wrong dev_net_set() call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit src_net points to the netns where the netlink message has been received. This netns may be different from the netns where the interface is created (because the user may add IFLA_NET_NS_[PID|FD]). In this case, src_net is the link netns. It seems wrong to override the netns in the newlink() handler because if it was not already src_net, it means that the user explicitly asked to create the netdevice in another netns. CC: Sjur Brændeland CC: Dmitry Tarnyagin Fixes: 8391c4aab1aa ("caif: Bugfixes in CAIF netdevice for close and flow control") Fixes: c41254006377 ("caif-hsi: Add rtnl support") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 1 - net/caif/chnl_net.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net') diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index 5e40a8b68cbe..b3b922adc0e4 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -1415,7 +1415,6 @@ static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, cfhsi = netdev_priv(dev); cfhsi_netlink_parms(data, cfhsi); - dev_net_set(cfhsi->ndev, src_net); get_ops = symbol_get(cfhsi_get_ops); if (!get_ops) { diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 4589ff67bfa9..67a4a36febd1 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -470,7 +470,6 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, ASSERT_RTNL(); caifdev = netdev_priv(dev); caif_netlink_parms(data, &caifdev->conn_req); - dev_net_set(caifdev->netdev, src_net); ret = register_netdevice(dev); if (ret) -- cgit v1.2.3 From 7b4ce694b2030e7bb41f938cba6a0be4947a5aa5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 27 Jan 2015 11:13:08 +0100 Subject: rtnetlink: pass link_net to the newlink handler When IFLA_LINK_NETNSID is used, the netdevice should be built in this link netns and moved at the end to another netns (pointed by the socket netns or IFLA_NET_NS_[PID|FD]). Existing users of the newlink handler use the netns argument (src_net) to find a link netdevice or to check some other information in the link netns. For example, to find a netdevice, two pieces of information are required: an ifindex (usually from IFLA_LINK) and a netns (this link netns). Note: when using IFLA_LINK_NETNSID and IFLA_NET_NS_[PID|FD], a user may create a netdevice that lives in netnsX with its link part in netnsY, by sending an rtnl message from netnsZ. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 07447d1665e6..fedd7ab4085a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2148,7 +2148,7 @@ replay: dev->ifindex = ifm->ifi_index; if (ops->newlink) { - err = ops->newlink(net, dev, tb, data); + err = ops->newlink(link_net ? : net, dev, tb, data); /* Drivers should call free_netdev() in ->destructor * and unregister it on failure after registration * so that device could be finally freed in rtnl_unlock.
-- cgit v1.2.3 From 7866a621043fbaca3d7389e9b9f69dd1a2e5a855 Mon Sep 17 00:00:00 2001 From: Salam Noureddine Date: Tue, 27 Jan 2015 11:35:48 -0800 Subject: dev: add per net_device packet type chains When many pf_packet listeners are created on a lot of interfaces the current implementation using global packet type lists scales poorly. This patch adds per net_device packet type lists to fix this problem. The patch was originally written by Eric Biederman for linux-2.6.29. Tested on linux-3.16. Signed-off-by: "Eric W. Biederman" Signed-off-by: Salam Noureddine Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 + net/core/dev.c | 132 +++++++++++++++++++++++++++++----------------- 2 files changed, 86 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 642d426a668f..3d37c6eb1732 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1514,6 +1514,8 @@ struct net_device { struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; + struct list_head ptype_all; + struct list_head ptype_specific; struct { struct list_head upper; diff --git a/net/core/dev.c b/net/core/dev.c index 7f028d441e98..1d564d68e31a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) static inline struct list_head *ptype_head(const struct packet_type *pt) { if (pt->type == htons(ETH_P_ALL)) - return &ptype_all; + return pt->dev ? &pt->dev->ptype_all : &ptype_all; else - return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; + return pt->dev ? &pt->dev->ptype_specific : + &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; } /** @@ -1734,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } +static inline void deliver_ptype_list_skb(struct sk_buff *skb, + struct packet_type **pt, + struct net_device *dev, __be16 type, + struct list_head *ptype_list) +{ + struct packet_type *ptype, *pt_prev = *pt; + + list_for_each_entry_rcu(ptype, ptype_list, list) { + if (ptype->type != type) + continue; + if (pt_prev) + deliver_skb(skb, pt_prev, dev); + pt_prev = ptype; + } + *pt = pt_prev; +} + static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) { if (!ptype->af_packet_priv || !skb->sk) @@ -1757,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) struct packet_type *ptype; struct sk_buff *skb2 = NULL; struct packet_type *pt_prev = NULL; + struct list_head *ptype_list = &ptype_all; rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_all, list) { +again: + list_for_each_entry_rcu(ptype, ptype_list, list) { /* Never send packets back to the socket * they originated from - MvS (miquels@drinkel.ow.org) */ - if ((ptype->dev == dev || !ptype->dev) && - (!skb_loop_sk(ptype, skb))) { - if (pt_prev) { - deliver_skb(skb2, pt_prev, skb->dev); - pt_prev = ptype; - continue; - } + if (skb_loop_sk(ptype, skb)) + continue; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - break; + if (pt_prev) { + deliver_skb(skb2, pt_prev, skb->dev); + pt_prev = ptype; + continue; + } - net_timestamp_set(skb2); + /* need to clone skb, done only once */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + goto out_unlock; - /* skb->nh should be correctly - set by sender, so that the second statement is - just protection against buggy protocols. 
- */ - skb_reset_mac_header(skb2); - - if (skb_network_header(skb2) < skb2->data || - skb_network_header(skb2) > skb_tail_pointer(skb2)) { - net_crit_ratelimited("protocol %04x is buggy, dev %s\n", - ntohs(skb2->protocol), - dev->name); - skb_reset_network_header(skb2); - } + net_timestamp_set(skb2); - skb2->transport_header = skb2->network_header; - skb2->pkt_type = PACKET_OUTGOING; - pt_prev = ptype; + /* skb->nh should be correctly + * set by sender, so that the second statement is + * just protection against buggy protocols. + */ + skb_reset_mac_header(skb2); + + if (skb_network_header(skb2) < skb2->data || + skb_network_header(skb2) > skb_tail_pointer(skb2)) { + net_crit_ratelimited("protocol %04x is buggy, dev %s\n", + ntohs(skb2->protocol), + dev->name); + skb_reset_network_header(skb2); } + + skb2->transport_header = skb2->network_header; + skb2->pkt_type = PACKET_OUTGOING; + pt_prev = ptype; + } + + if (ptype_list == &ptype_all) { + ptype_list = &dev->ptype_all; + goto again; } +out_unlock: if (pt_prev) pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); rcu_read_unlock(); @@ -2617,7 +2644,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int len; int rc; - if (!list_empty(&ptype_all)) + if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) dev_queue_xmit_nit(skb, dev); len = skb->len; @@ -3615,7 +3642,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *null_or_dev; bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; @@ -3658,11 +3684,15 @@ another_round: goto skip_taps; list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (!ptype->dev || ptype->dev == skb->dev) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; + } + + list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) { + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; } skip_taps: @@ -3718,19 +3748,21 @@ ncls: skb->vlan_tci = 0; } + type = skb->protocol; + /* deliver only exact match when indicated */ - null_or_dev = deliver_exact ? 
skb->dev : NULL; + if (likely(!deliver_exact)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &ptype_base[ntohs(type) & + PTYPE_HASH_MASK]); + } - type = skb->protocol; - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && - (ptype->dev == null_or_dev || ptype->dev == skb->dev || - ptype->dev == orig_dev)) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &orig_dev->ptype_specific); + + if (unlikely(skb->dev != orig_dev)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &skb->dev->ptype_specific); } if (pt_prev) { @@ -6579,6 +6611,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); + BUG_ON(!list_empty(&dev->ptype_all)); + BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); @@ -6761,6 +6795,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->adj_list.lower); INIT_LIST_HEAD(&dev->all_adj_list.upper); INIT_LIST_HEAD(&dev->all_adj_list.lower); + INIT_LIST_HEAD(&dev->ptype_all); + INIT_LIST_HEAD(&dev->ptype_specific); dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); -- cgit v1.2.3 From 3cdaa5be9e81a914e633a6be7b7d2ef75b528562 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Thu, 29 Jan 2015 16:09:03 +0800 Subject: ipv4: Don't increase PMTU with Datagram Too Big message. RFC 1191 said, "a host MUST not increase its estimate of the Path MTU in response to the contents of a Datagram Too Big message." Signed-off-by: Li Wei Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d58dd0ec3e53..52e1f2bf0ca2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -966,6 +966,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (dst->dev->mtu < mtu) return; + if (rt->rt_pmtu && rt->rt_pmtu < mtu) + return; + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; -- cgit v1.2.3 From 579eb62ac35845686a7c4286c0a820b4eb1f96aa Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 18 Dec 2014 22:41:23 +0200 Subject: ipvs: rerouting to local clients is not needed anymore commit f5a41847acc5 ("ipvs: move ip_route_me_harder for ICMP") from 2.6.37 introduced ip_route_me_harder() call for responses to local clients, so that we can provide valid rt_src after SNAT. It was used by TCP to provide valid daddr for ip_send_reply(). After commit 0a5ebb8000c5 ("ipv4: Pass explicit daddr arg to ip_send_reply()." from 3.0 this rerouting is not needed anymore and should be avoided, especially in LOCAL_IN. 
Fixes 3.12.33 crash in xfrm reported by Florian Wiessner: "3.12.33 - BUG xfrm_selector_match+0x25/0x2f6" Reported-by: Smart Weblications GmbH - Florian Wiessner Tested-by: Smart Weblications GmbH - Florian Wiessner Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 990decba1fe4..b87ca32efa0b 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -659,16 +659,24 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -static int ip_vs_route_me_harder(int af, struct sk_buff *skb) +static int ip_vs_route_me_harder(int af, struct sk_buff *skb, + unsigned int hooknum) { + if (!sysctl_snat_reroute(skb)) + return 0; + /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */ + if (NF_INET_LOCAL_IN == hooknum) + return 0; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0) + struct dst_entry *dst = skb_dst(skb); + + if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && + ip6_route_me_harder(skb) != 0) return 1; } else #endif - if ((sysctl_snat_reroute(skb) || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && + if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) return 1; @@ -791,7 +799,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, union nf_inet_addr *snet, __u8 protocol, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, - unsigned int offset, unsigned int ihl) + unsigned int offset, unsigned int ihl, + unsigned int hooknum) { unsigned int verdict = NF_DROP; @@ -821,7 +830,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); - if (ip_vs_route_me_harder(af, skb)) + if (ip_vs_route_me_harder(af, skb, hooknum)) goto out; /* do the statistics and put it back */ @@ -916,7 +925,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, - pp, ciph.len, ihl); + pp, ciph.len, ihl, hooknum); } #ifdef CONFIG_IP_VS_IPV6 @@ -981,7 +990,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, snet.in6 = ciph.saddr.in6; writable = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, - pp, writable, sizeof(struct ipv6hdr)); + pp, writable, sizeof(struct ipv6hdr), + hooknum); } #endif @@ -1040,7 +1050,8 @@ static inline bool is_new_conn(const struct sk_buff *skb, */ static unsigned int handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph, + unsigned int hooknum) { struct ip_vs_protocol *pp = pd->pp; @@ -1078,7 +1089,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. 
*/ - if (ip_vs_route_me_harder(af, skb)) + if (ip_vs_route_me_harder(af, skb, hooknum)) goto drop; IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); @@ -1181,7 +1192,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) cp = pp->conn_out_get(af, skb, &iph, 0); if (likely(cp)) - return handle_response(af, skb, pd, cp, &iph); + return handle_response(af, skb, pd, cp, &iph, hooknum); if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || -- cgit v1.2.3 From f5553c19ff9058136e7082c0b1f4268e705ea538 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 29 Jan 2015 19:08:09 +0100 Subject: netfilter: nf_tables: fix leaks in error path of nf_tables_newchain() Release statistics and module refcount on memory allocation problems. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b54360634e95..1ff04bcd4871 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1264,8 +1264,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN, sizeof(struct nft_trans_chain)); - if (trans == NULL) + if (trans == NULL) { + free_percpu(stats); return -ENOMEM; + } nft_trans_chain_stats(trans) = stats; nft_trans_chain_update(trans) = true; @@ -1321,8 +1323,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, hookfn = type->hooks[hooknum]; basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); - if (basechain == NULL) + if (basechain == NULL) { + module_put(type->owner); return -ENOMEM; + } if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); -- cgit v1.2.3 From 8b7c36d810c61ab16997f4387fc16291410700f8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Thu, 29 Jan 2015 10:51:53 +0100 Subject: netlink: fix wrong subscription bitmask to group mapping in The subscription bitmask passed via struct sockaddr_nl is converted to the group number when calling the netlink_bind() and netlink_unbind() callbacks. The conversion is however incorrect since bitmask (1 << 0) needs to be mapped to group number 1. Note that you cannot specify the group number 0 (usually known as _NONE) from setsockopt() using NETLINK_ADD_MEMBERSHIP since this is rejected through -EINVAL. This problem became noticeable since 97840cb ("netfilter: nfnetlink: fix insufficient validation in nfnetlink_bind") when binding to bitmask (1 << 0) in ctnetlink. Reported-by: Andre Tomt Reported-by: Ivan Delalande Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 02fdde28dada..75532efa51cd 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1438,7 +1438,7 @@ static void netlink_undo_bind(int group, long unsigned int groups, for (undo = 0; undo < group; undo++) if (test_bit(undo, &groups)) - nlk->netlink_unbind(sock_net(sk), undo); + nlk->netlink_unbind(sock_net(sk), undo + 1); } static int netlink_bind(struct socket *sock, struct sockaddr *addr, @@ -1476,7 +1476,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, for (group = 0; group < nlk->ngroups; group++) { if (!test_bit(group, &groups)) continue; - err = nlk->netlink_bind(net, group); + err = nlk->netlink_bind(net, group + 1); if (!err) continue; netlink_undo_bind(group, groups, sk); -- cgit v1.2.3 From cfbf654efc6d78dc9812e030673b86f235bf677d Mon Sep 17 00:00:00 2001 From: Saran Maruti Ramanara Date: Thu, 29 Jan 2015 11:05:58 +0100 Subject: net: sctp: fix passing wrong parameter header to param_type2af in sctp_process_param When making use of RFC5061, section 4.2.4. for setting the primary IP address, we're passing a wrong parameter header to param_type2af(), always resulting in NULL being returned. At this point, param.p points to a sctp_addip_param struct, containing a sctp_paramhdr (type = 0xc004, length = var), and crr_id as a correlation id. Followed by that, as also presented in RFC5061 section 4.2.4., comes the actual sctp_addr_param, which also contains a sctp_paramhdr, but this time with the correct type SCTP_PARAM_IPV{4,6}_ADDRESS that param_type2af() can make use of. Since we already hold a pointer to addr_param from the previous line, just reuse it for param_type2af(). Fixes: d6de3097592b ("[SCTP]: Add the handling of "Set Primary IP Address" parameter to INIT") Signed-off-by: Saran Maruti Ramanara Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e49e231cef52..06320c8c1c86 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2608,7 +2608,7 @@ do_addr_param: addr_param = param.v + sizeof(sctp_addip_param_t); - af = sctp_get_af_specific(param_type2af(param.p->type)); + af = sctp_get_af_specific(param_type2af(addr_param->p.type)); if (af == NULL) break; -- cgit v1.2.3 From 207895fd388c7c4c48bc33055cd726d9e750298c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Jan 2015 12:15:03 +0100 Subject: net: mark some potential candidates __read_mostly They are all either written once or extremely rarely (e.g. from init code), so we can move them to the .data..read_mostly section. Signed-off-by: Daniel Borkmann Signed-off-by: David S.
Miller --- drivers/net/ipvlan/ipvlan_core.c | 2 +- net/bridge/br_netlink.c | 2 +- net/ipv4/devinet.c | 2 +- net/ipv6/addrconf.c | 2 +- net/mpls/mpls_gso.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 2e195289ddf4..2a175006028b 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -9,7 +9,7 @@ #include "ipvlan.h" -static u32 ipvlan_jhash_secret; +static u32 ipvlan_jhash_secret __read_mostly; void ipvlan_init_secret(void) { diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 3875ea51f6fe..e08b260f33fe 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -701,7 +701,7 @@ static size_t br_get_link_af_size(const struct net_device *dev) return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); } -static struct rtnl_af_ops br_af_ops = { +static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size, }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 59ebe16d06fc..f0b4a31d7bd6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2322,7 +2322,7 @@ static __net_initdata struct pernet_operations devinet_ops = { .exit = devinet_exit_net, }; -static struct rtnl_af_ops inet_af_ops = { +static struct rtnl_af_ops inet_af_ops __read_mostly = { .family = AF_INET, .fill_link_af = inet_fill_link_af, .get_link_af_size = inet_get_link_af_size, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7dcc065e2160..8623118cb2bb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5405,7 +5405,7 @@ static struct pernet_operations addrconf_ops = { .exit = addrconf_exit_net, }; -static struct rtnl_af_ops inet6_ops = { +static struct rtnl_af_ops inet6_ops __read_mostly = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 349295d21946..809df534a720 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -60,14 +60,14 @@ out: return segs; } -static struct packet_offload mpls_mc_offload = { +static struct packet_offload mpls_mc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_MC), .callbacks = { .gso_segment = mpls_gso_segment, }, }; -static struct packet_offload mpls_uc_offload = { +static struct packet_offload mpls_uc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_UC), .callbacks = { .gso_segment = mpls_gso_segment, -- cgit v1.2.3 From d4bcef3fbe887ff93b58da4fcf6df1eee416e8fa Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 29 Jan 2015 20:37:07 +0900 Subject: net: Fix vlan_get_protocol for stacked vlan vlan_get_protocol() could not get network protocol if a skb has a 802.1ad vlan tag or multiple vlans, which caused incorrect checksum calculation in several drivers. Fix vlan_get_protocol() to retrieve network protocol instead of incorrect vlan protocol. As the logic is the same as skb_network_protocol(), create a common helper function __vlan_get_protocol() and call it from existing functions. Signed-off-by: Toshiaki Makita Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 60 +++++++++++++++++++++++++++++++++++++------------ net/core/dev.c | 31 +------------------------ 2 files changed, 47 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 515a35e2a48a..960e666c51e4 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -472,27 +472,59 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) /** * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query + * @type: first vlan protocol + * @depth: buffer to store length of eth and vlan tags in bytes * * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 vlan_get_protocol(const struct sk_buff *skb) +static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, + int *depth) { - __be16 protocol = 0; - - if (vlan_tx_tag_present(skb) || - skb->protocol != cpu_to_be16(ETH_P_8021Q)) - protocol = skb->protocol; - else { - __be16 proto, *protop; - protop = skb_header_pointer(skb, offsetof(struct vlan_ethhdr, - h_vlan_encapsulated_proto), - sizeof(proto), &proto); - if (likely(protop)) - protocol = *protop; + unsigned int vlan_depth = skb->mac_len; + + /* if type is 802.1Q/AD then the header should already be + * present at mac_len - VLAN_HLEN (if mac_len > 0), or at + * ETH_HLEN otherwise + */ + if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (vlan_depth) { + if (WARN_ON(vlan_depth < VLAN_HLEN)) + return 0; + vlan_depth -= VLAN_HLEN; + } else { + vlan_depth = ETH_HLEN; + } + do { + struct vlan_hdr *vh; + + if (unlikely(!pskb_may_pull(skb, + vlan_depth + VLAN_HLEN))) + return 0; + + vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; + } while (type == htons(ETH_P_8021Q) || + type == htons(ETH_P_8021AD)); } - return protocol; + if (depth) + *depth = vlan_depth; + + return type; +} + +/** + * vlan_get_protocol - get protocol EtherType. + * @skb: skbuff to query + * + * Returns the EtherType of the packet, regardless of whether it is + * vlan encapsulated (normal or hardware accelerated) or not. + */ +static inline __be16 vlan_get_protocol(struct sk_buff *skb) +{ + return __vlan_get_protocol(skb, skb->protocol, NULL); } static inline void vlan_set_encap_proto(struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 171420e75b03..c87a2264a02b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2352,7 +2352,6 @@ EXPORT_SYMBOL(skb_checksum_help); __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { - unsigned int vlan_depth = skb->mac_len; __be16 type = skb->protocol; /* Tunnel gso handlers can set protocol to ethernet. 
*/ @@ -2366,35 +2365,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) type = eth->h_proto; } - /* if skb->protocol is 802.1Q/AD then the header should already be - * present at mac_len - VLAN_HLEN (if mac_len > 0), or at - * ETH_HLEN otherwise - */ - if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { - if (vlan_depth) { - if (WARN_ON(vlan_depth < VLAN_HLEN)) - return 0; - vlan_depth -= VLAN_HLEN; - } else { - vlan_depth = ETH_HLEN; - } - do { - struct vlan_hdr *vh; - - if (unlikely(!pskb_may_pull(skb, - vlan_depth + VLAN_HLEN))) - return 0; - - vh = (struct vlan_hdr *)(skb->data + vlan_depth); - type = vh->h_vlan_encapsulated_proto; - vlan_depth += VLAN_HLEN; - } while (type == htons(ETH_P_8021Q) || - type == htons(ETH_P_8021AD)); - } - - *depth = vlan_depth; - - return type; + return __vlan_get_protocol(skb, type, depth); } /** -- cgit v1.2.3 From a994a09097660d7ae6cdf868720f7a991b19b987 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 29 Jan 2015 18:22:51 +0100 Subject: irda: use msecs_to_jiffies for conversions This is only an API consolidation and should make things more readable; it replaces var * HZ / 1000 constructs by msecs_to_jiffies(var). Signed-off-by: Nicholas Mc Guire Signed-off-by: David S. Miller --- net/irda/irlap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 7f2cafddfb6e..1cde711bcab5 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -533,7 +533,7 @@ void irlap_discovery_request(struct irlap_cb *self, discovery_t *discovery) info.discovery = discovery; /* sysctl_slot_timeout bounds are checked in irsysctl.c - Jean II */ - self->slot_timeout = sysctl_slot_timeout * HZ / 1000; + self->slot_timeout = msecs_to_jiffies(sysctl_slot_timeout); irlap_do_event(self, DISCOVERY_REQUEST, NULL, &info); } @@ -1015,13 +1015,15 @@ void irlap_apply_connection_parameters(struct irlap_cb *self, int now) * Or, this is how much we can keep the pf bit in primary mode. * Therefore, it must be lower or equal than our *OWN* max turn around. * Jean II */ - self->poll_timeout = self->qos_tx.max_turn_time.value * HZ / 1000; + self->poll_timeout = msecs_to_jiffies( + self->qos_tx.max_turn_time.value); /* The Final timeout applies only to the primary station. * It defines the maximum time the primary wait (mostly in RECV mode) * for an answer from the secondary station before polling it again. * Therefore, it must be greater or equal than our *PARTNER* * max turn around time - Jean II */ - self->final_timeout = self->qos_rx.max_turn_time.value * HZ / 1000; + self->final_timeout = msecs_to_jiffies( + self->qos_rx.max_turn_time.value); /* The Watchdog Bit timeout applies only to the secondary station. * It defines the maximum time the secondary wait (mostly in RECV mode) * for poll from the primary station before getting annoyed. -- cgit v1.2.3 From f7697b1602d13ef80779caf23d13fa1511193144 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 30 Jan 2015 23:20:55 -0800 Subject: Bluetooth: Store OOB data present value for each set of remote OOB data Instead of doing a complex calculation every time the OOB data is used, just calculate the OOB data present value and store it with the OOB data raw values.
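The stored present value is a simple two-bit bitmap derived from which key pairs were supplied; the hci_add_remote_oob_data() hunk below assigns 0x00, 0x01, 0x02 or 0x03 accordingly. Purely as a compact illustration (the helper name here is invented, it is not part of the patch):

	#include <linux/types.h>

	/* Illustration only: bit 0 = P-192 hash/randomizer stored, bit 1 =
	 * P-256 pair stored, matching the values assigned in the diff below.
	 */
	static u8 example_oob_present(bool have_p192, bool have_p256)
	{
		u8 present = 0x00;

		if (have_p192)
			present |= 0x01;
		if (have_p256)
			present |= 0x02;

		return present;
	}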
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1780f1681ecf..a37e10f4e2b3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -145,6 +145,7 @@ struct oob_data { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; + u8 present; u8 hash192[16]; u8 rand192[16]; u8 hash256[16]; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5d4ac3fbbc08..f045c062f8f0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2581,9 +2581,15 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, if (hash192 && rand192) { memcpy(data->hash192, hash192, sizeof(data->hash192)); memcpy(data->rand192, rand192, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x03; } else { memset(data->hash192, 0, sizeof(data->hash192)); memset(data->rand192, 0, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x02; + else + data->present = 0x00; } if (hash256 && rand256) { @@ -2592,6 +2598,8 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, } else { memset(data->hash256, 0, sizeof(data->hash256)); memset(data->rand256, 0, sizeof(data->rand256)); + if (hash192 && rand192) + data->present = 0x01; } BT_DBG("%s for %pMR", hdev->name, bdaddr); -- cgit v1.2.3 From 659c7fb08463b3dcf6aebb4daa0c5463289ccb97 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 30 Jan 2015 23:20:56 -0800 Subject: Bluetooth: Fix OOB data present value for BR/EDR Secure Connections When BR/EDR Secure Connections has been enabled, the OOB data present value can take 2 additional values. The host has to clearly provide details about if P-192 OOB data, P-256 OOB data or a combination of P-192 and P-256 OOB data is present. In case BR/EDR Secure Connections is not enabled or not supported, then check that P-192 OOB data is actually present and return the correct value based on that. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_event.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 055625b7368f..af181f455f6f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3881,8 +3881,25 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) !memcmp(data->hash256, ZERO_KEY, 16))) return 0x00; - if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) + if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) { + /* When Secure Connections has been enabled, then just + * return the present value stored with the OOB data. It + * will contain the right information about which data + * is present. + */ + if (bredr_sc_enabled(hdev)) + return data->present; + + /* When Secure Connections is not enabled or actually + * not supported by the hardware, then check that if + * P-192 data values are present. 
+ */ + if (!memcmp(data->rand192, ZERO_KEY, 16) || + !memcmp(data->hash192, ZERO_KEY, 16)) + return 0x00; + return 0x01; + } return 0x00; } -- cgit v1.2.3 From 4775a4ea14d3e15645336e4b0f1355e526c57956 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 31 Jan 2015 00:15:52 -0800 Subject: Bluetooth: Fix OOB data present value for SMP pairing Before setting the OOB data present flag with SMP pairing, check the newly introduced present tracking that actual OOB data values have been provided. The existence of remote OOB data structure does not actually mean that the correct data values are available. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 37d9180bfe1c..c09a821f381d 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -620,7 +620,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn, oob_data = hci_find_remote_oob_data(hdev, &hcon->dst, bdaddr_type); - if (oob_data) { + if (oob_data && oob_data->present) { set_bit(SMP_FLAG_OOB, &smp->flags); oob_flag = SMP_OOB_PRESENT; memcpy(smp->rr, oob_data->rand256, 16); -- cgit v1.2.3 From 41bcfd50d52c83d43fe75fba7938a592dcb6589b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 31 Jan 2015 00:37:02 -0800 Subject: Bluetooth: Allow remote OOB data to only provide P-192 or P-256 values In case the remote only provided P-192 or P-256 data for OOB pairing, then make sure that the data value pointers are correctly set. That way the core can provide correct information when remote OOB data present information have to be communicated. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 78939e0ed1f4..ba3b4a5820b1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3672,7 +3672,7 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, status, &cp->addr, sizeof(cp->addr)); } else if (len == MGMT_ADD_REMOTE_OOB_EXT_DATA_SIZE) { struct mgmt_cp_add_remote_oob_ext_data *cp = data; - u8 *rand192, *hash192; + u8 *rand192, *hash192, *rand256, *hash256; u8 status; if (bdaddr_type_is_le(cp->addr.type)) { @@ -3691,13 +3691,34 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, rand192 = NULL; hash192 = NULL; } else { - rand192 = cp->rand192; - hash192 = cp->hash192; + /* In case one of the P-192 values is set to zero, + * then just disable OOB data for P-192. + */ + if (!memcmp(cp->rand192, ZERO_KEY, 16) || + !memcmp(cp->hash192, ZERO_KEY, 16)) { + rand192 = NULL; + hash192 = NULL; + } else { + rand192 = cp->rand192; + hash192 = cp->hash192; + } + } + + /* In case one of the P-256 values is set to zero, then just + * disable OOB data for P-256. 
+ */ + if (!memcmp(cp->rand256, ZERO_KEY, 16) || + !memcmp(cp->hash256, ZERO_KEY, 16)) { + rand256 = NULL; + hash256 = NULL; + } else { + rand256 = cp->rand256; + hash256 = cp->hash256; } err = hci_add_remote_oob_data(hdev, &cp->addr.bdaddr, cp->addr.type, hash192, rand192, - cp->hash256, cp->rand256); + hash256, rand256); if (err < 0) status = MGMT_STATUS_FAILED; else -- cgit v1.2.3 From 932eb7638ad7d9145620178992044b5e87356969 Mon Sep 17 00:00:00 2001 From: Kenneth Klette Jonassen Date: Thu, 29 Jan 2015 20:08:03 +0100 Subject: tcp: use SACK RTTs for CC Current behavior only passes RTTs from sequentially acked data to CC. If sender gets a combined ACK for segment 1 and SACK for segment 3, then the computed RTT for CC is the time between sending segment 1 and receiving SACK for segment 3. Pass the minimum computed RTT from any acked data to CC, i.e. time between sending segment 3 and receiving SACK for segment 3. Signed-off-by: Kenneth Klette Jonassen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71fb37c70581..ed11931f340f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3183,8 +3183,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) - ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); + if (ca_ops->pkts_acked) { + long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); + ca_ops->pkts_acked(sk, pkts_acked, rtt_us); + } } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { -- cgit v1.2.3 From 349c9e3c7341bbab6efbea39acfadeba9ab19f61 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Jan 2015 15:58:09 -0800 Subject: ipv4: icmp: use percpu allocation Get rid of nr_cpu_ids and use modern percpu allocation. Note that the sockets themselves are not yet allocated using NUMA affinity. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 3 ++- net/ipv4/icmp.c | 17 ++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 24945cefc4fd..7283f4d39ae2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -48,7 +48,8 @@ struct netns_ipv4 { struct hlist_head *fib_table_hash; struct sock *fibnl; - struct sock **icmp_sk; + struct sock * __percpu *icmp_sk; + struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; unsigned int tcp_metrics_hash_log; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 36f5584d93c5..5e564014a0b7 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -205,7 +205,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; */ static struct sock *icmp_sk(struct net *net) { - return net->ipv4.icmp_sk[smp_processor_id()]; + return *this_cpu_ptr(net->ipv4.icmp_sk); } static inline struct sock *icmp_xmit_lock(struct net *net) @@ -1140,8 +1140,8 @@ static void __net_exit icmp_sk_exit(struct net *net) int i; for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); net->ipv4.icmp_sk = NULL; } @@ -1149,9 +1149,8 @@ static int __net_init icmp_sk_init(struct net *net) { int i, err; - net->ipv4.icmp_sk = - kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv4.icmp_sk == NULL) + net->ipv4.icmp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { @@ -1162,7 +1161,7 @@ static int __net_init icmp_sk_init(struct net *net) if (err < 0) goto fail; - net->ipv4.icmp_sk[i] = sk; + *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff/skb_shared_info struct overhead. @@ -1203,8 +1202,8 @@ static int __net_init icmp_sk_init(struct net *net) fail: for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); return err; } -- cgit v1.2.3 From 6e07231a80de33a3721c971e560316d04db16de8 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 31 Jan 2015 15:07:51 -0800 Subject: Bluetooth: Expose Secure Simple Pairing debug mode setting in debugfs The value of the ssp_debug_mode should be accessible via debugfs to be able to determine if a BR/EDR controller generates debugs keys or not. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_debugfs.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index ead89a5ad9ce..51a424cfa94a 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -354,6 +354,24 @@ static int voice_setting_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, NULL, "0x%4.4llx\n"); +static ssize_t ssp_debug_mode_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hdev->ssp_debug_mode ? 
'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations ssp_debug_mode_fops = { + .open = simple_open, + .read = ssp_debug_mode_read, + .llseek = default_llseek, +}; + static int auto_accept_delay_set(void *data, u64 val) { struct hci_dev *hdev = data; @@ -474,9 +492,12 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) debugfs_create_file("voice_setting", 0444, hdev->debugfs, hdev, &voice_setting_fops); - if (lmp_ssp_capable(hdev)) + if (lmp_ssp_capable(hdev)) { + debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs, + hdev, &ssp_debug_mode_fops); debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, hdev, &auto_accept_delay_fops); + } if (lmp_sniff_capable(hdev)) { debugfs_create_file("idle_timeout", 0644, hdev->debugfs, -- cgit v1.2.3 From c50b33c80e6d6fe833aa42ff6ad9156b5d5e92b2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 31 Jan 2015 15:07:52 -0800 Subject: Bluetooth: Track changes from HCI Write Simple Pairing Debug Mode command When the HCI Write Simple Pairing Debug Mode command has been issued, the result needs to be tracked and stored. The hdev->ssp_debug_mode variable is already present, but was never updated when the mode in the controller was actually changed. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_event.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index af181f455f6f..2f2abd986997 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1489,6 +1489,21 @@ unlock: hci_dev_unlock(hdev); } +static void hci_cc_write_ssp_debug_mode(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 status = *((u8 *) skb->data); + u8 *mode; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + mode = hci_sent_cmd_data(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE); + if (mode) + hdev->ssp_debug_mode = *mode; +} + static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -2983,6 +2998,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_read_tx_power(hdev, skb); break; + case HCI_OP_WRITE_SSP_DEBUG_MODE: + hci_cc_write_ssp_debug_mode(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); break; -- cgit v1.2.3 From 0886aea6acd27006888c36bad1fa5f80dac1e171 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 31 Jan 2015 15:12:06 -0800 Subject: Bluetooth: Expose debug keys usage setting via debugfs To allow easier debugging when debug keys are generated, provide debugfs entry for checking the setting of debug keys usage. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_debugfs.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 51a424cfa94a..2272384755d5 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -212,6 +212,24 @@ static int conn_info_max_age_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, conn_info_max_age_set, "%llu\n"); +static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags) ? 
'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static const struct file_operations use_debug_keys_fops = { + .open = simple_open, + .read = use_debug_keys_read, + .llseek = default_llseek, +}; + static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { @@ -249,6 +267,10 @@ void hci_debugfs_create_common(struct hci_dev *hdev) debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev, &conn_info_max_age_fops); + if (lmp_ssp_capable(hdev) || lmp_le_capable(hdev)) + debugfs_create_file("use_debug_keys", 0444, hdev->debugfs, + hdev, &use_debug_keys_fops); + if (lmp_sc_capable(hdev) || lmp_le_capable(hdev)) debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, hdev, &sc_only_mode_fops); -- cgit v1.2.3 From 5789f37cbc560aff45ff4d00673705eac92d3b4d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 31 Jan 2015 19:54:39 -0800 Subject: Bluetooth: Expose hardware error code as debugfs entry When the Hardware Error event is sent by the controller, the Bluetooth core stores the error code. Expose it via debugfs so it can be retrieved later on. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_debugfs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 2272384755d5..5353f6ec8d68 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -256,6 +256,9 @@ void hci_debugfs_create_common(struct hci_dev *hdev) &hdev->manufacturer); debugfs_create_u8("hci_version", 0444, hdev->debugfs, &hdev->hci_ver); debugfs_create_u16("hci_revision", 0444, hdev->debugfs, &hdev->hci_rev); + debugfs_create_u8("hardware_error", 0444, hdev->debugfs, + &hdev->hw_error_code); + debugfs_create_file("device_list", 0444, hdev->debugfs, hdev, &device_list_fops); debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, -- cgit v1.2.3 From 6858bcd073c9ff36f5d341dc6da011a53954fa9a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 31 Jan 2015 21:01:07 -0800 Subject: Bluetooth: Expose remote OOB information as debugfs entry For debugging purposes it is good to know which OOB data is currently loaded for each controller. So expose that list via debugfs.
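The debugfs additions in this series follow two small patterns: a fixed-size buffer read via simple_read_from_buffer() for single values (as in the ssp_debug_mode and use_debug_keys hooks above) and a seq_file for lists (as in the remote_oob diff that follows). For reference only, a stripped-down single-value reader looks roughly like this; the names are invented and the snippet is not taken from any of the patches:

	#include <linux/debugfs.h>
	#include <linux/fs.h>
	#include <linux/uaccess.h>

	/* Read-only 'Y'/'N' attribute; simple_open() sets file->private_data
	 * to the pointer passed as 'data' to debugfs_create_file().
	 */
	static ssize_t example_flag_read(struct file *file, char __user *user_buf,
					 size_t count, loff_t *ppos)
	{
		bool *flag = file->private_data;
		char buf[3];

		buf[0] = *flag ? 'Y' : 'N';
		buf[1] = '\n';
		buf[2] = '\0';

		return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
	}

	static const struct file_operations example_flag_fops = {
		.open	= simple_open,
		.read	= example_flag_read,
		.llseek	= default_llseek,
	};

	/* Registered once per controller, e.g. beneath hdev->debugfs:
	 *	debugfs_create_file("example_flag", 0444, hdev->debugfs,
	 *			    &flag, &example_flag_fops);
	 */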
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_debugfs.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 5353f6ec8d68..65261e5d4b84 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -156,6 +156,35 @@ static const struct file_operations uuids_fops = { .release = single_release, }; +static int remote_oob_show(struct seq_file *f, void *ptr) +{ + struct hci_dev *hdev = f->private; + struct oob_data *data; + + hci_dev_lock(hdev); + list_for_each_entry(data, &hdev->remote_oob_data, list) { + seq_printf(f, "%pMR (type %u) %u %*phN %*phN %*phN %*phN\n", + &data->bdaddr, data->bdaddr_type, data->present, + 16, data->hash192, 16, data->rand192, + 16, data->hash256, 19, data->rand256); + } + hci_dev_unlock(hdev); + + return 0; +} + +static int remote_oob_open(struct inode *inode, struct file *file) +{ + return single_open(file, remote_oob_show, inode->i_private); +} + +static const struct file_operations remote_oob_fops = { + .open = remote_oob_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int conn_info_min_age_set(void *data, u64 val) { struct hci_dev *hdev = data; @@ -264,6 +293,8 @@ void hci_debugfs_create_common(struct hci_dev *hdev) debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, &blacklist_fops); debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + debugfs_create_file("remote_oob", 0400, hdev->debugfs, hdev, + &remote_oob_fops); debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev, &conn_info_min_age_fops); -- cgit v1.2.3 From bf21d7931a5d33ef79b805074e3d23d3009bff98 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 31 Jan 2015 13:20:27 -0800 Subject: Bluetooth: Fix OOB data present for BR/EDR Secure Connections Only mode When using Secure Connections Only mode, then only P-256 OOB data is valid and should be provided. In case userspace provides P-192 and P-256 OOB data, then the P-192 values will be set to zero. However the present value of the IO capability exchange still mentioned that both values would be available. Fix this by telling the controller clearly that only the P-256 OOB data is present. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_event.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2f2abd986997..a3fb094822b6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3890,24 +3890,28 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) if (!data) return 0x00; - /* When Secure Connections Only mode is enabled, then the P-256 - * values are required. If they are not available, then do not - * declare that OOB data is present. - */ - if (bredr_sc_enabled(hdev) && - test_bit(HCI_SC_ONLY, &hdev->dev_flags) && - (!memcmp(data->rand256, ZERO_KEY, 16) || - !memcmp(data->hash256, ZERO_KEY, 16))) - return 0x00; - if (conn->out || test_bit(HCI_CONN_REMOTE_OOB, &conn->flags)) { - /* When Secure Connections has been enabled, then just - * return the present value stored with the OOB data. It - * will contain the right information about which data - * is present. 
- */ - if (bredr_sc_enabled(hdev)) - return data->present; + if (bredr_sc_enabled(hdev)) { + /* When Secure Connections is enabled, then just + * return the present value stored with the OOB + * data. The stored value contains the right present + * information. However it can only be trusted when + * not in Secure Connection Only mode. + */ + if (!test_bit(HCI_SC_ONLY, &hdev->dev_flags)) + return data->present; + + /* When Secure Connections Only mode is enabled, then + * the P-256 values are required. If they are not + * available, then do not declare that OOB data is + * present. + */ + if (!memcmp(data->rand256, ZERO_KEY, 16) || + !memcmp(data->hash256, ZERO_KEY, 16)) + return 0x00; + + return 0x02; + } /* When Secure Connections is not enabled or actually * not supported by the hardware, then check that if -- cgit v1.2.3 From bdbbb8527b6f6a358dbcb70dac247034d665b8e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Jan 2015 21:35:05 -0800 Subject: ipv4: tcp: get rid of ugly unicast_sock In commit be9f4a44e7d41 ("ipv4: tcp: remove per net tcp_sock") I tried to address contention on a socket lock, but the solution I chose was horrible : commit 3a7c384ffd57e ("ipv4: tcp: unicast_sock should not land outside of TCP stack") addressed a selinux regression. commit 0980e56e506b ("ipv4: tcp: set unicast_sock uc_ttl to -1") took care of another regression. commit b5ec8eeac46 ("ipv4: fix ip_send_skb()") fixed another regression. commit 811230cd85 ("tcp: ipv4: initialize unicast_sock sk_pacing_rate") was another shot in the dark. Really, just use a proper socket per cpu, and remove the skb_orphan() call, to re-enable flow control. This solves a serious problem with FQ packet scheduler when used in hostile environments, as we do not want to allocate a flow structure for every RST packet sent in response to a spoofed packet. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/netns/ipv4.h | 1 + net/ipv4/ip_output.c | 30 +++--------------------------- net/ipv4/tcp_ipv4.c | 37 ++++++++++++++++++++++++++++++++----- 4 files changed, 37 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index f7cbd703d15d..09cf5aebb283 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -181,7 +181,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 24945cefc4fd..0ffef1a38efc 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -52,6 +52,7 @@ struct netns_ipv4 { struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; unsigned int tcp_metrics_hash_log; + struct sock * __percpu *tcp_sk; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 38a20a9cca1a..c373c0708d97 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1506,24 +1506,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, /* * Generic function to send a packet as reply to another packet. * Used to send some TCP resets/acks so far. - * - * Use a fake percpu inet socket to avoid false sharing and contention. 
*/ -static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { - .sk = { - .__sk_common = { - .skc_refcnt = ATOMIC_INIT(1), - }, - .sk_wmem_alloc = ATOMIC_INIT(1), - .sk_allocation = GFP_ATOMIC, - .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), - .sk_pacing_rate = ~0U, - }, - .pmtudisc = IP_PMTUDISC_WANT, - .uc_ttl = -1, -}; - -void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, @@ -1533,9 +1517,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, struct ipcm_cookie ipc; struct flowi4 fl4; struct rtable *rt = skb_rtable(skb); + struct net *net = sock_net(sk); struct sk_buff *nskb; - struct sock *sk; - struct inet_sock *inet; int err; if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) @@ -1566,15 +1549,11 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, if (IS_ERR(rt)) return; - inet = &get_cpu_var(unicast_sock); + inet_sk(sk)->tos = arg->tos; - inet->tos = arg->tos; - sk = &inet->sk; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; - sock_net_set(sk, net); - __skb_queue_head_init(&sk->sk_write_queue); sk->sk_sndbuf = sysctl_wmem_default; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); @@ -1590,13 +1569,10 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; - skb_orphan(nskb); skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } out: - put_cpu_var(unicast_sock); - ip_rt_put(rt); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a3f72d7fc06c..d22f54482bab 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -683,7 +683,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.bound_dev_if = sk->sk_bound_dev_if; arg.tos = ip_hdr(skb)->tos; - ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); @@ -767,7 +768,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, if (oif) arg.bound_dev_if = oif; arg.tos = tos; - ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); @@ -2428,14 +2430,39 @@ struct proto tcp_prot = { }; EXPORT_SYMBOL(tcp_prot); +static void __net_exit tcp_sk_exit(struct net *net) +{ + int cpu; + + for_each_possible_cpu(cpu) + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); + free_percpu(net->ipv4.tcp_sk); +} + static int __net_init tcp_sk_init(struct net *net) { + int res, cpu; + + net->ipv4.tcp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.tcp_sk) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct sock *sk; + + res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, + IPPROTO_TCP, net); + if (res) + goto fail; + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; + } net->ipv4.sysctl_tcp_ecn = 2; return 0; -} -static void __net_exit tcp_sk_exit(struct net *net) -{ +fail: + tcp_sk_exit(net); + + return res; } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) -- cgit v1.2.3 
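Taken together with the earlier "ipv4: icmp: use percpu allocation" patch, the change above settles on one pattern for kernel-generated replies: a real control socket per possible CPU instead of a shared fake socket. A condensed sketch of that pattern, with invented names and trimmed error handling (not code from either patch):

	#include <linux/percpu.h>
	#include <linux/cpumask.h>
	#include <net/sock.h>
	#include <net/inet_common.h>

	static struct sock * __percpu *example_ctl_sk;	/* hypothetical table */

	static int example_ctl_sk_init(struct net *net)
	{
		int cpu, err;

		/* alloc_percpu() returns zeroed storage, one slot per CPU. */
		example_ctl_sk = alloc_percpu(struct sock *);
		if (!example_ctl_sk)
			return -ENOMEM;

		for_each_possible_cpu(cpu) {
			struct sock *sk;

			err = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
						   IPPROTO_ICMP, net);
			if (err)
				goto fail;
			*per_cpu_ptr(example_ctl_sk, cpu) = sk;
		}
		return 0;

	fail:
		/* Slots not yet filled are still NULL thanks to the zeroing. */
		for_each_possible_cpu(cpu) {
			struct sock *sk = *per_cpu_ptr(example_ctl_sk, cpu);

			if (sk)
				inet_ctl_sock_destroy(sk);
		}
		free_percpu(example_ctl_sk);
		return err;
	}

	/* On the transmit path the current CPU's socket is picked up with
	 * this_cpu_ptr(), avoiding the shared fake socket and its contention.
	 */
	static struct sock *example_ctl_sk_get(void)
	{
		return *this_cpu_ptr(example_ctl_sk);
	}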
From add511b38266aa10c1079f9248854e6a415c4dc2 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 29 Jan 2015 22:40:12 -0800 Subject: bridge: add flags argument to ndo_bridge_setlink and ndo_bridge_dellink bridge flags are needed inside ndo_bridge_setlink/dellink handlers to avoid another call to parse IFLA_AF_SPEC inside these handlers This is used later in this series Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 3 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/rocker/rocker.c | 2 +- include/linux/netdevice.h | 6 ++++-- net/bridge/br_netlink.c | 4 ++-- net/bridge/br_private.h | 4 ++-- net/core/rtnetlink.c | 10 ++++++---- 7 files changed, 18 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 598c5070c629..efed92c7b731 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4327,7 +4327,8 @@ fw_exit: return status; } -static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh) +static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 flags) { struct be_adapter *adapter = netdev_priv(dev); struct nlattr *attr, *br_spec; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7bb421bfd84e..e4086fea4be2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7786,7 +7786,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } static int ixgbe_ndo_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, u16 flags) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct nlattr *attr, *br_spec; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 11f4ffcc113d..f0d607ca5e73 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3722,7 +3722,7 @@ skip: } static int rocker_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, u16 flags) { struct rocker_port *rocker_port = netdev_priv(dev); struct nlattr *protinfo; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3d37c6eb1732..16251e96e6aa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1154,13 +1154,15 @@ struct net_device_ops { int idx); int (*ndo_bridge_setlink)(struct net_device *dev, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + u16 flags); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask); int (*ndo_bridge_dellink)(struct net_device *dev, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + u16 flags); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e08b260f33fe..088e80203845 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -494,7 +494,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) } /* Change state and parameters on port. 
*/ -int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *protinfo; struct nlattr *afspec; @@ -550,7 +550,7 @@ out: } /* Delete port information */ -int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct net_bridge_port *p; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e8e3f3681680..de0919975a25 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -819,8 +819,8 @@ extern struct rtnl_link_ops br_link_ops; int br_netlink_init(void); void br_netlink_fini(void); void br_ifinfo_notify(int event, struct net_bridge_port *port); -int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); -int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); +int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); +int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fedd7ab4085a..673cb4c6f391 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2991,7 +2991,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags); if (err) goto out; @@ -3002,7 +3002,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_setlink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh, + flags); if (!err) { flags &= ~BRIDGE_FLAGS_SELF; @@ -3064,7 +3065,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); if (err) goto out; @@ -3075,7 +3076,8 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_dellink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh, + flags); if (!err) { flags &= ~BRIDGE_FLAGS_SELF; -- cgit v1.2.3 From 8a44dbb202617aa66968ba74fdabf1b654dfe661 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 29 Jan 2015 22:40:13 -0800 Subject: swdevice: add new apis to set and del bridge port attributes This patch adds two new api's netdev_switch_port_bridge_setlink and netdev_switch_port_bridge_dellink to offload bridge port attributes to switch port (The names of the apis look odd with 'switch_port_bridge', but am more inclined to change the prefix of the api to something else. Will take any suggestions). The api's look at the NETIF_F_HW_SWITCH_OFFLOAD feature flag to pass bridge port attributes to the port device. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 37 +++++++++++++++- net/switchdev/switchdev.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 205e63698da9..cfcdac2e5d25 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -43,7 +43,14 @@ int register_netdev_switch_notifier(struct notifier_block *nb); int unregister_netdev_switch_notifier(struct notifier_block *nb); int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, struct netdev_switch_notifier_info *info); - +int netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); #else static inline int netdev_switch_parent_id_get(struct net_device *dev, @@ -74,6 +81,34 @@ static inline int call_netdev_switch_notifiers(unsigned long val, struct net_dev return NOTIFY_DONE; } +static inline int netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +{ + return 0; +} + +static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +{ + return 0; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 22e02f4edd99..8c1e558db118 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -115,3 +115,113 @@ int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, return err; } EXPORT_SYMBOL(call_netdev_switch_notifiers); + +/** + * netdev_switch_port_bridge_setlink - Notify switch device port of bridge + * port attributes + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify switch device port of bridge port attributes + */ +int netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_setlink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_setlink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_setlink); + +/** + * netdev_switch_port_bridge_dellink - Notify switch device port of bridge + * port attribute delete + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify switch device port of bridge port attribute delete + */ +int netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_dellink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_dellink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_dellink); + +/** + * 
ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify master device slaves of bridge port attributes + */ +int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); + if (err && err != -EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink); + +/** + * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge dellink flags + * + * Notify master device slaves of bridge port attribute deletes + */ +int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); + if (err && err != -EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); -- cgit v1.2.3 From 68e331c785b85b78f4155e2ab6f90e976b609dc1 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 29 Jan 2015 22:40:14 -0800 Subject: bridge: offload bridge port attributes to switch asic if feature flag set This patch adds support to set/del bridge port attributes in hardware from the bridge driver. With this, when the user sends a bridge setlink message with no flags or master flags set, - the bridge driver ndo_bridge_setlink handler sets settings in the kernel - calls the swicthdev api to propagate the attrs to the switchdev hardware You can still use the self flag to go to the switch hw or switch port driver directly. With this, it also makes sure a notification goes out only after the attributes are set both in the kernel and hw. The patch calls switchdev api only if BRIDGE_FLAGS_SELF is not set. This is because the offload cases with BRIDGE_FLAGS_SELF are handled in the caller (in rtnetlink.c). Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 088e80203845..b93f42c515da 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "br_private.h" @@ -500,7 +501,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; - int err = 0; + int err = 0, ret_offload = 0; protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); @@ -542,9 +543,18 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) afspec, RTM_SETLINK); } + if (!(flags & BRIDGE_FLAGS_SELF)) { + /* set bridge attributes in hardware if supported + */ + ret_offload = netdev_switch_port_bridge_setlink(dev, nlh, + flags); + if (ret_offload && ret_offload != -EOPNOTSUPP) + br_warn(p->br, "error setting attrs on port %u(%s)\n", + (unsigned int)p->port_no, p->dev->name); + } + if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); - out: return err; } @@ -554,7 +564,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct net_bridge_port *p; - int err; + int err = 0, ret_offload = 0; afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!afspec) @@ -573,6 +583,16 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) */ br_ifinfo_notify(RTM_NEWLINK, p); + if (!(flags & BRIDGE_FLAGS_SELF)) { + /* del bridge attributes in hardware + */ + ret_offload = netdev_switch_port_bridge_dellink(dev, nlh, + flags); + if (ret_offload && ret_offload != -EOPNOTSUPP) + br_warn(p->br, "error deleting attrs on port %u (%s)\n", + (unsigned int)p->port_no, p->dev->name); + } + return err; } static int br_validate(struct nlattr *tb[], struct nlattr *data[]) -- cgit v1.2.3 From 2d28cfe7aada495f87bb439151e9bcc86998fb6d Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Sun, 1 Feb 2015 23:07:54 -0800 Subject: Bluetooth: Add le_scan_restart work for LE scan restarting Currently there is no way to restart le scan, and it's needed in service scan method. The way it work: it disable, and then enable le scan on controller. During the restart, we must remember when the scan was started, and it's duration, to later re-schedule the le_scan_disable work, that was stopped during the stop scan phase. 
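A minimal userspace C sketch of the timeout re-arming this change relies on: when the restart completes, the remaining time of the original scan window is recomputed so the scan-disable work can be queued again. It assumes jiffies-like unsigned tick counters and mirrors the wraparound handling; the names are illustrative, not the kernel code.

/* Sketch of re-computing the remaining scan-disable timeout after a
 * scan restart, with unsigned tick wraparound handled as described
 * above.  Assumes jiffies-like unsigned long counters; illustrative
 * only, not kernel code.
 */
#include <stdio.h>
#include <limits.h>

static unsigned long remaining_timeout(unsigned long now,
				       unsigned long scan_start,
				       unsigned long duration)
{
	unsigned long elapsed;

	if (now - scan_start > duration)	/* unsigned diff copes with wrap */
		return 0;			/* scan window already over */

	if (now >= scan_start)
		elapsed = now - scan_start;
	else
		elapsed = ULONG_MAX - scan_start + now;	/* counter wrapped */

	return duration - elapsed;
}

int main(void)
{
	/* Normal case: 3000 ticks into a 10240-tick scan window. */
	printf("%lu\n", remaining_timeout(13000, 10000, 10240));
	/* Wraparound case: scan started just before the counter wrapped. */
	printf("%lu\n", remaining_timeout(100, ULONG_MAX - 50, 10240));
	return 0;
}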
Signed-off-by: Jakub Pawlowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 5 +++ net/bluetooth/hci_core.c | 74 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 19 ++++++++++- 3 files changed, 97 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a37e10f4e2b3..d3a232be9d9b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -79,6 +79,8 @@ struct discovery_state { s8 rssi; u16 uuid_count; u8 (*uuids)[16]; + unsigned long scan_start; + unsigned long scan_duration; }; struct hci_conn_hash { @@ -354,6 +356,7 @@ struct hci_dev { unsigned long dev_flags; struct delayed_work le_scan_disable; + struct delayed_work le_scan_restart; __s8 adv_tx_power; __u8 adv_data[HCI_MAX_AD_LENGTH]; @@ -531,6 +534,8 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) hdev->discovery.uuid_count = 0; kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; + hdev->discovery.scan_start = 0; + hdev->discovery.scan_duration = 0; } bool hci_discovery_active(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index f045c062f8f0..3322d3f4c85a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1617,6 +1617,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work(&hdev->service_cache); cancel_delayed_work_sync(&hdev->le_scan_disable); + cancel_delayed_work_sync(&hdev->le_scan_restart); if (test_bit(HCI_MGMT, &hdev->dev_flags)) cancel_delayed_work_sync(&hdev->rpa_expired); @@ -2830,6 +2831,8 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, return; } + hdev->discovery.scan_start = 0; + switch (hdev->discovery.type) { case DISCOV_TYPE_LE: hci_dev_lock(hdev); @@ -2869,6 +2872,8 @@ static void le_scan_disable_work(struct work_struct *work) BT_DBG("%s", hdev->name); + cancel_delayed_work_sync(&hdev->le_scan_restart); + hci_req_init(&req, hdev); hci_req_add_le_scan_disable(&req); @@ -2878,6 +2883,74 @@ static void le_scan_disable_work(struct work_struct *work) BT_ERR("Disable LE scanning request failed: err %d", err); } +static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + unsigned long timeout, duration, scan_start, now; + + BT_DBG("%s", hdev->name); + + if (status) { + BT_ERR("Failed to restart LE scan: status %d", status); + return; + } + + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || + !hdev->discovery.scan_start) + return; + + /* When the scan was started, hdev->le_scan_disable has been queued + * after duration from scan_start. During scan restart this job + * has been canceled, and we need to queue it again after proper + * timeout, to make sure that scan does not run indefinitely. 
+ */ + duration = hdev->discovery.scan_duration; + scan_start = hdev->discovery.scan_start; + now = jiffies; + if (now - scan_start <= duration) { + int elapsed; + + if (now >= scan_start) + elapsed = now - scan_start; + else + elapsed = ULONG_MAX - scan_start + now; + + timeout = duration - elapsed; + } else { + timeout = 0; + } + queue_delayed_work(hdev->workqueue, + &hdev->le_scan_disable, timeout); +} + +static void le_scan_restart_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); + struct hci_request req; + struct hci_cp_le_set_scan_enable cp; + int err; + + BT_DBG("%s", hdev->name); + + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + hci_req_init(&req, hdev); + + hci_req_add_le_scan_disable(&req); + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + + err = hci_req_run(&req, le_scan_restart_work_complete); + if (err) + BT_ERR("Restart LE scan request failed: err %d", err); +} + /* Copy the Identity Address of the controller. * * If the controller has a public BD_ADDR, then by default use that one. @@ -2974,6 +3047,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); + INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ba3b4a5820b1..8c2520a7f386 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3896,6 +3896,9 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, hci_discovery_set_state(hdev, DISCOVERY_FINDING); + /* If the scan involves LE scan, pick proper timeout to schedule + * hdev->le_scan_disable that will stop it. + */ switch (hdev->discovery.type) { case DISCOV_TYPE_LE: timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); @@ -3912,9 +3915,23 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, break; } - if (timeout) + if (timeout) { + /* When service discovery is used and the controller has + * a strict duplicate filter, it is important to remember + * the start and duration of the scan. This is required + * for restarting scanning during the discovery phase. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks) && + (hdev->discovery.uuid_count > 0 || + hdev->discovery.rssi != HCI_RSSI_INVALID)) { + hdev->discovery.scan_start = jiffies; + hdev->discovery.scan_duration = timeout; + } + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout); + } unlock: hci_dev_unlock(hdev); -- cgit v1.2.3 From 4b0e0ceddf085a89173805cace44cd3c4c9d9d77 Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Sun, 1 Feb 2015 23:07:55 -0800 Subject: Bluetooth: Add restarting to service discovery When using LE_SCAN_FILTER_DUP_ENABLE, some controllers would send advertising report from each LE device only once. That means that we don't get any updates on RSSI value, and makes Service Discovery very slow. This patch adds restarting scan when in Service Discovery, and device with filtered uuid is found, but it's not in RSSI range to send event yet. This way if device moves into range, we will quickly get RSSI update. 
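The filtering decision this change introduces can be summarized with a small userspace C sketch: with a strict-duplicate-filter controller, a report below the RSSI threshold is not dropped outright but instead triggers a scan restart so that an updated RSSI can still arrive. This is a simplified model; the RSSI_INVALID placeholder value and the helper names are assumptions, not the kernel code.

/* Simplified model of the advertising-report decision described above.
 * RSSI_INVALID is an assumed placeholder value; illustrative only.
 */
#include <stdio.h>
#include <stdbool.h>

#define RSSI_INVALID 127	/* assumed stand-in for HCI_RSSI_INVALID */

enum action { DROP, DELIVER, RESTART_SCAN };

static enum action classify_report(int rssi, int threshold,
				   bool strict_dup_filter)
{
	if (threshold == RSSI_INVALID)	/* no threshold configured */
		return DELIVER;
	if (rssi == RSSI_INVALID)
		return DROP;		/* report carries no usable RSSI */
	if (rssi >= threshold)
		return DELIVER;
	/* Below threshold: restart scanning only if duplicate filtering
	 * would otherwise hide later RSSI updates for this device.
	 */
	return strict_dup_filter ? RESTART_SCAN : DROP;
}

int main(void)
{
	printf("%d\n", classify_report(-70, -60, true));   /* RESTART_SCAN */
	printf("%d\n", classify_report(-70, -60, false));  /* DROP */
	printf("%d\n", classify_report(-50, -60, true));   /* DELIVER */
	return 0;
}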
Signed-off-by: Jakub Pawlowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/mgmt.c | 53 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d3a232be9d9b..52863c3e0b13 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1334,6 +1334,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event); #define DISCOV_INTERLEAVED_TIMEOUT 5120 /* msec */ #define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04 #define DISCOV_BREDR_INQUIRY_LEN 0x08 +#define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t len); int mgmt_new_settings(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8c2520a7f386..9e50b5c09b02 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7262,6 +7262,21 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) return false; } +static void restart_le_scan(struct hci_dev *hdev) +{ + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + if (time_after(jiffies + DISCOV_LE_RESTART_DELAY, + hdev->discovery.scan_start + + hdev->discovery.scan_duration)) + return; + + queue_delayed_work(hdev->workqueue, &hdev->le_scan_restart, + DISCOV_LE_RESTART_DELAY); +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) @@ -7284,14 +7299,18 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, /* When using service discovery with a RSSI threshold, then check * if such a RSSI threshold is specified. If a RSSI threshold has - * been specified, then all results with a RSSI smaller than the - * RSSI threshold will be dropped. + * been specified, and HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, + * then all results with a RSSI smaller than the RSSI threshold will be + * dropped. If the quirk is set, let it through for further processing, + * as we might need to restart the scan. * * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, * the results are also dropped. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && - (rssi < hdev->discovery.rssi || rssi == HCI_RSSI_INVALID)) + (rssi == HCI_RSSI_INVALID || + (rssi < hdev->discovery.rssi && + !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) return; /* Make sure that the buffer is big enough. The 5 extra bytes @@ -7326,12 +7345,20 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, * kept and checking possible scan response data * will be skipped. */ - if (hdev->discovery.uuid_count > 0) + if (hdev->discovery.uuid_count > 0) { match = eir_has_uuids(eir, eir_len, hdev->discovery.uuid_count, hdev->discovery.uuids); - else + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. 
+ */ + if (match && test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); + } else { match = true; + } if (!match && !scan_rsp_len) return; @@ -7364,6 +7391,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, hdev->discovery.uuid_count, hdev->discovery.uuids)) return; + + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); } /* Append scan response data to event */ @@ -7377,6 +7412,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } + /* Validate the reported RSSI value against the RSSI threshold once more + * incase HCI_QUIRK_STRICT_DUPLICATE_FILTER forced a restart of LE + * scanning. + */ + if (hdev->discovery.rssi != HCI_RSSI_INVALID && + rssi < hdev->discovery.rssi) + return; + ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); ev_size = sizeof(*ev) + eir_len + scan_rsp_len; -- cgit v1.2.3 From 66f096f79166bcd56fe3c3607a51fb6aeff857b1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 2 Feb 2015 13:23:42 +0200 Subject: Bluetooth: Remove mgmt_rp_read_local_oob_ext_data struct This extended return parameters struct conflicts with the new Read Local OOB Extended Data command definition. To avoid the conflict simply rename the old "extended" version to the normal one and update the code appropriately to take into account the two possible response PDU sizes. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/mgmt.h | 4 ---- net/bluetooth/mgmt.c | 25 +++++++++---------------- 2 files changed, 9 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 95c34d5180fa..e218a30f2061 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -301,10 +301,6 @@ struct mgmt_cp_user_passkey_neg_reply { #define MGMT_OP_READ_LOCAL_OOB_DATA 0x0020 #define MGMT_READ_LOCAL_OOB_DATA_SIZE 0 struct mgmt_rp_read_local_oob_data { - __u8 hash[16]; - __u8 rand[16]; -} __packed; -struct mgmt_rp_read_local_oob_ext_data { __u8 hash192[16]; __u8 rand192[16]; __u8 hash256[16]; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9e50b5c09b02..9ec5390c85eb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7168,28 +7168,21 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, mgmt_status(status)); } else { - if (bredr_sc_enabled(hdev) && hash256 && rand256) { - struct mgmt_rp_read_local_oob_ext_data rp; + struct mgmt_rp_read_local_oob_data rp; + size_t rp_size = sizeof(rp); - memcpy(rp.hash192, hash192, sizeof(rp.hash192)); - memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + memcpy(rp.hash192, hash192, sizeof(rp.hash192)); + memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + if (bredr_sc_enabled(hdev) && hash256 && rand256) { memcpy(rp.hash256, hash256, sizeof(rp.hash256)); memcpy(rp.rand256, rand256, sizeof(rp.rand256)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); } else { - struct mgmt_rp_read_local_oob_data rp; - - memcpy(rp.hash, hash192, sizeof(rp.hash)); - memcpy(rp.rand, rand192, sizeof(rp.rand)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); + rp_size -= sizeof(rp.hash256) + sizeof(rp.rand256); } + + 
cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, 0, + &rp, rp_size); } mgmt_pending_remove(cmd); -- cgit v1.2.3 From 4aeee6871e8c3b043ef02996db8ac70a1af8be92 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:08 +0100 Subject: NFC: nci: Add dynamic logical connections support The current NCI core only support the RF static connection. For other NFC features such as Secure Element communication, we may need to create logical connections to the NFCEE (Execution Environment. In order to track each logical connection ID dynamically, we add a linked list of connection info pointers to the nci_dev structure. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 32 ++++++++++++++++++++------ net/nfc/nci/core.c | 47 ++++++++++++++++++++++++++++++++------ net/nfc/nci/data.c | 56 +++++++++++++++++++++++++++++++++------------- net/nfc/nci/ntf.c | 37 +++++++++++++++++++++--------- net/nfc/nci/rsp.c | 20 ++++++++++++++++- 5 files changed, 152 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 9e51bb4d841e..5e508741f208 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -82,6 +82,23 @@ struct nci_ops { #define NCI_MAX_SUPPORTED_RF_INTERFACES 4 #define NCI_MAX_DISCOVERED_TARGETS 10 +#define NCI_MAX_NUM_NFCEE 255 +#define NCI_MAX_CONN_ID 7 + +struct nci_conn_info { + struct list_head list; + __u8 id; /* can be an RF Discovery ID or an NFCEE ID */ + __u8 conn_id; + __u8 max_pkt_payload_len; + + atomic_t credits_cnt; + __u8 initial_num_credits; + + data_exchange_cb_t data_exchange_cb; + void *data_exchange_cb_context; + + struct sk_buff *rx_skb; +}; /* NCI Core structures */ struct nci_dev { @@ -95,7 +112,9 @@ struct nci_dev { unsigned long flags; atomic_t cmd_cnt; - atomic_t credits_cnt; + __u8 cur_conn_id; + + struct list_head conn_info_list; struct timer_list cmd_timer; struct timer_list data_timer; @@ -141,13 +160,10 @@ struct nci_dev { __u8 manufact_id; __u32 manufact_specific_info; - /* received during NCI_OP_RF_INTF_ACTIVATED_NTF */ - __u8 max_data_pkt_payload_size; - __u8 initial_num_credits; + /* Save RF Discovery ID or NFCEE ID under conn_create */ + __u8 cur_id; /* stored during nci_data_exchange */ - data_exchange_cb_t data_exchange_cb; - void *data_exchange_cb_context; struct sk_buff *rx_data_reassembly; /* stored during intf_activated_ntf */ @@ -200,7 +216,7 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb); int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, - int err); + __u8 conn_id, int err); void nci_clear_target_list(struct nci_dev *ndev); /* ----- NCI requests ----- */ @@ -209,6 +225,8 @@ void nci_clear_target_list(struct nci_dev *ndev); #define NCI_REQ_CANCELED 2 void nci_req_complete(struct nci_dev *ndev, int result); +struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, + int conn_id); /* ----- NCI status code ----- */ int nci_to_errno(__u8 code); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 51feb5e63008..eb607970bd56 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -45,6 +45,19 @@ static void nci_cmd_work(struct work_struct *work); static void nci_rx_work(struct work_struct *work); static void nci_tx_work(struct work_struct *work); +struct nci_conn_info 
*nci_get_conn_info_by_conn_id(struct nci_dev *ndev, + int conn_id) +{ + struct nci_conn_info *conn_info; + + list_for_each_entry(conn_info, &ndev->conn_info_list, list) { + if (conn_info->conn_id == conn_id) + return conn_info; + } + + return NULL; +} + /* ---- NCI requests ---- */ void nci_req_complete(struct nci_dev *ndev, int result) @@ -712,6 +725,11 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; + struct nci_conn_info *conn_info; + + conn_info = nci_get_conn_info_by_conn_id(ndev, NCI_STATIC_RF_CONN_ID); + if (!conn_info) + return -EPROTO; pr_debug("target_idx %d, len %d\n", target->idx, skb->len); @@ -724,8 +742,8 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, return -EBUSY; /* store cb and context to be used on receiving data */ - ndev->data_exchange_cb = cb; - ndev->data_exchange_cb_context = cb_context; + conn_info->data_exchange_cb = cb; + conn_info->data_exchange_cb_context = cb_context; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) @@ -913,6 +931,7 @@ int nci_register_device(struct nci_dev *ndev) (unsigned long) ndev); mutex_init(&ndev->req_lock); + INIT_LIST_HEAD(&ndev->conn_info_list); rc = nfc_register_device(ndev->nfc_dev); if (rc) @@ -938,12 +957,19 @@ EXPORT_SYMBOL(nci_register_device); */ void nci_unregister_device(struct nci_dev *ndev) { + struct nci_conn_info *conn_info, *n; + nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); destroy_workqueue(ndev->rx_wq); destroy_workqueue(ndev->tx_wq); + list_for_each_entry_safe(conn_info, n, &ndev->conn_info_list, list) { + list_del(&conn_info->list); + /* conn_info is allocated with devm_kzalloc */ + } + nfc_unregister_device(ndev->nfc_dev); } EXPORT_SYMBOL(nci_unregister_device); @@ -1027,20 +1053,25 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) static void nci_tx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work); + struct nci_conn_info *conn_info; struct sk_buff *skb; - pr_debug("credits_cnt %d\n", atomic_read(&ndev->credits_cnt)); + conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); + if (!conn_info) + return; + + pr_debug("credits_cnt %d\n", atomic_read(&conn_info->credits_cnt)); /* Send queued tx data */ - while (atomic_read(&ndev->credits_cnt)) { + while (atomic_read(&conn_info->credits_cnt)) { skb = skb_dequeue(&ndev->tx_q); if (!skb) return; /* Check if data flow control is used */ - if (atomic_read(&ndev->credits_cnt) != + if (atomic_read(&conn_info->credits_cnt) != NCI_DATA_FLOW_CONTROL_NOT_USED) - atomic_dec(&ndev->credits_cnt); + atomic_dec(&conn_info->credits_cnt); pr_debug("NCI TX: MT=data, PBF=%d, conn_id=%d, plen=%d\n", nci_pbf(skb->data), @@ -1092,7 +1123,9 @@ static void nci_rx_work(struct work_struct *work) if (test_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags)) { /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -ETIMEDOUT); + nci_data_exchange_complete(ndev, NULL, + ndev->cur_conn_id, + -ETIMEDOUT); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); } diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index a2de2a8cb00e..566466d90048 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -36,10 +36,20 @@ /* Complete data exchange transaction and forward skb to nfc core */ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, - int err) + __u8 conn_id, 
int err) { - data_exchange_cb_t cb = ndev->data_exchange_cb; - void *cb_context = ndev->data_exchange_cb_context; + struct nci_conn_info *conn_info; + data_exchange_cb_t cb; + void *cb_context; + + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + kfree_skb(skb); + goto exit; + } + + cb = conn_info->data_exchange_cb; + cb_context = conn_info->data_exchange_cb_context; pr_debug("len %d, err %d\n", skb ? skb->len : 0, err); @@ -48,9 +58,6 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); if (cb) { - ndev->data_exchange_cb = NULL; - ndev->data_exchange_cb_context = NULL; - /* forward skb to nfc core */ cb(cb_context, skb, err); } else if (skb) { @@ -60,6 +67,7 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, kfree_skb(skb); } +exit: clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); } @@ -85,6 +93,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, static int nci_queue_tx_data_frags(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { + struct nci_conn_info *conn_info; int total_len = skb->len; unsigned char *data = skb->data; unsigned long flags; @@ -95,11 +104,17 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, pr_debug("conn_id 0x%x, total_len %d\n", conn_id, total_len); + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + rc = -EPROTO; + goto free_exit; + } + __skb_queue_head_init(&frags_q); while (total_len) { frag_len = - min_t(int, total_len, ndev->max_data_pkt_payload_size); + min_t(int, total_len, conn_info->max_pkt_payload_len); skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), @@ -151,12 +166,19 @@ exit: /* Send NCI data */ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { + struct nci_conn_info *conn_info; int rc = 0; pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len); + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + rc = -EPROTO; + goto free_exit; + } + /* check if the packet need to be fragmented */ - if (skb->len <= ndev->max_data_pkt_payload_size) { + if (skb->len <= conn_info->max_pkt_payload_len) { /* no need to fragment packet */ nci_push_data_hdr(ndev, conn_id, skb, NCI_PBF_LAST); @@ -170,6 +192,7 @@ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) } } + ndev->cur_conn_id = conn_id; queue_work(ndev->tx_wq, &ndev->tx_work); goto exit; @@ -185,7 +208,7 @@ exit: static void nci_add_rx_data_frag(struct nci_dev *ndev, struct sk_buff *skb, - __u8 pbf, __u8 status) + __u8 pbf, __u8 conn_id, __u8 status) { int reassembly_len; int err = 0; @@ -229,16 +252,13 @@ static void nci_add_rx_data_frag(struct nci_dev *ndev, } exit: - if (ndev->nfc_dev->rf_mode == NFC_RF_INITIATOR) { - nci_data_exchange_complete(ndev, skb, err); - } else if (ndev->nfc_dev->rf_mode == NFC_RF_TARGET) { + if (ndev->nfc_dev->rf_mode == NFC_RF_TARGET) { /* Data received in Target mode, forward to nfc core */ err = nfc_tm_data_received(ndev->nfc_dev, skb); if (err) pr_err("unable to handle received data\n"); } else { - pr_err("rf mode unknown\n"); - kfree_skb(skb); + nci_data_exchange_complete(ndev, skb, conn_id, err); } } @@ -247,6 +267,8 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u8 pbf = nci_pbf(skb->data); __u8 status = 0; + __u8 conn_id = nci_conn_id(skb->data); + struct nci_conn_info *conn_info; pr_debug("len %d\n", skb->len); @@ -255,6 +277,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, 
struct sk_buff *skb) nci_conn_id(skb->data), nci_plen(skb->data)); + conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); + if (!conn_info) + return; + /* strip the nci data header */ skb_pull(skb, NCI_DATA_HDR_SIZE); @@ -268,5 +294,5 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) skb_trim(skb, (skb->len - 1)); } - nci_add_rx_data_frag(ndev, skb, pbf, nci_to_errno(status)); + nci_add_rx_data_frag(ndev, skb, pbf, conn_id, nci_to_errno(status)); } diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 22e453cb787d..28fdbe234bd4 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -43,6 +43,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { struct nci_core_conn_credit_ntf *ntf = (void *) skb->data; + struct nci_conn_info *conn_info; int i; pr_debug("num_entries %d\n", ntf->num_entries); @@ -59,11 +60,13 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, i, ntf->conn_entries[i].conn_id, ntf->conn_entries[i].credits); - if (ntf->conn_entries[i].conn_id == NCI_STATIC_RF_CONN_ID) { - /* found static rf connection */ - atomic_add(ntf->conn_entries[i].credits, - &ndev->credits_cnt); - } + conn_info = nci_get_conn_info_by_conn_id(ndev, + ntf->conn_entries[i].conn_id); + if (!conn_info) + return; + + atomic_add(ntf->conn_entries[i].credits, + &conn_info->credits_cnt); } /* trigger the next tx */ @@ -96,7 +99,7 @@ static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev, /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -EIO); + nci_data_exchange_complete(ndev, NULL, ntf->conn_id, -EIO); } static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, @@ -513,6 +516,7 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; struct nci_rf_intf_activated_ntf ntf; __u8 *data = skb->data; int err = NCI_STATUS_OK; @@ -614,11 +618,17 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, exit: if (err == NCI_STATUS_OK) { - ndev->max_data_pkt_payload_size = ntf.max_data_pkt_payload_size; - ndev->initial_num_credits = ntf.initial_num_credits; + conn_info = nci_get_conn_info_by_conn_id(ndev, + NCI_STATIC_RF_CONN_ID); + if (!conn_info) + return; + + conn_info->max_pkt_payload_len = ntf.max_data_pkt_payload_size; + conn_info->initial_num_credits = ntf.initial_num_credits; /* set the available credits to initial value */ - atomic_set(&ndev->credits_cnt, ndev->initial_num_credits); + atomic_set(&conn_info->credits_cnt, + conn_info->initial_num_credits); /* store general bytes to be reported later in dep_link_up */ if (ntf.rf_interface == NCI_RF_INTERFACE_NFC_DEP) { @@ -661,10 +671,16 @@ exit: static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; struct nci_rf_deactivate_ntf *ntf = (void *) skb->data; pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason); + conn_info = + nci_get_conn_info_by_conn_id(ndev, NCI_STATIC_RF_CONN_ID); + if (!conn_info) + return; + /* drop tx data queue */ skb_queue_purge(&ndev->tx_q); @@ -676,7 +692,8 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -EIO); + 
nci_data_exchange_complete(ndev, NULL, NCI_STATIC_RF_CONN_ID, + -EIO); switch (ntf->type) { case NCI_DEACTIVATE_TYPE_IDLE_MODE: diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 041de51ccdbe..93b914937263 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -140,13 +140,31 @@ static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; __u8 status = skb->data[0]; pr_debug("status 0x%x\n", status); - if (status == NCI_STATUS_OK) + if (status == NCI_STATUS_OK) { atomic_set(&ndev->state, NCI_DISCOVERY); + conn_info = nci_get_conn_info_by_conn_id(ndev, + NCI_STATIC_RF_CONN_ID); + if (!conn_info) { + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(struct nci_conn_info), + GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + conn_info->conn_id = NCI_STATIC_RF_CONN_ID; + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + } + } + +exit: nci_req_complete(ndev, status); } -- cgit v1.2.3 From af9c8aa67d07adcd3b41fb2934af7af056eabecf Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:10 +0100 Subject: NFC: nci: Add NFCEE discover support NFCEEs (NFC Execution Environment) have to be explicitly discovered by sending the NCI_OP_NFCEE_DISCOVER_CMD command. The NFCC will respond to this command by telling us how many NFCEEs are connected to it. Then the NFCC sends a notification command for each and every NFCEE connected. Here we implement support for sending NCI_OP_NFCEE_DISCOVER_CMD command, receiving the response and the potential notifications. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 4 ++++ net/nfc/nci/core.c | 17 +++++++++++++++++ net/nfc/nci/ntf.c | 30 ++++++++++++++++++++++++++++++ net/nfc/nci/rsp.c | 21 +++++++++++++++++++++ 4 files changed, 72 insertions(+) (limited to 'net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 5e508741f208..31ad795aa4b5 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -100,6 +100,8 @@ struct nci_conn_info { struct sk_buff *rx_skb; }; +#define NCI_INVALID_CONN_ID 0x80 + /* NCI Core structures */ struct nci_dev { struct nfc_dev *nfc_dev; @@ -182,6 +184,8 @@ void nci_unregister_device(struct nci_dev *ndev); int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); +int nci_nfcee_discover(struct nci_dev *ndev, u8 action); + static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, gfp_t how) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index eb607970bd56..a25857548524 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -469,6 +469,23 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val) } EXPORT_SYMBOL(nci_set_config); +static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_nfcee_discover_cmd cmd; + __u8 action = opt; + + cmd.discovery_action = action; + + nci_send_cmd(ndev, NCI_OP_NFCEE_DISCOVER_CMD, 1, &cmd); +} + +int nci_nfcee_discover(struct nci_dev *ndev, u8 action) +{ + return nci_request(ndev, nci_nfcee_discover_req, action, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_nfcee_discover); + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); diff --git a/net/nfc/nci/ntf.c 
b/net/nfc/nci/ntf.c index 28fdbe234bd4..4c0be7e82d29 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -713,6 +713,33 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, nci_req_complete(ndev, NCI_STATUS_OK); } +static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + u8 status = NCI_STATUS_OK; + struct nci_conn_info *conn_info; + struct nci_nfcee_discover_ntf *nfcee_ntf = + (struct nci_nfcee_discover_ntf *)skb->data; + + pr_debug("\n"); + + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(struct nci_conn_info), GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->id = nfcee_ntf->nfcee_id; + conn_info->conn_id = NCI_INVALID_CONN_ID; + + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + +exit: + nci_req_complete(ndev, status); +} + void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 ntf_opcode = nci_opcode(skb->data); @@ -751,6 +778,9 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_rf_deactivate_ntf_packet(ndev, skb); break; + case NCI_OP_NFCEE_DISCOVER_NTF: + nci_nfcee_discover_ntf_packet(ndev, skb); + break; default: pr_err("unknown ntf opcode 0x%x\n", ntf_opcode); break; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 93b914937263..ee094dfab2ed 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -196,6 +196,23 @@ static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev, } } +static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_nfcee_discover_rsp *discover_rsp; + + if (skb->len != 2) { + nci_req_complete(ndev, NCI_STATUS_NFCEE_PROTOCOL_ERROR); + return; + } + + discover_rsp = (struct nci_nfcee_discover_rsp *)skb->data; + + if (discover_rsp->status != NCI_STATUS_OK || + discover_rsp->num_nfcee == 0) + nci_req_complete(ndev, discover_rsp->status); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 rsp_opcode = nci_opcode(skb->data); @@ -241,6 +258,10 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_rf_deactivate_rsp_packet(ndev, skb); break; + case NCI_OP_NFCEE_DISCOVER_RSP: + nci_nfcee_discover_rsp_packet(ndev, skb); + break; + default: pr_err("unknown rsp opcode 0x%x\n", rsp_opcode); break; -- cgit v1.2.3 From f7f793f31378d5e83276871339c2a8374b0e8657 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:11 +0100 Subject: NFC: nci: Add NFCEE enabling and disabling support NFCEEs can be enabled or disabled by sending the NCI_OP_NFCEE_MODE_SET_CMD command to the NFCC. This patch provides an API for drivers to enable and disable e.g. their NCI discoveredd secure elements. 
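A short userspace C sketch of the two-byte NFCEE_MODE_SET payload used by this API; the enable/disable constants mirror the patch, while the helper and the example NFCEE ID are illustrative assumptions.

/* Sketch of the NFCEE_MODE_SET payload introduced above.  Constants
 * mirror the patch; the helper and example NFCEE ID are illustrative.
 */
#include <stdio.h>
#include <stdint.h>

#define NCI_NFCEE_DISABLE 0x00
#define NCI_NFCEE_ENABLE  0x01

struct nci_nfcee_mode_set_cmd {
	uint8_t nfcee_id;
	uint8_t nfcee_mode;
} __attribute__((packed));

static void build_mode_set(struct nci_nfcee_mode_set_cmd *cmd,
			   uint8_t nfcee_id, uint8_t mode)
{
	cmd->nfcee_id = nfcee_id;
	cmd->nfcee_mode = mode;
}

int main(void)
{
	struct nci_nfcee_mode_set_cmd cmd;

	/* Hypothetical NFCEE ID 0x01, as reported by NFCEE_DISCOVER_NTF. */
	build_mode_set(&cmd, 0x01, NCI_NFCEE_ENABLE);
	printf("payload: %02x %02x\n", cmd.nfcee_id, cmd.nfcee_mode);

	build_mode_set(&cmd, 0x01, NCI_NFCEE_DISABLE);
	printf("payload: %02x %02x\n", cmd.nfcee_id, cmd.nfcee_mode);
	return 0;
}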
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 9 +++++++++ include/net/nfc/nci_core.h | 1 + net/nfc/nci/core.c | 21 +++++++++++++++++++++ net/nfc/nci/rsp.c | 13 +++++++++++++ 4 files changed, 44 insertions(+) (limited to 'net') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 6d99e8f79835..230f227bb319 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -284,6 +284,14 @@ struct nci_nfcee_discover_cmd { __u8 discovery_action; } __packed; +#define NCI_OP_NFCEE_MODE_SET_CMD nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x01) +#define NCI_NFCEE_DISABLE 0x00 +#define NCI_NFCEE_ENABLE 0x01 +struct nci_nfcee_mode_set_cmd { + __u8 nfcee_id; + __u8 nfcee_mode; +} __packed; + /* ----------------------- */ /* ---- NCI Responses ---- */ /* ----------------------- */ @@ -333,6 +341,7 @@ struct nci_nfcee_discover_rsp { __u8 num_nfcee; } __packed; +#define NCI_OP_NFCEE_MODE_SET_RSP nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x01) /* --------------------------- */ /* ---- NCI Notifications ---- */ /* --------------------------- */ diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 31ad795aa4b5..6cf6ee2b696d 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -185,6 +185,7 @@ int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); int nci_nfcee_discover(struct nci_dev *ndev, u8 action); +int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode); static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index a25857548524..e5fb8c8eed94 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -486,6 +486,27 @@ int nci_nfcee_discover(struct nci_dev *ndev, u8 action) } EXPORT_SYMBOL(nci_nfcee_discover); +static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_nfcee_mode_set_cmd *cmd = + (struct nci_nfcee_mode_set_cmd *)opt; + + nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, + sizeof(struct nci_nfcee_mode_set_cmd), cmd); +} + +int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) +{ + struct nci_nfcee_mode_set_cmd cmd; + + cmd.nfcee_id = nfcee_id; + cmd.nfcee_mode = nfcee_mode; + + return nci_request(ndev, nci_nfcee_mode_set_req, (unsigned long)&cmd, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_nfcee_mode_set); + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index ee094dfab2ed..0a3e98240dd6 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -213,6 +213,15 @@ static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, nci_req_complete(ndev, discover_rsp->status); } +static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + nci_req_complete(ndev, status); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 rsp_opcode = nci_opcode(skb->data); @@ -262,6 +271,10 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_nfcee_discover_rsp_packet(ndev, skb); break; + case NCI_OP_NFCEE_MODE_SET_RSP: + nci_nfcee_mode_set_rsp_packet(ndev, skb); + break; + default: pr_err("unknown rsp opcode 0x%x\n", rsp_opcode); break; -- cgit v1.2.3 From 736bb9577407d3556d81c3c3cd57581cd3ae10ea Mon Sep 17 00:00:00 2001 From: 
Christophe Ricard Date: Sun, 1 Feb 2015 22:26:12 +0100 Subject: NFC: nci: Support logical connections management In order to communicate with an NFCEE, we need to open a logical connection to it, by sending the NCI_OP_CORE_CONN_CREATE_CMD command to the NFCC. It's left up to the drivers to decide when to close an already opened logical connection. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 30 +++++++++++++++++++++++++++++ include/net/nfc/nci_core.h | 3 +++ net/nfc/nci/core.c | 38 +++++++++++++++++++++++++++++++++++++ net/nfc/nci/rsp.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+) (limited to 'net') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 230f227bb319..deac78b9a53c 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -243,6 +243,26 @@ struct nci_core_set_config_cmd { struct set_config_param param; /* support 1 param per cmd is enough */ } __packed; +#define NCI_OP_CORE_CONN_CREATE_CMD nci_opcode_pack(NCI_GID_CORE, 0x04) +struct dest_spec_params { + __u8 id; + __u8 protocol; +} __packed; + +struct core_conn_create_dest_spec_params { + __u8 type; + __u8 length; + struct dest_spec_params value; +} __packed; + +struct nci_core_conn_create_cmd { + __u8 destination_type; + __u8 number_destination_params; + struct core_conn_create_dest_spec_params params; +} __packed; + +#define NCI_OP_CORE_CONN_CLOSE_CMD nci_opcode_pack(NCI_GID_CORE, 0x05) + #define NCI_OP_RF_DISCOVER_MAP_CMD nci_opcode_pack(NCI_GID_RF_MGMT, 0x00) struct disc_map_config { __u8 rf_protocol; @@ -327,6 +347,16 @@ struct nci_core_set_config_rsp { __u8 params_id[0]; /* variable size array */ } __packed; +#define NCI_OP_CORE_CONN_CREATE_RSP nci_opcode_pack(NCI_GID_CORE, 0x04) +struct nci_core_conn_create_rsp { + __u8 status; + __u8 max_ctrl_pkt_payload_len; + __u8 credits; + __u8 conn_id; +} __packed; + +#define NCI_OP_CORE_CONN_CLOSE_RSP nci_opcode_pack(NCI_GID_CORE, 0x05) + #define NCI_OP_RF_DISCOVER_MAP_RSP nci_opcode_pack(NCI_GID_RF_MGMT, 0x00) #define NCI_OP_RF_DISCOVER_RSP nci_opcode_pack(NCI_GID_RF_MGMT, 0x03) diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 6cf6ee2b696d..8ba3e38e4167 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -186,6 +186,9 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); int nci_nfcee_discover(struct nci_dev *ndev, u8 action); int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode); +int nci_core_conn_create(struct nci_dev *ndev, + struct core_conn_create_dest_spec_params *params); +int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index e5fb8c8eed94..2a96ed68c7bb 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -507,6 +507,44 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) } EXPORT_SYMBOL(nci_nfcee_mode_set); +static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_core_conn_create_cmd cmd; + struct core_conn_create_dest_spec_params *params = + (struct core_conn_create_dest_spec_params *)opt; + + cmd.destination_type = NCI_DESTINATION_NFCEE; + cmd.number_destination_params = 1; + memcpy(&cmd.params.type, params, + sizeof(struct core_conn_create_dest_spec_params)); + nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, + sizeof(struct nci_core_conn_create_cmd), &cmd); +} + 
+int nci_core_conn_create(struct nci_dev *ndev, + struct core_conn_create_dest_spec_params *params) +{ + ndev->cur_id = params->value.id; + return nci_request(ndev, nci_core_conn_create_req, + (unsigned long)params, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_conn_create); + +static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) +{ + __u8 conn_id = opt; + + nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); +} + +int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) +{ + return nci_request(ndev, nci_core_conn_close_req, conn_id, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_conn_close); + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 0a3e98240dd6..31ccf7d05e82 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -222,6 +222,45 @@ static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, nci_req_complete(ndev, status); } +static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + struct nci_conn_info *conn_info; + struct nci_core_conn_create_rsp *rsp; + + pr_debug("status 0x%x\n", status); + + if (status == NCI_STATUS_OK) { + rsp = (struct nci_core_conn_create_rsp *)skb->data; + list_for_each_entry(conn_info, &ndev->conn_info_list, list) { + if (conn_info->id == ndev->cur_id) + break; + } + + if (!conn_info || conn_info->id != ndev->cur_id) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->conn_id = rsp->conn_id; + conn_info->max_pkt_payload_len = rsp->max_ctrl_pkt_payload_len; + atomic_set(&conn_info->credits_cnt, rsp->credits); + } + +exit: + nci_req_complete(ndev, status); +} + +static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + nci_req_complete(ndev, status); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 rsp_opcode = nci_opcode(skb->data); @@ -251,6 +290,14 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_core_set_config_rsp_packet(ndev, skb); break; + case NCI_OP_CORE_CONN_CREATE_RSP: + nci_core_conn_create_rsp_packet(ndev, skb); + break; + + case NCI_OP_CORE_CONN_CLOSE_RSP: + nci_core_conn_close_rsp_packet(ndev, skb); + break; + case NCI_OP_RF_DISCOVER_MAP_RSP: nci_rf_disc_map_rsp_packet(ndev, skb); break; -- cgit v1.2.3 From 11f54f228643d0248ec00ce8c9fb8d872f87e7b8 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:14 +0100 Subject: NFC: nci: Add HCI over NCI protocol support According to the NCI specification, one can use HCI over NCI to talk with specific NFCEE. The HCI network is viewed as one logical NFCEE. This is needed to support secure element running HCI only firmwares embedded on an NCI capable chipset, like e.g. the st21nfcb. There is some duplication between this piece of code and the HCI core code, but the latter would need to be abstracted even more to be able to use NCI as a logical transport for HCP packets. 
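A userspace C model of the gate-to-pipe routing table at the heart of this HCI-over-NCI layer: every HCP message is sent on the pipe bound to its destination gate, so the table starts out invalid and is filled as pipes are created. The constants mirror the patch; the helper names and the example gate/pipe values are illustrative assumptions, not the kernel API.

/* Userspace model of the gate-to-pipe table used by the HCI over NCI
 * layer described above.  Constants mirror the patch; helpers and the
 * example gate/pipe values are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define NCI_HCI_MAX_GATES    256
#define NCI_HCI_INVALID_PIPE 0x80
#define NCI_HCI_ADMIN_GATE   0x00
#define NCI_HCI_ADMIN_PIPE   0x01

struct hci_dev_model {
	uint8_t gate2pipe[NCI_HCI_MAX_GATES];
};

static void hci_model_init(struct hci_dev_model *hdev)
{
	/* No pipe is known for any gate until it has been connected. */
	memset(hdev->gate2pipe, NCI_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe));
	/* The admin pipe to the admin gate is statically defined. */
	hdev->gate2pipe[NCI_HCI_ADMIN_GATE] = NCI_HCI_ADMIN_PIPE;
}

static int hci_model_connect_gate(struct hci_dev_model *hdev,
				  uint8_t gate, uint8_t pipe)
{
	if (hdev->gate2pipe[gate] != NCI_HCI_INVALID_PIPE)
		return 0;	/* already connected */
	hdev->gate2pipe[gate] = pipe;
	return 0;
}

static int hci_model_send_on_gate(struct hci_dev_model *hdev, uint8_t gate)
{
	uint8_t pipe = hdev->gate2pipe[gate];

	if (pipe == NCI_HCI_INVALID_PIPE)
		return -1;	/* HCP message cannot be routed */
	printf("gate %02x -> pipe %02x\n", gate, pipe);
	return 0;
}

int main(void)
{
	struct hci_dev_model hdev;

	hci_model_init(&hdev);
	hci_model_connect_gate(&hdev, 0x41, 0x21);	/* hypothetical gate/pipe */
	hci_model_send_on_gate(&hdev, NCI_HCI_ADMIN_GATE);
	hci_model_send_on_gate(&hdev, 0x41);
	hci_model_send_on_gate(&hdev, 0x30);		/* not connected: fails */
	return 0;
}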
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 93 ++++++ net/nfc/nci/Makefile | 2 +- net/nfc/nci/core.c | 20 +- net/nfc/nci/hci.c | 686 +++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nci/ntf.c | 32 ++- 5 files changed, 815 insertions(+), 18 deletions(-) create mode 100644 net/nfc/nci/hci.c (limited to 'net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 8ba3e38e4167..be858870dace 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -78,6 +78,11 @@ struct nci_ops { int (*se_io)(struct nci_dev *ndev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context); + int (*hci_load_session)(struct nci_dev *ndev); + void (*hci_event_received)(struct nci_dev *ndev, u8 pipe, u8 event, + struct sk_buff *skb); + void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd, + struct sk_buff *skb); }; #define NCI_MAX_SUPPORTED_RF_INTERFACES 4 @@ -102,10 +107,77 @@ struct nci_conn_info { #define NCI_INVALID_CONN_ID 0x80 +#define NCI_HCI_ANY_OPEN_PIPE 0x03 + +/* Gates */ +#define NCI_HCI_ADMIN_GATE 0x00 +#define NCI_HCI_LINK_MGMT_GATE 0x06 + +/* Pipes */ +#define NCI_HCI_LINK_MGMT_PIPE 0x00 +#define NCI_HCI_ADMIN_PIPE 0x01 + +/* Generic responses */ +#define NCI_HCI_ANY_OK 0x00 +#define NCI_HCI_ANY_E_NOT_CONNECTED 0x01 +#define NCI_HCI_ANY_E_CMD_PAR_UNKNOWN 0x02 +#define NCI_HCI_ANY_E_NOK 0x03 +#define NCI_HCI_ANY_E_PIPES_FULL 0x04 +#define NCI_HCI_ANY_E_REG_PAR_UNKNOWN 0x05 +#define NCI_HCI_ANY_E_PIPE_NOT_OPENED 0x06 +#define NCI_HCI_ANY_E_CMD_NOT_SUPPORTED 0x07 +#define NCI_HCI_ANY_E_INHIBITED 0x08 +#define NCI_HCI_ANY_E_TIMEOUT 0x09 +#define NCI_HCI_ANY_E_REG_ACCESS_DENIED 0x0a +#define NCI_HCI_ANY_E_PIPE_ACCESS_DENIED 0x0b + +#define NCI_HCI_DO_NOT_OPEN_PIPE 0x81 +#define NCI_HCI_INVALID_PIPE 0x80 +#define NCI_HCI_INVALID_GATE 0xFF +#define NCI_HCI_INVALID_HOST 0x80 + +#define NCI_HCI_MAX_CUSTOM_GATES 50 +#define NCI_HCI_MAX_PIPES 127 + +struct nci_hci_gate { + u8 gate; + u8 pipe; + u8 dest_host; +} __packed; + +struct nci_hci_pipe { + u8 gate; + u8 host; +} __packed; + +struct nci_hci_init_data { + u8 gate_count; + struct nci_hci_gate gates[NCI_HCI_MAX_CUSTOM_GATES]; + char session_id[9]; +}; + +#define NCI_HCI_MAX_GATES 256 + +struct nci_hci_dev { + struct nci_dev *ndev; + struct nci_conn_info *conn_info; + + struct nci_hci_init_data init_data; + struct nci_hci_pipe pipes[NCI_HCI_MAX_PIPES]; + u8 gate2pipe[NCI_HCI_MAX_GATES]; + int expected_pipes; + int count_pipes; + + struct sk_buff_head rx_hcp_frags; + struct work_struct msg_rx_work; + struct sk_buff_head msg_rx_queue; +}; + /* NCI Core structures */ struct nci_dev { struct nfc_dev *nfc_dev; struct nci_ops *ops; + struct nci_hci_dev *hci_dev; int tx_headroom; int tx_tailroom; @@ -181,6 +253,10 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, void nci_free_device(struct nci_dev *ndev); int nci_register_device(struct nci_dev *ndev); void nci_unregister_device(struct nci_dev *ndev); +int nci_request(struct nci_dev *ndev, + void (*req)(struct nci_dev *ndev, + unsigned long opt), + unsigned long opt, __u32 timeout); int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); @@ -190,6 +266,21 @@ int nci_core_conn_create(struct nci_dev *ndev, struct core_conn_create_dest_spec_params *params); int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); +struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev); +int nci_hci_send_event(struct 
nci_dev *ndev, u8 gate, u8 event, + const u8 *param, size_t param_len); +int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, + u8 cmd, const u8 *param, size_t param_len, + struct sk_buff **skb); +int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe); +int nci_hci_connect_gate(struct nci_dev *ndev, u8 dest_host, + u8 dest_gate, u8 pipe); +int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, + const u8 *param, size_t param_len); +int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, + struct sk_buff **skb); +int nci_hci_dev_session_init(struct nci_dev *ndev); + static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, gfp_t how) @@ -225,6 +316,8 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, __u8 conn_id, int err); +void nci_hci_data_received_cb(void *context, struct sk_buff *skb, int err); + void nci_clear_target_list(struct nci_dev *ndev); /* ----- NCI requests ----- */ diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index 7aeedc43187d..7ed8949266cc 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o -nci-objs := core.o data.o lib.o ntf.o rsp.o +nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o nci-$(CONFIG_NFC_NCI_SPI) += spi.o diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 2a96ed68c7bb..f74d420e2ead 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -122,10 +122,10 @@ static int __nci_request(struct nci_dev *ndev, return rc; } -static inline int nci_request(struct nci_dev *ndev, - void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout) +inline int nci_request(struct nci_dev *ndev, + void (*req)(struct nci_dev *ndev, + unsigned long opt), + unsigned long opt, __u32 timeout) { int rc; @@ -901,7 +901,6 @@ static struct nfc_ops nci_nfc_ops = { }; /* ---- Interface to NCI drivers ---- */ - /** * nci_allocate_device - allocate a new nci device * @@ -936,13 +935,20 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) - goto free_exit; + goto free_nci; + + ndev->hci_dev = nci_hci_allocate(ndev); + if (!ndev->hci_dev) + goto free_nfc; nfc_set_drvdata(ndev->nfc_dev, ndev); return ndev; -free_exit: +free_nfc: + kfree(ndev->nfc_dev); + +free_nci: kfree(ndev); return NULL; } diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c new file mode 100644 index 000000000000..ecf253942606 --- /dev/null +++ b/net/nfc/nci/hci.c @@ -0,0 +1,686 @@ +/* + * The NFC Controller Interface is the communication protocol between an + * NFC Controller (NFCC) and a Device Host (DH). + * This is the HCI over NCI implementation, as specified in the 10.2 + * section of the NCI 1.1 specification. + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + */ + +#include + +#include "../nfc.h" +#include +#include +#include + +struct nci_data { + u8 conn_id; + u8 pipe; + u8 cmd; + const u8 *data; + u32 data_len; +} __packed; + +struct nci_hci_create_pipe_params { + u8 src_gate; + u8 dest_host; + u8 dest_gate; +} __packed; + +struct nci_hci_create_pipe_resp { + u8 src_host; + u8 src_gate; + u8 dest_host; + u8 dest_gate; + u8 pipe; +} __packed; + +struct nci_hci_delete_pipe_noti { + u8 pipe; +} __packed; + +struct nci_hci_all_pipe_cleared_noti { + u8 host; +} __packed; + +struct nci_hcp_message { + u8 header; /* type -cmd,evt,rsp- + instruction */ + u8 data[]; +} __packed; + +struct nci_hcp_packet { + u8 header; /* cbit+pipe */ + struct nci_hcp_message message; +} __packed; + +#define NCI_HCI_ANY_SET_PARAMETER 0x01 +#define NCI_HCI_ANY_GET_PARAMETER 0x02 +#define NCI_HCI_ANY_CLOSE_PIPE 0x04 + +#define NCI_HFP_NO_CHAINING 0x80 + +#define NCI_NFCEE_ID_HCI 0x80 + +#define NCI_EVT_HOT_PLUG 0x03 + +#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 + +/* HCP headers */ +#define NCI_HCI_HCP_PACKET_HEADER_LEN 1 +#define NCI_HCI_HCP_MESSAGE_HEADER_LEN 1 +#define NCI_HCI_HCP_HEADER_LEN 2 + +/* HCP types */ +#define NCI_HCI_HCP_COMMAND 0x00 +#define NCI_HCI_HCP_EVENT 0x01 +#define NCI_HCI_HCP_RESPONSE 0x02 + +#define NCI_HCI_ADM_NOTIFY_PIPE_CREATED 0x12 +#define NCI_HCI_ADM_NOTIFY_PIPE_DELETED 0x13 +#define NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED 0x15 + +#define NCI_HCI_FRAGMENT 0x7f +#define NCI_HCP_HEADER(type, instr) ((((type) & 0x03) << 6) |\ + ((instr) & 0x3f)) + +#define NCI_HCP_MSG_GET_TYPE(header) ((header & 0xc0) >> 6) +#define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) +#define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) + +/* HCI core */ +static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) +{ + int i; + + for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { + hdev->pipes[i].gate = NCI_HCI_INVALID_GATE; + hdev->pipes[i].host = NCI_HCI_INVALID_HOST; + } + memset(hdev->gate2pipe, NCI_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); +} + +static void nci_hci_reset_pipes_per_host(struct nci_dev *ndev, u8 host) +{ + int i; + + for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { + if (ndev->hci_dev->pipes[i].host == host) { + ndev->hci_dev->pipes[i].gate = NCI_HCI_INVALID_GATE; + ndev->hci_dev->pipes[i].host = NCI_HCI_INVALID_HOST; + } + } +} + +/* Fragment HCI data over NCI packet. + * NFC Forum NCI 10.2.2 Data Exchange: + * The payload of the Data Packets sent on the Logical Connection SHALL be + * valid HCP packets, as defined within [ETSI_102622]. Each Data Packet SHALL + * contain a single HCP packet. NCI Segmentation and Reassembly SHALL NOT be + * applied to Data Messages in either direction. The HCI fragmentation mechanism + * is used if required. 
+ */ +static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, + const u8 data_type, const u8 *data, + size_t data_len) +{ + struct nci_conn_info *conn_info; + struct sk_buff *skb; + int len, i, r; + u8 cb = pipe; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + + NCI_DATA_HDR_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); + *skb_push(skb, 1) = data_type; + + i = 0; + len = conn_info->max_pkt_payload_len; + + do { + /* If last packet add NCI_HFP_NO_CHAINING */ + if (i + conn_info->max_pkt_payload_len - + (skb->len + 1) >= data_len) { + cb |= NCI_HFP_NO_CHAINING; + len = data_len - i; + } else { + len = conn_info->max_pkt_payload_len - skb->len - 1; + } + + *skb_push(skb, 1) = cb; + + if (len > 0) + memcpy(skb_put(skb, len), data + i, len); + + r = nci_send_data(ndev, conn_info->conn_id, skb); + if (r < 0) + return r; + + i += len; + if (i < data_len) { + skb_trim(skb, 0); + skb_pull(skb, len); + } + } while (i < data_len); + + return i; +} + +static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_data *data = (struct nci_data *)opt; + + nci_hci_send_data(ndev, data->pipe, data->cmd, + data->data, data->data_len); +} + +int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, + const u8 *param, size_t param_len) +{ + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + return nci_hci_send_data(ndev, pipe, + NCI_HCP_HEADER(NCI_HCI_HCP_EVENT, event), + param, param_len); +} +EXPORT_SYMBOL(nci_hci_send_event); + +int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, + const u8 *param, size_t param_len, + struct sk_buff **skb) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, cmd); + data.data = param; + data.data_len = param_len; + + r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + if (r == NCI_STATUS_OK) + *skb = conn_info->rx_skb; + + return r; +} +EXPORT_SYMBOL(nci_hci_send_cmd); + +static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, + u8 event, struct sk_buff *skb) +{ + if (ndev->ops->hci_event_received) + ndev->ops->hci_event_received(ndev, pipe, event, skb); +} + +static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, + u8 cmd, struct sk_buff *skb) +{ + u8 gate = ndev->hci_dev->pipes[pipe].gate; + u8 status = NCI_HCI_ANY_OK | ~NCI_HCI_FRAGMENT; + u8 dest_gate, new_pipe; + struct nci_hci_create_pipe_resp *create_info; + struct nci_hci_delete_pipe_noti *delete_info; + struct nci_hci_all_pipe_cleared_noti *cleared_info; + + pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); + + switch (cmd) { + case NCI_HCI_ADM_NOTIFY_PIPE_CREATED: + if (skb->len != 5) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + create_info = (struct nci_hci_create_pipe_resp *)skb->data; + dest_gate = create_info->dest_gate; + new_pipe = create_info->pipe; + + /* Save the new created pipe and bind with local gate, + * the description for skb->data[3] is destination gate id + * but since we received this cmd from host controller, we + * are the destination and it is our 
local gate + */ + ndev->hci_dev->gate2pipe[dest_gate] = new_pipe; + ndev->hci_dev->pipes[new_pipe].gate = dest_gate; + ndev->hci_dev->pipes[new_pipe].host = + create_info->src_host; + break; + case NCI_HCI_ANY_OPEN_PIPE: + /* If the pipe is not created report an error */ + if (gate == NCI_HCI_INVALID_GATE) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + break; + case NCI_HCI_ADM_NOTIFY_PIPE_DELETED: + if (skb->len != 1) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + + ndev->hci_dev->pipes[delete_info->pipe].gate = + NCI_HCI_INVALID_GATE; + ndev->hci_dev->pipes[delete_info->pipe].host = + NCI_HCI_INVALID_HOST; + break; + case NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: + if (skb->len != 1) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + + cleared_info = + (struct nci_hci_all_pipe_cleared_noti *)skb->data; + nci_hci_reset_pipes_per_host(ndev, cleared_info->host); + break; + default: + pr_debug("Discarded unknown cmd %x to gate %x\n", cmd, gate); + break; + } + + if (ndev->ops->hci_cmd_received) + ndev->ops->hci_cmd_received(ndev, pipe, cmd, skb); + +exit: + nci_hci_send_data(ndev, pipe, status, NULL, 0); + + kfree_skb(skb); +} + +static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, + u8 result, struct sk_buff *skb) +{ + struct nci_conn_info *conn_info; + u8 status = result; + + if (result != NCI_HCI_ANY_OK) + goto exit; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->rx_skb = skb; + +exit: + nci_req_complete(ndev, status); +} + +/* Receive hcp message for pipe, with type and cmd. + * skb contains optional message data only. + */ +static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, + u8 type, u8 instruction, struct sk_buff *skb) +{ + switch (type) { + case NCI_HCI_HCP_RESPONSE: + nci_hci_resp_received(ndev, pipe, instruction, skb); + break; + case NCI_HCI_HCP_COMMAND: + nci_hci_cmd_received(ndev, pipe, instruction, skb); + break; + case NCI_HCI_HCP_EVENT: + nci_hci_event_received(ndev, pipe, instruction, skb); + break; + default: + pr_err("UNKNOWN MSG Type %d, instruction=%d\n", + type, instruction); + kfree_skb(skb); + break; + } + + nci_req_complete(ndev, 0); +} + +static void nci_hci_msg_rx_work(struct work_struct *work) +{ + struct nci_hci_dev *hdev = + container_of(work, struct nci_hci_dev, msg_rx_work); + struct sk_buff *skb; + struct nci_hcp_message *message; + u8 pipe, type, instruction; + + while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { + pipe = skb->data[0]; + skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); + message = (struct nci_hcp_message *)skb->data; + type = NCI_HCP_MSG_GET_TYPE(message->header); + instruction = NCI_HCP_MSG_GET_CMD(message->header); + skb_pull(skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + nci_hci_hcp_message_rx(hdev->ndev, pipe, + type, instruction, skb); + } +} + +void nci_hci_data_received_cb(void *context, + struct sk_buff *skb, int err) +{ + struct nci_dev *ndev = (struct nci_dev *)context; + struct nci_hcp_packet *packet; + u8 pipe, type, instruction; + struct sk_buff *hcp_skb; + struct sk_buff *frag_skb; + int msg_len; + + pr_debug("\n"); + + if (err) { + nci_req_complete(ndev, err); + return; + } + + packet = (struct nci_hcp_packet *)skb->data; + if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { + skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); + return; + } + + /* it's the last fragment. Does it need re-aggregation? 
*/ + if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { + pipe = packet->header & NCI_HCI_FRAGMENT; + skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); + + msg_len = 0; + skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { + msg_len += (frag_skb->len - + NCI_HCI_HCP_PACKET_HEADER_LEN); + } + + hcp_skb = nfc_alloc_recv_skb(NCI_HCI_HCP_PACKET_HEADER_LEN + + msg_len, GFP_KERNEL); + if (!hcp_skb) { + nci_req_complete(ndev, -ENOMEM); + return; + } + + *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; + + skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { + msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; + memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); + } + + skb_queue_purge(&ndev->hci_dev->rx_hcp_frags); + } else { + packet->header &= NCI_HCI_FRAGMENT; + hcp_skb = skb; + } + + /* if this is a response, dispatch immediately to + * unblock waiting cmd context. Otherwise, enqueue to dispatch + * in separate context where handler can also execute command. + */ + packet = (struct nci_hcp_packet *)hcp_skb->data; + type = NCI_HCP_MSG_GET_TYPE(packet->message.header); + if (type == NCI_HCI_HCP_RESPONSE) { + pipe = packet->header; + instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); + skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + + NCI_HCI_HCP_MESSAGE_HEADER_LEN); + nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb); + } else { + skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); + schedule_work(&ndev->hci_dev->msg_rx_work); + } +} + +int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) +{ + struct nci_data data; + struct nci_conn_info *conn_info; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_OPEN_PIPE); + data.data = NULL; + data.data_len = 0; + + return nci_request(ndev, nci_hci_send_data_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); +} +EXPORT_SYMBOL(nci_hci_open_pipe); + +int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, + const u8 *param, size_t param_len) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 *tmp; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + pr_debug("idx=%d to gate %d\n", idx, gate); + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + tmp = kmalloc(1 + param_len, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + *tmp = idx; + memcpy(tmp + 1, param, param_len); + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_SET_PARAMETER); + data.data = tmp; + data.data_len = param_len + 1; + + r = nci_request(ndev, nci_hci_send_data_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + kfree(tmp); + return r; +} +EXPORT_SYMBOL(nci_hci_set_param); + +int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, + struct sk_buff **skb) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + pr_debug("idx=%d to gate %d\n", idx, gate); + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_GET_PARAMETER); + data.data = &idx; + data.data_len = 1; 
+ + r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + if (r == NCI_STATUS_OK) + *skb = conn_info->rx_skb; + + return r; +} +EXPORT_SYMBOL(nci_hci_get_param); + +int nci_hci_connect_gate(struct nci_dev *ndev, + u8 dest_host, u8 dest_gate, u8 pipe) +{ + int r; + + if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) + return 0; + + if (ndev->hci_dev->gate2pipe[dest_gate] != NCI_HCI_INVALID_PIPE) + return -EADDRINUSE; + + if (pipe != NCI_HCI_INVALID_PIPE) + goto open_pipe; + + switch (dest_gate) { + case NCI_HCI_LINK_MGMT_GATE: + pipe = NCI_HCI_LINK_MGMT_PIPE; + break; + case NCI_HCI_ADMIN_GATE: + pipe = NCI_HCI_ADMIN_PIPE; + break; + } + +open_pipe: + r = nci_hci_open_pipe(ndev, pipe); + if (r < 0) + return r; + + ndev->hci_dev->pipes[pipe].gate = dest_gate; + ndev->hci_dev->pipes[pipe].host = dest_host; + ndev->hci_dev->gate2pipe[dest_gate] = pipe; + + return 0; +} +EXPORT_SYMBOL(nci_hci_connect_gate); + +static int nci_hci_dev_connect_gates(struct nci_dev *ndev, + u8 gate_count, + struct nci_hci_gate *gates) +{ + int r; + + while (gate_count--) { + r = nci_hci_connect_gate(ndev, gates->dest_host, + gates->gate, gates->pipe); + if (r < 0) + return r; + gates++; + } + + return 0; +} + +int nci_hci_dev_session_init(struct nci_dev *ndev) +{ + struct sk_buff *skb; + int r; + + ndev->hci_dev->count_pipes = 0; + ndev->hci_dev->expected_pipes = 0; + + nci_hci_reset_pipes(ndev->hci_dev); + + if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) + return -EPROTO; + + r = nci_hci_connect_gate(ndev, + ndev->hci_dev->init_data.gates[0].dest_host, + ndev->hci_dev->init_data.gates[0].gate, + ndev->hci_dev->init_data.gates[0].pipe); + if (r < 0) + goto exit; + + r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb); + if (r < 0) + goto exit; + + if (skb->len && + skb->len == strlen(ndev->hci_dev->init_data.session_id) && + memcmp(ndev->hci_dev->init_data.session_id, + skb->data, skb->len) == 0 && + ndev->ops->hci_load_session) { + /* Restore gate<->pipe table from some proprietary location. 
*/ + r = ndev->ops->hci_load_session(ndev); + if (r < 0) + goto exit; + } else { + r = nci_hci_dev_connect_gates(ndev, + ndev->hci_dev->init_data.gate_count, + ndev->hci_dev->init_data.gates); + if (r < 0) + goto exit; + + r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, + ndev->hci_dev->init_data.session_id, + strlen(ndev->hci_dev->init_data.session_id)); + } + if (r == 0) + goto exit; + +exit: + kfree_skb(skb); + + return r; +} +EXPORT_SYMBOL(nci_hci_dev_session_init); + +struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) +{ + struct nci_hci_dev *hdev; + + hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); + if (!hdev) + return NULL; + + skb_queue_head_init(&hdev->rx_hcp_frags); + INIT_WORK(&hdev->msg_rx_work, nci_hci_msg_rx_work); + skb_queue_head_init(&hdev->msg_rx_queue); + hdev->ndev = ndev; + + return hdev; +} diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 4c0be7e82d29..6e041ac49e17 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -723,18 +723,30 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, pr_debug("\n"); - conn_info = devm_kzalloc(&ndev->nfc_dev->dev, - sizeof(struct nci_conn_info), GFP_KERNEL); - if (!conn_info) { - status = NCI_STATUS_REJECTED; - goto exit; - } + /* NFCForum NCI 9.2.1 HCI Network Specific Handling + * If the NFCC supports the HCI Network, it SHALL return one, + * and only one, NFCEE_DISCOVER_NTF with a Protocol type of + * “HCI Access”, even if the HCI Network contains multiple NFCEEs. + */ + if (!ndev->hci_dev->conn_info) { + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(*conn_info), GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } - conn_info->id = nfcee_ntf->nfcee_id; - conn_info->conn_id = NCI_INVALID_CONN_ID; + conn_info->id = nfcee_ntf->nfcee_id; + conn_info->conn_id = NCI_INVALID_CONN_ID; - INIT_LIST_HEAD(&conn_info->list); - list_add(&conn_info->list, &ndev->conn_info_list); + conn_info->data_exchange_cb = nci_hci_data_received_cb; + conn_info->data_exchange_cb_context = ndev; + + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + + ndev->hci_dev->conn_info = conn_info; + } exit: nci_req_complete(ndev, status); -- cgit v1.2.3 From 447b27c4f29b510b98e99395120d635f009ed563 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:16 +0100 Subject: NFC: Forward NFC_EVT_TRANSACTION to user space NFC_EVT_TRANSACTION is sent through netlink in order for a specific application running on a secure element to notify userspace of an event. Typically the secure element application counterpart on the host could interpret that event and act upon it. Forwarded information contains: - SE host generating the event - Application IDentifier doing the operation - Applications parameters Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 27 +++++++++++++++++++++++++++ include/uapi/linux/nfc.h | 1 + net/nfc/core.c | 21 +++++++++++++++++++++ net/nfc/netlink.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 2 ++ 5 files changed, 98 insertions(+) (limited to 'net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 12adb817c27a..73190e65d5c1 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -135,6 +135,31 @@ struct nfc_se { u16 state; }; +/** + * nfc_evt_transaction - A struct for NFC secure element event transaction. 
+ * + * @aid: The application identifier triggering the event + * + * @aid_len: The application identifier length [5:16] + * + * @params: The application parameters transmitted during the transaction + * + * @params_len: The applications parameters length [0:255] + * + */ +#define NFC_MIN_AID_LENGTH 5 +#define NFC_MAX_AID_LENGTH 16 +#define NFC_MAX_PARAMS_LENGTH 255 + +#define NFC_EVT_TRANSACTION_AID_TAG 0x81 +#define NFC_EVT_TRANSACTION_PARAMS_TAG 0x82 +struct nfc_evt_transaction { + u32 aid_len; + u8 aid[NFC_MAX_AID_LENGTH]; + u8 params_len; + u8 params[NFC_MAX_PARAMS_LENGTH]; +} __packed; + struct nfc_genl_data { u32 poll_req_portid; struct mutex genl_data_mutex; @@ -262,6 +287,8 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); void nfc_driver_failure(struct nfc_dev *dev, int err); +int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx); struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx); diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 8119255feae4..c1e2e63cf9b5 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -183,6 +183,7 @@ enum nfc_attrs { NFC_ATTR_SE_APDU, NFC_ATTR_TARGET_ISO15693_DSFID, NFC_ATTR_TARGET_ISO15693_UID, + NFC_ATTR_SE_PARAMS, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; diff --git a/net/nfc/core.c b/net/nfc/core.c index 7f1b6351755c..cff3f1614ad4 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -932,6 +932,27 @@ int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) } EXPORT_SYMBOL(nfc_remove_se); +int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction) +{ + int rc; + + pr_debug("transaction: %x\n", se_idx); + + device_lock(&dev->dev); + + if (!evt_transaction) { + rc = -EPROTO; + goto out; + } + + rc = nfc_genl_se_transaction(dev, se_idx, evt_transaction); +out: + device_unlock(&dev->dev); + return rc; +} +EXPORT_SYMBOL(nfc_se_transaction); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index be387e6219a0..14a2d11581da 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -497,6 +497,53 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction) +{ + struct nfc_se *se; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_TRANSACTION); + if (!hdr) + goto free_msg; + + se = nfc_find_se(dev, se_idx); + if (!se) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type) || + nla_put(msg, NFC_ATTR_SE_AID, evt_transaction->aid_len, + evt_transaction->aid) || + nla_put(msg, NFC_ATTR_SE_PARAMS, evt_transaction->params_len, + evt_transaction->params)) + goto nla_put_failure; + + /* evt_transaction is no more used */ + devm_kfree(&dev->dev, evt_transaction); + + genlmsg_end(msg, hdr); + + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + /* evt_transaction is no more used */ + devm_kfree(&dev->dev, evt_transaction); + nlmsg_free(msg); + return -EMSGSIZE; +} + static int 
nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 88d60064890e..a8ce80b47720 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -100,6 +100,8 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); +int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction); struct nfc_dev *nfc_get_device(unsigned int idx); -- cgit v1.2.3 From a41bb8448ebaebe1d0d9a268d340fad73c247e09 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:17 +0100 Subject: NFC: nci: Add RF NFCEE action notification support The NFCC sends an NCI_OP_RF_NFCEE_ACTION_NTF notification to the host (DH) to let it know that for example an RF transaction with a payment reader is done. For now the notification handler is empty. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 8 ++++++++ net/nfc/nci/ntf.c | 11 +++++++++++ 2 files changed, 19 insertions(+) (limited to 'net') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index deac78b9a53c..6c1beb2704b1 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -499,6 +499,14 @@ struct nci_rf_deactivate_ntf { __u8 reason; } __packed; +#define NCI_OP_RF_NFCEE_ACTION_NTF nci_opcode_pack(NCI_GID_RF_MGMT, 0x09) +struct nci_rf_nfcee_action_ntf { + __u8 nfcee_id; + __u8 trigger; + __u8 supported_data_length; + __u8 supported_data[0]; +} __packed; + #define NCI_OP_NFCEE_DISCOVER_NTF nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x00) struct nci_nfcee_supported_protocol { __u8 num_protocol; diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 6e041ac49e17..5924b812fb6a 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -752,6 +752,12 @@ exit: nci_req_complete(ndev, status); } +static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + pr_debug("\n"); +} + void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 ntf_opcode = nci_opcode(skb->data); @@ -793,6 +799,11 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) case NCI_OP_NFCEE_DISCOVER_NTF: nci_nfcee_discover_ntf_packet(ndev, skb); break; + + case NCI_OP_RF_NFCEE_ACTION_NTF: + nci_nfcee_action_ntf_packet(ndev, skb); + break; + default: pr_err("unknown ntf opcode 0x%x\n", ntf_opcode); break; -- cgit v1.2.3 From 6095b0f07d9b1abd98484bc33b329e06a684115b Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:18 +0100 Subject: NFC: nci: Change NCI state machine to LISTEN_ACTIVE When receiving an interface activation notification, if the RF interface is NCI_RF_INTERFACE_NFCEE_DIRECT, we need to ignore the following parameters and change the NCI state machine to NCI_LISTEN_ACTIVE. According to the NCI specification, the parameters should be 0 and shall be ignored. 
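Condensed into a sketch (illustration only, not the patch itself; the function name is made up), the rule reads as follows: when the activated RF interface is the NFCEE Direct RF Interface, the activation parameters carry nothing useful for the DH, so parsing is skipped and the state machine moves straight to listen-active.

#include <net/nfc/nci.h>
#include <net/nfc/nci_core.h>

static void example_intf_activated(struct nci_dev *ndev,
				   const struct nci_rf_intf_activated_ntf *ntf)
{
	if (ntf->rf_interface == NCI_RF_INTERFACE_NFCEE_DIRECT) {
		/* Parameters SHALL be 0 and SHALL be ignored (NCI spec). */
		atomic_set(&ndev->state, NCI_LISTEN_ACTIVE);
		return;
	}

	/* ... regular poll/listen activation parameter parsing ... */
}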
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- net/nfc/nci/ntf.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 5924b812fb6a..33f5f00ecf4c 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -541,6 +541,13 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, pr_debug("rf_tech_specific_params_len %d\n", ntf.rf_tech_specific_params_len); + /* If this contains a value of 0x00 (NFCEE Direct RF + * Interface) then all following parameters SHALL contain a + * value of 0 and SHALL be ignored. + */ + if (ntf.rf_interface == NCI_RF_INTERFACE_NFCEE_DIRECT) + goto listen; + if (ntf.rf_tech_specific_params_len > 0) { switch (ntf.activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: @@ -653,6 +660,7 @@ exit: nci_req_complete(ndev, err); } } else { +listen: /* Listen mode */ atomic_set(&ndev->state, NCI_LISTEN_ACTIVE); if (err == NCI_STATUS_OK && -- cgit v1.2.3 From 49ca0d8bfaf3bc46d5eef60ce67b00eb195bd392 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Jan 2015 13:29:31 -0500 Subject: net-timestamp: no-payload option Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit timestamps, this loops timestamps on top of empty packets. Doing so reduces the pressure on SO_RCVBUF. Payload inspection and cmsg reception (aside from timestamps) are no longer possible. This works together with a follow on patch that allows administrators to only allow tx timestamping if it does not loop payload or metadata. Signed-off-by: Willem de Bruijn ---- Changes (rfc -> v1) - add documentation - remove unnecessary skb->len test (thanks to Richard Cochran) Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 21 +++++++++++++++++++++ include/uapi/linux/net_tstamp.h | 3 ++- net/core/skbuff.c | 19 ++++++++++++++----- net/ipv4/ip_sockglue.c | 7 ++++--- net/ipv6/datagram.c | 5 ++--- net/rxrpc/ar-error.c | 5 +++++ 6 files changed, 48 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index a5c784c89312..5f0922613f1a 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -162,6 +162,27 @@ SOF_TIMESTAMPING_OPT_CMSG: option IP_PKTINFO simultaneously. +SOF_TIMESTAMPING_OPT_TSONLY: + + Applies to transmit timestamps only. Makes the kernel return the + timestamp as a cmsg alongside an empty packet, as opposed to + alongside the original packet. This reduces the amount of memory + charged to the socket's receive budget (SO_RCVBUF) and delivers + the timestamp even if sysctl net.core.tstamp_allow_data is 0. + This option disables SOF_TIMESTAMPING_OPT_CMSG. + + +New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to +disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate +regardless of the setting of sysctl net.core.tstamp_allow_data. + +An exception is when a process needs additional cmsg data, for +instance SOL_IP/IP_PKTINFO to detect the egress network interface. +Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on +having access to the contents of the original packet, so cannot be +combined with SOF_TIMESTAMPING_OPT_TSONLY. 
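As an illustration of the new option, a minimal userspace sketch (not part of this patch; the function name and the exact flag combination are assumptions) that requests software transmit timestamps looped back without payload could look like this:

#include <linux/net_tstamp.h>
#include <sys/socket.h>

/* Request software TX timestamps that are returned on empty skbs.
 * With SOF_TIMESTAMPING_OPT_TSONLY the cmsg read from MSG_ERRQUEUE is
 * not accompanied by a copy of the transmitted packet, so much less
 * memory is charged against the socket's SO_RCVBUF.
 */
static int enable_tsonly_timestamps(int fd)
{
	unsigned int val = SOF_TIMESTAMPING_TX_SOFTWARE |
			   SOF_TIMESTAMPING_SOFTWARE |
			   SOF_TIMESTAMPING_OPT_ID |
			   SOF_TIMESTAMPING_OPT_TSONLY;

	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
}

Completions are then read with recvmsg() and MSG_ERRQUEUE and carry only ancillary data (the timestamp plus the extended error), no packet bytes.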
+ + 1.4 Bytestream Timestamps The SO_TIMESTAMPING interface supports timestamping of bytes in a diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index edbc888ceb51..6d1abea9746e 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -24,8 +24,9 @@ enum { SOF_TIMESTAMPING_TX_SCHED = (1<<8), SOF_TIMESTAMPING_TX_ACK = (1<<9), SOF_TIMESTAMPING_OPT_CMSG = (1<<10), + SOF_TIMESTAMPING_OPT_TSONLY = (1<<11), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 56db472e9b86..65a3798f43e6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3710,19 +3710,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; + bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; if (!sk) return; - if (hwtstamps) - *skb_hwtstamps(orig_skb) = *hwtstamps; + if (tsonly) + skb = alloc_skb(0, GFP_ATOMIC); else - orig_skb->tstamp = ktime_get_real(); - - skb = skb_clone(orig_skb, GFP_ATOMIC); + skb = skb_clone(orig_skb, GFP_ATOMIC); if (!skb) return; + if (tsonly) { + skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags; + skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; + } + + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + else + skb->tstamp = ktime_get_real(); + __skb_complete_tx_timestamp(skb, sk, tstype); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index db5e0f81ce0a..31d8c71986b4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -483,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); @@ -496,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || - ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { + if (skb->len && + (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; if (inet_sk(sk)->cmsg_flags) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 49f5e73db122..c215be70cac0 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -394,8 +394,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - - if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) { sin->sin6_family = AF_INET6; if (np->rxopt.all) { if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 74c0fcd36838..5394b6be46ec 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -42,6 +42,11 @@ void rxrpc_UDP_error_report(struct sock *sk) 
_leave("UDP socket errqueue empty"); return; } + if (!skb->len) { + _leave("UDP empty message"); + kfree_skb(skb); + return; + } rxrpc_new_skb(skb); -- cgit v1.2.3 From b245be1f4db1a0394e4b6eb66059814b46670ac3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Jan 2015 13:29:32 -0500 Subject: net-timestamp: no-payload only sysctl Tx timestamps are looped onto the error queue on top of an skb. This mechanism leaks packet headers to processes unless the no-payload options SOF_TIMESTAMPING_OPT_TSONLY is set. Add a sysctl that optionally drops looped timestamp with data. This only affects processes without CAP_NET_RAW. The policy is checked when timestamps are generated in the stack. It is possible for timestamps with data to be reported after the sysctl is set, if these were queued internally earlier. No vulnerability is immediately known that exploits knowledge gleaned from packet headers, but it may still be preferable to allow administrators to lock down this path at the cost of possible breakage of legacy applications. Signed-off-by: Willem de Bruijn ---- Changes (v1 -> v2) - test socket CAP_NET_RAW instead of capable(CAP_NET_RAW) (rfc -> v1) - document the sysctl in Documentation/sysctl/net.txt - fix access control race: read .._OPT_TSONLY only once, use same value for permission check and skb generation. Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 8 ++++++++ include/net/sock.h | 1 + net/core/skbuff.c | 21 ++++++++++++++++++++- net/core/sock.c | 3 +++ net/core/sysctl_net_core.c | 9 +++++++++ 5 files changed, 41 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 666594b43cff..6294b5186ae5 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -97,6 +97,14 @@ rmem_max The maximum receive socket buffer size in bytes. +tstamp_allow_data +----------------- +Allow processes to receive tx timestamps looped together with the original +packet contents. If disabled, transmit timestamp requests from unprivileged +processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set. 
+Default: 1 (on) + + wmem_default ------------ diff --git a/include/net/sock.h b/include/net/sock.h index 15341499786c..511ef7c8889b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2239,6 +2239,7 @@ bool sk_net_capable(const struct sock *sk, int cap); extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; +extern int sysctl_tstamp_allow_data; extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 65a3798f43e6..a5bff2767f15 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -74,6 +74,8 @@ #include #include #include +#include +#include struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; @@ -3690,11 +3692,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, kfree_skb(skb); } +static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) +{ + bool ret; + + if (likely(sysctl_tstamp_allow_data || tsonly)) + return true; + + read_lock_bh(&sk->sk_callback_lock); + ret = sk->sk_socket && sk->sk_socket->file && + file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); + read_unlock_bh(&sk->sk_callback_lock); + return ret; +} + void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) { struct sock *sk = skb->sk; + if (!skb_may_tx_timestamp(sk, false)) + return; + /* take a reference to prevent skb_orphan() from freeing the socket */ sock_hold(sk); @@ -3712,7 +3731,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sk_buff *skb; bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; - if (!sk) + if (!sk || !skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) diff --git a/net/core/sock.c b/net/core/sock.c index 1c7a33db1314..93c8b20c91e4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +int sysctl_tstamp_allow_data __read_mostly = 1; + struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; EXPORT_SYMBOL_GPL(memalloc_socks); @@ -840,6 +842,7 @@ set_rcvbuf: ret = -EINVAL; break; } + if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk->sk_protocol == IPPROTO_TCP) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 31baba2a71ce..fde21d19e61b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tstamp_allow_data", + .data = &sysctl_tstamp_allow_data, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one + }, #ifdef CONFIG_RPS { .procname = "rps_sock_flow_entries", -- cgit v1.2.3 From 843c2fdf7a12951815d981fa431d7e04d8259332 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 30 Jan 2015 20:45:20 +0100 Subject: net: dctcp: loosen requirement to assert ECT(0) during 3WHS One deployment requirement of DCTCP is to be able to run in a DC setting along with TCP traffic. As Glenn Judd's NSDI'15 paper "Attaining the Promise and Avoiding the Pitfalls of TCP in the Datacenter" [1] (tba) explains, one way to solve this on switch side is to split DCTCP and TCP traffic in two queues per switch port based on the DSCP: one queue soley intended for DCTCP traffic and one for non-DCTCP traffic. 
For the DCTCP queue, there's the marking threshold K as explained in commit e3118e8359bb ("net: tcp: add DCTCP congestion control algorithm") for RED marking ECT(0) packets with CE. For the non-DCTCP queue, there's f.e. a classic tail drop queue. As already explained in e3118e8359bb, running DCTCP at scale when not marking SYN/SYN-ACK packets with ECT(0) has severe consequences as for non-ECT(0) packets, traversing the RED marking DCTCP queue will result in a severe reduction of connection probability. This is due to the DCTCP queue being dominated by ECT(0) traffic and switches handle non-ECT traffic in the RED marking queue after passing K as drops, where K is usually a low watermark in order to leave enough tailroom for bursts. Splitting DCTCP traffic among several queues (ECN and non-ECN queue) is being considered a terrible idea in the network community as it splits single flows across multiple network paths. Therefore, commit e3118e8359bb implements this on Linux as ECT(0) marked traffic, as we argue that marking all packets of a DCTCP flow is the only viable solution and also doesn't speak against the draft. However, recently, a DCTCP implementation for FreeBSD hit also their mainline kernel [2]. In order to let them play well together with Linux' DCTCP, we would need to loosen the requirement that ECT(0) has to be asserted during the 3WHS as not implemented in FreeBSD. This simplifies the ECN test and lets DCTCP work together with FreeBSD. Joint work with Daniel Borkmann. [1] https://www.usenix.org/conference/nsdi15/technical-sessions/presentation/judd [2] https://github.com/freebsd/freebsd/commit/8ad879445281027858a7fa706d13e458095b595f Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Cc: Glenn Judd Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ed11931f340f..d3dfff78fa19 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5872,10 +5872,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) * TCP ECN negotiation. * * Exception: tcp_ca wants ECN. This is required for DCTCP - * congestion control; it requires setting ECT on all packets, - * including SYN. We inverse the test in this case: If our - * local socket wants ECN, but peer only set ece/cwr (but not - * ECT in IP header) its probably a non-DCTCP aware sender. + * congestion control: Linux DCTCP asserts ECT on all packets, + * including SYN, which is most optimal solution; however, + * others, such as FreeBSD do not. 
*/ static void tcp_ecn_create_request(struct request_sock *req, const struct sk_buff *skb, @@ -5885,18 +5884,15 @@ static void tcp_ecn_create_request(struct request_sock *req, const struct tcphdr *th = tcp_hdr(skb); const struct net *net = sock_net(listen_sk); bool th_ecn = th->ece && th->cwr; - bool ect, need_ecn, ecn_ok; + bool ect, ecn_ok; if (!th_ecn) return; ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); - need_ecn = tcp_ca_needs_ecn(listen_sk); ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); - if (!ect && !need_ecn && ecn_ok) - inet_rsk(req)->ecn_ok = 1; - else if (ect && need_ecn) + if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) inet_rsk(req)->ecn_ok = 1; } -- cgit v1.2.3 From 366e41d9774d7010cb63112b6db2fce6dc7809c0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:13 -0500 Subject: ipv6: pull cork initialization into its own function. Pull IPv6 cork initialization into its own function that can be re-used. IPv6-specific cork data did not have an explicit data structure. This patch creates one so that just the IPv6 cork data can be passed as arguments. Also, since IPv6 tries to save the flow label into the inet_cork_full structure, pass the full cork. Adjust ip6_cork_release() to take the cork data structures. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/linux/ipv6.h | 12 ++-- net/ipv6/ip6_output.c | 158 ++++++++++++++++++++++++++++---------------------- 2 files changed, 96 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2805062c013f..4d5169f5d7d1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -125,6 +125,12 @@ struct ipv6_mc_socklist; struct ipv6_ac_socklist; struct ipv6_fl_socklist; +struct inet6_cork { + struct ipv6_txoptions *opt; + u8 hop_limit; + u8 tclass; +}; + /** * struct ipv6_pinfo - ipv6 private area * @@ -217,11 +223,7 @@ struct ipv6_pinfo { struct ipv6_txoptions *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; - struct { - struct ipv6_txoptions *opt; - u8 hop_limit; - u8 tclass; - } cork; + struct inet6_cork cork; }; /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock!
*/ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ce69a12ae48c..f9f08c43c6e1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1135,6 +1135,74 @@ static void ip6_append_data_mtu(unsigned int *mtu, } } +static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, + struct inet6_cork *v6_cork, + int hlimit, int tclass, struct ipv6_txoptions *opt, + struct rt6_info *rt, struct flowi6 *fl6) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + unsigned int mtu; + + /* + * setup for corking + */ + if (opt) { + if (WARN_ON(v6_cork->opt)) + return -EINVAL; + + v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); + if (unlikely(v6_cork->opt == NULL)) + return -ENOBUFS; + + v6_cork->opt->tot_len = opt->tot_len; + v6_cork->opt->opt_flen = opt->opt_flen; + v6_cork->opt->opt_nflen = opt->opt_nflen; + + v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, + sk->sk_allocation); + if (opt->dst0opt && !v6_cork->opt->dst0opt) + return -ENOBUFS; + + v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, + sk->sk_allocation); + if (opt->dst1opt && !v6_cork->opt->dst1opt) + return -ENOBUFS; + + v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, + sk->sk_allocation); + if (opt->hopopt && !v6_cork->opt->hopopt) + return -ENOBUFS; + + v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, + sk->sk_allocation); + if (opt->srcrt && !v6_cork->opt->srcrt) + return -ENOBUFS; + + /* need source address above miyazawa*/ + } + dst_hold(&rt->dst); + cork->base.dst = &rt->dst; + cork->fl.u.ip6 = *fl6; + v6_cork->hop_limit = hlimit; + v6_cork->tclass = tclass; + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } + cork->base.fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->base.flags |= IPCORK_ALLFRAG; + cork->base.length = 0; + + return 0; +} + int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -1162,59 +1230,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* * setup for corking */ - if (opt) { - if (WARN_ON(np->cork.opt)) - return -EINVAL; - - np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); - if (unlikely(np->cork.opt == NULL)) - return -ENOBUFS; - - np->cork.opt->tot_len = opt->tot_len; - np->cork.opt->opt_flen = opt->opt_flen; - np->cork.opt->opt_nflen = opt->opt_nflen; - - np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, - sk->sk_allocation); - if (opt->dst0opt && !np->cork.opt->dst0opt) - return -ENOBUFS; - - np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, - sk->sk_allocation); - if (opt->dst1opt && !np->cork.opt->dst1opt) - return -ENOBUFS; - - np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, - sk->sk_allocation); - if (opt->hopopt && !np->cork.opt->hopopt) - return -ENOBUFS; - - np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, - sk->sk_allocation); - if (opt->srcrt && !np->cork.opt->srcrt) - return -ENOBUFS; - - /* need source address above miyazawa*/ - } - dst_hold(&rt->dst); - cork->dst = &rt->dst; - inet->cork.fl.u.ip6 = *fl6; - np->cork.hop_limit = hlimit; - np->cork.tclass = tclass; - if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); - else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 
- rt->dst.dev->mtu : dst_mtu(rt->dst.path); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } - cork->fragsize = mtu; - if (dst_allfrag(rt->dst.path)) - cork->flags |= IPCORK_ALLFRAG; - cork->length = 0; + err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, + tclass, opt, rt, fl6); + if (err) + return err; exthdrlen = (opt ? opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; @@ -1226,8 +1245,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; - mtu = cork->fragsize; } + mtu = cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1503,23 +1522,24 @@ error: } EXPORT_SYMBOL_GPL(ip6_append_data); -static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) +static void ip6_cork_release(struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { - if (np->cork.opt) { - kfree(np->cork.opt->dst0opt); - kfree(np->cork.opt->dst1opt); - kfree(np->cork.opt->hopopt); - kfree(np->cork.opt->srcrt); - kfree(np->cork.opt); - np->cork.opt = NULL; + if (v6_cork->opt) { + kfree(v6_cork->opt->dst0opt); + kfree(v6_cork->opt->dst1opt); + kfree(v6_cork->opt->hopopt); + kfree(v6_cork->opt->srcrt); + kfree(v6_cork->opt); + v6_cork->opt = NULL; } - if (inet->cork.base.dst) { - dst_release(inet->cork.base.dst); - inet->cork.base.dst = NULL; - inet->cork.base.flags &= ~IPCORK_ALLFRAG; + if (cork->base.dst) { + dst_release(cork->base.dst); + cork->base.dst = NULL; + cork->base.flags &= ~IPCORK_ALLFRAG; } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + memset(&cork->fl, 0, sizeof(cork->fl)); } int ip6_push_pending_frames(struct sock *sk) @@ -1599,7 +1619,7 @@ int ip6_push_pending_frames(struct sock *sk) } out: - ip6_cork_release(inet, np); + ip6_cork_release(&inet->cork, &np->cork); return err; error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); @@ -1618,6 +1638,6 @@ void ip6_flush_pending_frames(struct sock *sk) kfree_skb(skb); } - ip6_cork_release(inet_sk(sk), inet6_sk(sk)); + ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); -- cgit v1.2.3 From 0bbe84a67b0b5460c2e67101b3aa95cb828845f3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:14 -0500 Subject: ipv6: Append sending data to arbitrary queue Add the ability to append data to arbitrary queue. This will be needed later to implement lockless UDP sends. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 106 +++++++++++++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f9f08c43c6e1..1b66453a695d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1041,6 +1041,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); static inline int ip6_ufo_append_data(struct sock *sk, + struct sk_buff_head *queue, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, @@ -1056,7 +1057,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - skb = skb_peek_tail(&sk->sk_write_queue); + skb = skb_peek_tail(queue); if (skb == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, @@ -1079,7 +1080,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->csum = 0; - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; } @@ -1203,49 +1204,36 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, return 0; } -int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, - int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag) +static int __ip6_append_data(struct sock *sk, + struct flowi6 *fl6, + struct sk_buff_head *queue, + struct inet_cork *cork, + struct inet6_cork *v6_cork, + struct page_frag *pfrag, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + unsigned int flags, int dontfrag) { - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; - int exthdrlen; - int dst_exthdrlen; + int exthdrlen = 0; + int dst_exthdrlen = 0; int hh_len; int copy; int err; int offset = 0; __u8 tx_flags = 0; u32 tskey = 0; + struct rt6_info *rt = (struct rt6_info *)cork->dst; + struct ipv6_txoptions *opt = v6_cork->opt; - if (flags&MSG_PROBE) - return 0; - cork = &inet->cork.base; - if (skb_queue_empty(&sk->sk_write_queue)) { - /* - * setup for corking - */ - err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, - tclass, opt, rt, fl6); - if (err) - return err; - exthdrlen = (opt ? opt->opt_flen : 0); - length += exthdrlen; - transhdrlen += exthdrlen; + skb = skb_peek_tail(queue); + if (!skb) { + exthdrlen = opt ? 
opt->opt_flen : 0; dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; - } else { - rt = (struct rt6_info *)cork->dst; - fl6 = &inet->cork.fl.u.ip6; - opt = np->cork.opt; - transhdrlen = 0; - exthdrlen = 0; - dst_exthdrlen = 0; } + mtu = cork->fragsize; orig_mtu = mtu; @@ -1311,13 +1299,12 @@ emsgsize: * --yoshfuji */ - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { - err = ip6_ufo_append_data(sk, getfrag, from, length, + err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); if (err) @@ -1458,7 +1445,7 @@ alloc_new_skb: /* * Put the packet on the pending queue */ - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); continue; } @@ -1477,7 +1464,6 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); err = -ENOMEM; if (!sk_page_frag_refill(sk, pfrag)) @@ -1520,6 +1506,42 @@ error: IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } + +int ip6_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, int hlimit, + int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, int dontfrag) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + int exthdrlen; + int err; + + if (flags&MSG_PROBE) + return 0; + if (skb_queue_empty(&sk->sk_write_queue)) { + /* + * setup for corking + */ + err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, + tclass, opt, rt, fl6); + if (err) + return err; + + exthdrlen = (opt ? opt->opt_flen : 0); + length += exthdrlen; + transhdrlen += exthdrlen; + } else { + fl6 = &inet->cork.fl.u.ip6; + transhdrlen = 0; + } + + return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, + &np->cork, sk_page_frag(sk), getfrag, + from, length, transhdrlen, flags, dontfrag); +} EXPORT_SYMBOL_GPL(ip6_append_data); static void ip6_cork_release(struct inet_cork_full *cork, @@ -1627,11 +1649,12 @@ error: } EXPORT_SYMBOL_GPL(ip6_push_pending_frames); -void ip6_flush_pending_frames(struct sock *sk) +static void __ip6_flush_pending_frames(struct sock *sk, + struct sk_buff_head *queue) { struct sk_buff *skb; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { + while ((skb = __skb_dequeue_tail(queue)) != NULL) { if (skb_dst(skb)) IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); @@ -1640,4 +1663,9 @@ void ip6_flush_pending_frames(struct sock *sk) ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork); } + +void ip6_flush_pending_frames(struct sock *sk) +{ + __ip6_flush_pending_frames(sk, &sk->sk_write_queue); +} EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); -- cgit v1.2.3 From 6422398c2ab09268a55112f98cbf96bbf0184328 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:15 -0500 Subject: ipv6: introduce ipv6_make_skb This commit is very similar to commit 1c32c5ad6fac8cee1a77449f5abf211e911ff830 Author: Herbert Xu Date: Tue Mar 1 02:36:47 2011 +0000 inet: Add ip_make_skb and ip_finish_skb It adds IPv6 version of the helpers ip6_make_skb and ip6_finish_skb. The job of ip6_make_skb is to collect messages into an ipv6 packet and poplulate ipv6 eader. The job of ip6_finish_skb is to transmit the generated skb. 
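As an illustration only (not part of this patch), a minimal sketch of how a caller might pair the two helpers; the getfrag callback, the UDP header size and the -1 dontfrag value are assumptions borrowed from the UDP code that adopts these helpers in a later commit:

#include <linux/err.h>
#include <linux/udp.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>

/* Hypothetical caller: build one datagram worth of payload into an skb
 * and hand it straight to the output path, without touching the socket
 * write queue.  Filling the reserved transport header is left to the
 * caller before the send.
 */
static int example_send_one(struct sock *sk, struct msghdr *msg, int len,
			    int hlimit, int tclass, struct ipv6_txoptions *opt,
			    struct flowi6 *fl6, struct rt6_info *rt)
{
	struct sk_buff *skb;

	skb = ip6_make_skb(sk, ip_generic_getfrag, msg, len,
			   sizeof(struct udphdr), hlimit, tclass, opt,
			   fl6, rt, msg->msg_flags, -1);
	if (IS_ERR_OR_NULL(skb))
		return skb ? PTR_ERR(skb) : 0;

	/* ...fill in the transport header reserved above... */
	return ip6_send_skb(skb);
}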
Together they replicated the job of ip6_push_pending_frames() while also provide the capability to be called independently. This will be needed to add lockless UDP sendmsg support. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/net/ipv6.h | 19 ++++++++++ net/ipv6/ip6_output.c | 103 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 103 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4292929392b0..8027ca53e31f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -788,6 +788,25 @@ int ip6_push_pending_frames(struct sock *sk); void ip6_flush_pending_frames(struct sock *sk); +int ip6_send_skb(struct sk_buff *skb); + +struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork); +struct sk_buff *ip6_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, struct ipv6_txoptions *opt, + struct flowi6 *fl6, struct rt6_info *rt, + unsigned int flags, int dontfrag); + +static inline struct sk_buff *ip6_finish_skb(struct sock *sk) +{ + return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork, + &inet6_sk(sk)->cork); +} + int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1b66453a695d..b89d3c27dac7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1564,22 +1564,23 @@ static void ip6_cork_release(struct inet_cork_full *cork, memset(&cork->fl, 0, sizeof(cork->fl)); } -int ip6_push_pending_frames(struct sock *sk) +struct sk_buff *__ip6_make_skb(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; - struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct ipv6_txoptions *opt = v6_cork->opt; + struct rt6_info *rt = (struct rt6_info *)cork->base.dst; + struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; - int err = 0; - skb = __skb_dequeue(&sk->sk_write_queue); + skb = __skb_dequeue(queue); if (skb == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); @@ -1587,7 +1588,7 @@ int ip6_push_pending_frames(struct sock *sk) /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); - while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); @@ -1612,10 +1613,10 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, + ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, np->autoflowlabel)); - hdr->hop_limit = np->cork.hop_limit; + hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = 
fl6->saddr; hdr->daddr = *final_dst; @@ -1632,25 +1633,45 @@ int ip6_push_pending_frames(struct sock *sk) ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } + ip6_cork_release(cork, v6_cork); +out: + return skb; +} + +int ip6_send_skb(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + int err; + err = ip6_local_out(skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) - goto error; + IP6_INC_STATS(net, rt->rt6i_idev, + IPSTATS_MIB_OUTDISCARDS); } -out: - ip6_cork_release(&inet->cork, &np->cork); return err; -error: - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); - goto out; +} + +int ip6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + + skb = ip6_finish_skb(sk); + if (!skb) + return 0; + + return ip6_send_skb(skb); } EXPORT_SYMBOL_GPL(ip6_push_pending_frames); static void __ip6_flush_pending_frames(struct sock *sk, - struct sk_buff_head *queue) + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb; @@ -1661,11 +1682,55 @@ static void __ip6_flush_pending_frames(struct sock *sk, kfree_skb(skb); } - ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork); + ip6_cork_release(cork, v6_cork); } void ip6_flush_pending_frames(struct sock *sk) { - __ip6_flush_pending_frames(sk, &sk->sk_write_queue); + __ip6_flush_pending_frames(sk, &sk->sk_write_queue, + &inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); + +struct sk_buff *ip6_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, + struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, + int dontfrag) +{ + struct inet_cork_full cork; + struct inet6_cork v6_cork; + struct sk_buff_head queue; + int exthdrlen = (opt ? opt->opt_flen : 0); + int err; + + if (flags & MSG_PROBE) + return NULL; + + __skb_queue_head_init(&queue); + + cork.base.flags = 0; + cork.base.addr = 0; + cork.base.opt = NULL; + v6_cork.opt = NULL; + err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); + if (err) + return ERR_PTR(err); + + if (dontfrag < 0) + dontfrag = inet6_sk(sk)->dontfrag; + + err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, + ¤t->task_frag, getfrag, from, + length + exthdrlen, transhdrlen + exthdrlen, + flags, dontfrag); + if (err) { + __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); + return ERR_PTR(err); + } + + return __ip6_make_skb(sk, &queue, &cork, &v6_cork); +} -- cgit v1.2.3 From d39d938c8228a4c5860138a53cf2b9ae4c4baec2 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:16 -0500 Subject: ipv6: Introduce udpv6_send_skb() Now that we can individually construct IPv6 skbs to send, add a udpv6_send_skb() function to populate the udp header and send the skb. This allows udp_v6_push_pending_frames() to re-use this function as well as enables us to add lockless sendmsg() support. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/udp.c | 67 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e41f017cd479..67a3d70f7ac4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -990,9 +990,10 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, { unsigned int offset; struct udphdr *uh = udp_hdr(skb); + struct sk_buff *frags = skb_shinfo(skb)->frag_list; __wsum csum = 0; - if (skb_queue_len(&sk->sk_write_queue) == 1) { + if (!frags) { /* Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); @@ -1008,9 +1009,9 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } + do { + csum = csum_add(csum, frags->csum); + } while ((frags = frags->next)); uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, csum); @@ -1023,26 +1024,15 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, * Sending */ -static int udp_v6_push_pending_frames(struct sock *sk) +static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) { - struct sk_buff *skb; + struct sock *sk = skb->sk; struct udphdr *uh; - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; - - if (up->pending == AF_INET) - return udp_push_pending_frames(sk); - - fl6 = &inet->cork.fl.u.ip6; - - /* Grab the skbuff where UDP header space exists. */ - skb = skb_peek(&sk->sk_write_queue); - if (skb == NULL) - goto out; + int offset = skb_transport_offset(skb); + int len = skb->len - offset; /* * Create a UDP header @@ -1050,29 +1040,28 @@ static int udp_v6_push_pending_frames(struct sock *sk) uh = udp_hdr(skb); uh->source = fl6->fl6_sport; uh->dest = fl6->fl6_dport; - uh->len = htons(up->len); + uh->len = htons(len); uh->check = 0; if (is_udplite) - csum = udplite_csum_outgoing(sk, skb); - else if (up->no_check6_tx) { /* UDP csum disabled */ + csum = udplite_csum(skb); + else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, - up->len); + udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len); goto send; } else - csum = udp_csum_outgoing(sk, skb); + csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, - up->len, fl6->flowi6_proto, csum); + len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: - err = ip6_push_pending_frames(sk); + err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { UDP6_INC_STATS_USER(sock_net(sk), @@ -1082,6 +1071,30 @@ send: } else UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +static int udp_v6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + struct udp_sock *up = udp_sk(sk); + struct flowi6 fl6; + int err = 0; + + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + /* ip6_finish_skb will release the cork, so make a copy of + * fl6 here. 
+ */ + fl6 = inet_sk(sk)->cork.fl.u.ip6; + + skb = ip6_finish_skb(sk); + if (!skb) + goto out; + + err = udp_v6_send_skb(skb, &fl6); + out: up->len = 0; up->pending = 0; -- cgit v1.2.3 From 03485f2adcde0c2d4e9228b659be78e872486bbb Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:17 -0500 Subject: udpv6: Add lockless sendmsg() support This commit adds the same functionaliy to IPv6 that commit 903ab86d195cca295379699299c5fc10beba31c7 Author: Herbert Xu Date: Tue Mar 1 02:36:48 2011 +0000 udp: Add lockless transmit path added to IPv4. UDP transmit path can now run without a socket lock, thus allowing multiple threads to send to a single socket more efficiently. This is only used when corking/MSG_MORE is not used. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/ipv6/udp.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 67a3d70f7ac4..d048d46779fc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1177,6 +1177,7 @@ do_udp_sendmsg: if (len > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (up->pending) { /* * There are pending frames. @@ -1307,6 +1308,20 @@ do_udp_sendmsg: goto do_confirm; back_from_confirm: + /* Lockless fast path for the non-corking case */ + if (!corkreq) { + struct sk_buff *skb; + + skb = ip6_make_skb(sk, getfrag, msg, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, + &fl6, (struct rt6_info *)dst, + msg->msg_flags, dontfrag); + err = PTR_ERR(skb); + if (!IS_ERR_OR_NULL(skb)) + err = udp_v6_send_skb(skb, &fl6); + goto release_dst; + } + lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ @@ -1324,7 +1339,6 @@ do_append_data: if (dontfrag < 0) dontfrag = np->dontfrag; up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, @@ -1336,6 +1350,11 @@ do_append_data: else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) up->pending = 0; + if (err > 0) + err = np->recverr ? net_xmit_errno(err) : 0; + release_sock(sk); + +release_dst: if (dst) { if (connected) { ip6_dst_store(sk, dst, @@ -1352,9 +1371,6 @@ do_append_data: dst = NULL; } - if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; - release_sock(sk); out: dst_release(dst); fl6_sock_release(flowlabel); -- cgit v1.2.3 From 32dce968dd987adfb0c00946d78dad9154f64759 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:18 -0500 Subject: ipv6: Allow for partial checksums on non-ufo packets Currntly, if we are not doing UFO on the packet, all UDP packets will start with CHECKSUM_NONE and thus perform full checksum computations in software even if device support IPv6 checksum offloading. Let's start start with CHECKSUM_PARTIAL if the device supports it and we are sending only a single packet at or below mtu size. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b89d3c27dac7..1a036f35d833 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1227,6 +1227,7 @@ static int __ip6_append_data(struct sock *sk, u32 tskey = 0; struct rt6_info *rt = (struct rt6_info *)cork->dst; struct ipv6_txoptions *opt = v6_cork->opt; + int csummode = CHECKSUM_NONE; skb = skb_peek_tail(queue); if (!skb) { @@ -1283,6 +1284,14 @@ emsgsize: tskey = sk->sk_tskey++; } + /* If this is the first and only packet and device + * supports checksum offloading, let's use it. + */ + if (!skb && + length + fragheaderlen < mtu && + rt->dst.dev->features & NETIF_F_V6_CSUM && + !exthdrlen) + csummode = CHECKSUM_PARTIAL; /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. @@ -1395,7 +1404,7 @@ alloc_new_skb: * Fill in the control structures */ skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = csummode; skb->csum = 0; /* reserve for fragmentation and ipsec header */ skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + -- cgit v1.2.3 From 7d37d0c1591b95ec4e663f7c00eccd5938797a99 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 31 Jan 2015 18:10:03 +0100 Subject: net: sctp: Deletion of an unnecessary check before the function call "kfree" The kfree() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-By: Neil Horman Signed-off-by: David S. Miller --- net/sctp/associola.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 26d06dbcc1c8..197c3f59ecbf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -391,8 +391,7 @@ void sctp_association_free(struct sctp_association *asoc) sctp_asconf_queue_teardown(asoc); /* Free pending address space being deleted */ - if (asoc->asconf_addr_del_pending != NULL) - kfree(asoc->asconf_addr_del_pending); + kfree(asoc->asconf_addr_del_pending); /* AUTH - Free the endpoint shared keys */ sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); -- cgit v1.2.3 From 88d9077c27d0c1a7c022d9dc987640beecf23560 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 3 Feb 2015 10:01:13 +0200 Subject: Bluetooth: Fix potential NULL dereference The bnep_get_device function may be triggered by an ioctl just after a connection has gone down. In such a case the respective L2CAP chan->conn pointer will get set to NULL (by l2cap_chan_del). This patch adds a missing NULL check for this case in the bnep_get_device() function. 
Reported-by: Patrik Flykt Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/bnep/core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index ce82722d049b..05f57e491ccb 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -511,13 +511,12 @@ static int bnep_session(void *arg) static struct device *bnep_get_device(struct bnep_session *session) { - struct hci_conn *conn; + struct l2cap_conn *conn = l2cap_pi(session->sock->sk)->chan->conn; - conn = l2cap_pi(session->sock->sk)->chan->conn->hcon; - if (!conn) + if (!conn || !conn->hcon) return NULL; - return &conn->dev; + return &conn->hcon->dev; } static struct device_type bnep_type = { -- cgit v1.2.3 From 79044f60caa7c377333dc8f13cf1e48c144e2521 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 29 Jan 2015 18:26:00 +0200 Subject: net: rfkill: Add Broadcom BCM2E40 bluetooth ACPI ID This is yet another Broadcom bluetooth chip with ACPI ID BCM2E40. Signed-off-by: Mika Westerberg Signed-off-by: Johannes Berg --- net/rfkill/rfkill-gpio.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 2a4717967502..a6915b358354 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -170,6 +170,7 @@ static const struct acpi_device_id rfkill_acpi_match[] = { { "BCM2E1A", RFKILL_TYPE_BLUETOOTH }, { "BCM2E39", RFKILL_TYPE_BLUETOOTH }, { "BCM2E3D", RFKILL_TYPE_BLUETOOTH }, + { "BCM2E40", RFKILL_TYPE_BLUETOOTH }, { "BCM2E64", RFKILL_TYPE_BLUETOOTH }, { "BCM4752", RFKILL_TYPE_GPS }, { "LNV4752", RFKILL_TYPE_GPS }, -- cgit v1.2.3 From 79b7cf60e190efa8f0689b48a45859f5c3534c76 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 2 Feb 2015 10:01:45 +0100 Subject: netlabel: Deletion of an unnecessary check before the function call "cipso_v4_doi_putdef" The cipso_v4_doi_putdef() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_mgmt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 8b3b789c43c2..f5807f57aebc 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -242,8 +242,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, return 0; add_failure: - if (cipsov4) - cipso_v4_doi_putdef(cipsov4); + cipso_v4_doi_putdef(cipsov4); if (entry) kfree(entry->domain); kfree(addrmap); -- cgit v1.2.3 From 7a11b1d303ee97e4880caa3efc2f5cecc28d4ba2 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 2 Feb 2015 10:40:30 +0100 Subject: netlabel: Deletion of an unnecessary check before the function call "cipso_v4_doi_free" The cipso_v4_doi_free() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Paul Moore Signed-off-by: David S. 
Miller --- net/netlabel/netlabel_cipso_v4.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 179625353cac..7fd1104ba900 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -324,8 +324,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, return 0; add_std_failure: - if (doi_def) - cipso_v4_doi_free(doi_def); + cipso_v4_doi_free(doi_def); return ret_val; } -- cgit v1.2.3 From 4de46d5ebc000ea110bcb76a7478c1ac37724221 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 2 Feb 2015 11:00:24 +0100 Subject: netlabel: Less function calls in netlbl_mgmt_add_common() after error detection The functions "cipso_v4_doi_putdef" and "kfree" could be called in some cases by the netlbl_mgmt_add_common() function during error handling even if the passed variables contained still a null pointer. * This implementation detail could be improved by adjustments for jump labels. * Let us return immediately after the first failed function call according to the current Linux coding style convention. * Let us delete also an unnecessary check for the variable "entry" there. Signed-off-by: Markus Elfring Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_mgmt.c | 49 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index f5807f57aebc..70440748fe5c 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -93,23 +93,20 @@ static int netlbl_mgmt_add_common(struct genl_info *info, struct netlbl_audit *audit_info) { int ret_val = -EINVAL; - struct netlbl_dom_map *entry = NULL; struct netlbl_domaddr_map *addrmap = NULL; struct cipso_v4_doi *cipsov4 = NULL; u32 tmp_val; + struct netlbl_dom_map *entry = kzalloc(sizeof(*entry), GFP_KERNEL); - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } + if (!entry) + return -ENOMEM; entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); if (info->attrs[NLBL_MGMT_A_DOMAIN]) { size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); entry->domain = kmalloc(tmp_size, GFP_KERNEL); if (entry->domain == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_entry; } nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); @@ -125,16 +122,16 @@ static int netlbl_mgmt_add_common(struct genl_info *info, break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) - goto add_failure; + goto add_free_domain; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); cipsov4 = cipso_v4_doi_getdef(tmp_val); if (cipsov4 == NULL) - goto add_failure; + goto add_free_domain; entry->def.cipso = cipsov4; break; default: - goto add_failure; + goto add_free_domain; } if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) { @@ -145,7 +142,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); @@ -153,12 +150,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) != sizeof(struct in_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) != 
sizeof(struct in_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]); @@ -166,7 +163,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_addrmap; } map->list.addr = addr->s_addr & mask->s_addr; map->list.mask = mask->s_addr; @@ -178,7 +175,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); if (ret_val != 0) { kfree(map); - goto add_failure; + goto add_free_addrmap; } entry->def.type = NETLBL_NLTYPE_ADDRSELECT; @@ -192,7 +189,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); @@ -200,12 +197,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) != sizeof(struct in6_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) != sizeof(struct in6_addr)) { ret_val = -EINVAL; - goto add_failure; + goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]); @@ -213,7 +210,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; - goto add_failure; + goto add_free_addrmap; } map->list.addr = *addr; map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; @@ -227,7 +224,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); if (ret_val != 0) { kfree(map); - goto add_failure; + goto add_free_addrmap; } entry->def.type = NETLBL_NLTYPE_ADDRSELECT; @@ -237,15 +234,17 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) - goto add_failure; + goto add_free_addrmap; return 0; -add_failure: - cipso_v4_doi_putdef(cipsov4); - if (entry) - kfree(entry->domain); +add_free_addrmap: kfree(addrmap); +add_doi_put_def: + cipso_v4_doi_putdef(cipsov4); +add_free_domain: + kfree(entry->domain); +add_free_entry: kfree(entry); return ret_val; } -- cgit v1.2.3 From a8866ff6a5bce7d0ec465a63bc482a85c09b0d39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 12 Dec 2014 23:02:36 -0500 Subject: netlink: make the check for "send from tx_ring" deterministic As it is, zero msg_iovlen means that the first iovec in the kernel array of iovecs is left uninitialized, so checking if its ->iov_base is NULL is random. Since the real users of that thing are doing sendto(fd, NULL, 0, ...), they are getting msg_iovlen = 1 and msg_iov[0] = {NULL, 0}, which is what this test is trying to catch. As suggested by davem, let's just check that msg_iovlen was 1 and msg_iov[0].iov_base was NULL - _that_ is well-defined and it catches what we want to catch. 
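For context, the calling convention that check is trying to recognise looks roughly like this from userspace; a hypothetical sketch, with the socket setup and TX-ring frame handling omitted:

#include <sys/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>

/* The payload has already been written into the mmaped TX ring frames,
 * so the send itself carries no user buffer at all; the kernel then sees
 * msg_iovlen == 1 with msg_iov[0] == { NULL, 0 }.
 */
static ssize_t nl_ring_flush(int fd)
{
	struct sockaddr_nl dst = { .nl_family = AF_NETLINK };

	return sendto(fd, NULL, 0, 0, (struct sockaddr *)&dst, sizeof(dst));
}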
Signed-off-by: Al Viro --- net/netlink/af_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a36777b7cfb6..4fd38a612ff6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2298,7 +2298,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } + /* It's a really convoluted way for userland to ask for mmaped + * sendmsg(), but that's what we've got... + */ if (netlink_tx_is_mmaped(sk) && + msg->msg_iter.type == ITER_IOVEC && + msg->msg_iter.nr_segs == 1 && msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, &scm); -- cgit v1.2.3 From 7ae9abfd9d6f3216500fc2874254e726cc30ca01 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 19:30:51 -0500 Subject: ipv4: raw_send_hdrinc(): pass msghdr Switch from passing msg->iov_iter.iov to passing msg itself Signed-off-by: Al Viro --- net/ipv4/raw.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0bb68df5055d..2c9d252072a2 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -337,7 +337,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, - void *from, size_t length, + struct msghdr *msg, size_t length, struct rtable **rtp, unsigned int flags) { @@ -382,7 +382,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->transport_header = skb->network_header; err = -EFAULT; - if (memcpy_fromiovecend((void *)iph, from, 0, length)) + if (memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length)) goto error_free; iphlen = iph->ihl * 4; @@ -625,8 +625,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, back_from_confirm: if (inet->hdrincl) - /* XXX: stripping const */ - err = raw_send_hdrinc(sk, &fl4, (struct iovec *)msg->msg_iter.iov, len, + err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags); else { -- cgit v1.2.3 From c3c1a7dbe24282ab8999cb8c964dc6371cde3ea3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 19:36:28 -0500 Subject: ipv6: rawv6_send_hdrinc(): pass msghdr Switch from passing msg->iov_iter.iov to passing msg itself Signed-off-by: Al Viro --- net/ipv6/raw.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ee25631f8c29..0dbb328fa688 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -609,7 +609,7 @@ out: return err; } -static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, +static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags) { @@ -648,7 +648,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, skb->ip_summed = CHECKSUM_NONE; skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, from, 0, length); + err = memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length); if (err) goto error_fault; @@ -886,8 +886,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - /* XXX: stripping const */ - err = rawv6_send_hdrinc(sk, (struct iovec *)msg->msg_iter.iov, len, &fl6, &dst, msg->msg_flags); + err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, -- cgit v1.2.3 From 
4c946d9c11d173c2ea6b9081b248f8072e6b46f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 19:52:04 -0500 Subject: vmci: propagate msghdr all way down to __qp_memcpy_to_queue() Switch from passing msg->iov_iter.iov to passing msg itself Signed-off-by: Al Viro --- drivers/misc/vmw_vmci/vmci_queue_pair.c | 16 ++++++++-------- include/linux/vmw_vmci_api.h | 2 +- net/vmw_vsock/vmci_transport.c | 3 +-- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index 7aaaf51e1596..35f19a683822 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -370,12 +370,12 @@ static int __qp_memcpy_to_queue(struct vmci_queue *queue, to_copy = size - bytes_copied; if (is_iovec) { - struct iovec *iov = (struct iovec *)src; + struct msghdr *msg = (struct msghdr *)src; int err; /* The iovec will track bytes_copied internally. */ - err = memcpy_fromiovec((u8 *)va + page_offset, - iov, to_copy); + err = memcpy_from_msg((u8 *)va + page_offset, + msg, to_copy); if (err != 0) { if (kernel_if->host) kunmap(kernel_if->u.h.page[page_index]); @@ -580,7 +580,7 @@ static int qp_memcpy_from_queue(void *dest, */ static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, u64 queue_offset, - const void *src, + const void *msg, size_t src_offset, size_t size) { @@ -588,7 +588,7 @@ static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, * We ignore src_offset because src is really a struct iovec * and will * maintain offset internally. */ - return __qp_memcpy_to_queue(queue, queue_offset, src, size, true); + return __qp_memcpy_to_queue(queue, queue_offset, msg, size, true); } /* @@ -3223,13 +3223,13 @@ EXPORT_SYMBOL_GPL(vmci_qpair_peek); * of bytes enqueued or < 0 on error. 
*/ ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, - void *iov, + struct msghdr *msg, size_t iov_size, int buf_type) { ssize_t result; - if (!qpair || !iov) + if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); @@ -3238,7 +3238,7 @@ ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, result = qp_enqueue_locked(qpair->produce_q, qpair->consume_q, qpair->produce_q_size, - iov, iov_size, + msg, iov_size, qp_memcpy_to_queue_iov); if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h index 5691f752ce8f..63df3a2a8ce5 100644 --- a/include/linux/vmw_vmci_api.h +++ b/include/linux/vmw_vmci_api.h @@ -74,7 +74,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size, int mode); ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, - void *iov, size_t iov_size, int mode); + struct msghdr *msg, size_t iov_size, int mode); ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, int mode); ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 02d2e5229240..7f3255084a6c 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1850,8 +1850,7 @@ static ssize_t vmci_transport_stream_enqueue( struct msghdr *msg, size_t len) { - /* XXX: stripping const */ - return vmci_qpair_enquev(vmci_trans(vsk)->qpair, (struct iovec *)msg->msg_iter.iov, len, 0); + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); } static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) -- cgit v1.2.3 From af2b040e470b470bfc881981db3c796072853eae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 21:44:24 -0500 Subject: rxrpc: switch rxrpc_send_data() to iov_iter primitives Convert skb_add_data() to iov_iter; allows to get rid of the explicit messing with iovec in its only caller - skb_add_data() will keep advancing ->msg_iter for us, so there's no need to similate that manually. 
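The essence of the conversion, as a simplified hypothetical helper (not the patch itself): the iterator carries its own position and remaining count, so the per-segment from/segment/ioc bookkeeping in rxrpc_send_data() simply disappears:

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/uio.h>

/* Copy as much of the message as fits into the skb's tailroom.  The
 * iov_iter advances itself on every copy_from_iter() call, so there is
 * no per-iovec offset or segment counter to maintain by hand.
 */
static int fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	while (iov_iter_count(from) && skb_tailroom(skb)) {
		size_t copy = min_t(size_t, skb_tailroom(skb),
				    iov_iter_count(from));

		if (copy_from_iter(skb_put(skb, copy), copy, from) != copy)
			return -EFAULT;
	}
	return 0;
}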
Signed-off-by: Al Viro --- include/linux/skbuff.h | 11 +++++------ net/rxrpc/ar-output.c | 43 ++++++++++--------------------------------- 2 files changed, 15 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 85ab7d72b54c..9a8bafee1b67 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2484,19 +2484,18 @@ static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) } static inline int skb_add_data(struct sk_buff *skb, - char __user *from, int copy) + struct iov_iter *from, int copy) { const int off = skb->len; if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), - copy, 0, &err); - if (!err) { + __wsum csum = 0; + if (csum_and_copy_from_iter(skb_put(skb, copy), copy, + &csum, from) == copy) { skb->csum = csum_block_add(skb->csum, csum, off); return 0; } - } else if (!copy_from_user(skb_put(skb, copy), from, copy)) + } else if (copy_from_iter(skb_put(skb, copy), copy, from) == copy) return 0; __skb_trim(skb, off); diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index e1a9373e5979..963a5b91f3e8 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -529,13 +529,11 @@ static int rxrpc_send_data(struct kiocb *iocb, struct msghdr *msg, size_t len) { struct rxrpc_skb_priv *sp; - unsigned char __user *from; struct sk_buff *skb; - const struct iovec *iov; struct sock *sk = &rx->sk; long timeo; bool more; - int ret, ioc, segment, copied; + int ret, copied; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -545,25 +543,17 @@ static int rxrpc_send_data(struct kiocb *iocb, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) return -EPIPE; - iov = msg->msg_iter.iov; - ioc = msg->msg_iter.nr_segs - 1; - from = iov->iov_base; - segment = iov->iov_len; - iov++; more = msg->msg_flags & MSG_MORE; skb = call->tx_pending; call->tx_pending = NULL; copied = 0; - do { + if (len > iov_iter_count(&msg->msg_iter)) + len = iov_iter_count(&msg->msg_iter); + while (len) { int copy; - if (segment > len) - segment = len; - - _debug("SEGMENT %d @%p", segment, from); - if (!skb) { size_t size, chunk, max, space; @@ -631,13 +621,13 @@ static int rxrpc_send_data(struct kiocb *iocb, /* append next segment of data to the current buffer */ copy = skb_tailroom(skb); ASSERTCMP(copy, >, 0); - if (copy > segment) - copy = segment; + if (copy > len) + copy = len; if (copy > sp->remain) copy = sp->remain; _debug("add"); - ret = skb_add_data(skb, from, copy); + ret = skb_add_data(skb, &msg->msg_iter, copy); _debug("added"); if (ret < 0) goto efault; @@ -646,18 +636,6 @@ static int rxrpc_send_data(struct kiocb *iocb, copied += copy; len -= copy; - segment -= copy; - from += copy; - while (segment == 0 && ioc > 0) { - from = iov->iov_base; - segment = iov->iov_len; - iov++; - ioc--; - } - if (len == 0) { - segment = 0; - ioc = 0; - } /* check for the far side aborting the call or a network error * occurring */ @@ -665,7 +643,7 @@ static int rxrpc_send_data(struct kiocb *iocb, goto call_aborted; /* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 || (segment == 0 && !more)) { + if (sp->remain <= 0 || (!len && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; size_t pad; @@ -711,11 +689,10 @@ static int rxrpc_send_data(struct kiocb *iocb, memcpy(skb->head, &sp->hdr, sizeof(struct rxrpc_header)); - rxrpc_queue_packet(call, skb, segment == 0 && !more); + rxrpc_queue_packet(call, skb, 
!iov_iter_count(&msg->msg_iter) && !more); skb = NULL; } - - } while (segment > 0); + } success: ret = copied; -- cgit v1.2.3 From 2e90b1c45e34240eeeacab0b37d5f8f739462bdc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 21:50:31 -0500 Subject: rxrpc: make the users of rxrpc_kernel_send_data() set kvec-backed msg_iter properly Use iov_iter_kvec() there, get rid of set_fs() games - now that rxrpc_send_data() uses iov_iter primitives, it'll handle ITER_KVEC just fine. Signed-off-by: Al Viro --- fs/afs/rxrpc.c | 14 +++++++------- net/rxrpc/ar-output.c | 3 --- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 06e14bfb3496..dbc732e9a5c0 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -306,8 +306,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, _debug("- range %u-%u%s", offset, to, msg->msg_flags ? " [more]" : ""); - iov_iter_init(&msg->msg_iter, WRITE, - (struct iovec *) iov, 1, to - offset); + iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, + iov, 1, to - offset); /* have to change the state *before* sending the last * packet as RxRPC might give us the reply before it @@ -384,7 +384,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iov, 1, + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; @@ -770,7 +770,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, void afs_send_empty_reply(struct afs_call *call) { struct msghdr msg; - struct iovec iov[1]; + struct kvec iov[1]; _enter(""); @@ -778,7 +778,7 @@ void afs_send_empty_reply(struct afs_call *call) iov[0].iov_len = 0; msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_init(&msg.msg_iter, WRITE, iov, 0, 0); /* WTF? */ + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */ msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -805,7 +805,7 @@ void afs_send_empty_reply(struct afs_call *call) void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) { struct msghdr msg; - struct iovec iov[1]; + struct kvec iov[1]; int n; _enter(""); @@ -814,7 +814,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) iov[0].iov_len = len; msg.msg_name = NULL; msg.msg_namelen = 0; - iov_iter_init(&msg.msg_iter, WRITE, iov, 1, len); + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len); msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 963a5b91f3e8..8331c95e1522 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -232,10 +232,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - mm_segment_t oldfs = get_fs(); - set_fs(KERNEL_DS); ret = rxrpc_send_data(NULL, call->socket, call, msg, len); - set_fs(oldfs); } release_sock(&call->socket->sk); -- cgit v1.2.3 From cacdc7d2f9fa42e29b650e2879df42ea7d7833c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 20:34:16 -0500 Subject: ip: stash a pointer to msghdr in struct ping_fakehdr ... 
instead of storing its ->mgs_iter.iov there Signed-off-by: Al Viro --- include/net/ping.h | 2 +- net/ipv4/ping.c | 7 +++---- net/ipv6/ping.c | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/ping.h b/include/net/ping.h index f074060bc5de..cc16d413f681 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -59,7 +59,7 @@ extern struct pingv6_ops pingv6_ops; struct pingfakehdr { struct icmphdr icmph; - struct iovec *iov; + struct msghdr *msg; sa_family_t family; __wsum wcheck; }; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2a3720fb5a5f..9e15ba701401 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -602,14 +602,14 @@ int ping_getfrag(void *from, char *to, if (fraglen < sizeof(struct icmphdr)) BUG(); if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr), - pfh->iov, 0, fraglen - sizeof(struct icmphdr), + pfh->msg->msg_iter.iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), + (to, pfh->msg->msg_iter.iov, offset - sizeof(struct icmphdr), fraglen, &pfh->wcheck)) return -EFAULT; } @@ -811,8 +811,7 @@ back_from_confirm: pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 2d3148378a1f..bd46f736f61d 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -163,8 +163,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET6; -- cgit v1.2.3 From 57be5bdad759b9dde8b0d0cc630782a1a4ac4b9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 13:40:20 -0500 Subject: ip: convert tcp_sendmsg() to iov_iter primitives patch is actually smaller than it seems to be - most of it is unindenting the inner loop body in tcp_sendmsg() itself... the bit in tcp_input.c is going to get reverted very soon - that's what memcpy_from_msg() will become, but not in this commit; let's keep it reasonably contained... There's one potentially subtle change here: in case of short copy from userland, mainline tcp_send_syn_data() discards the skb it has allocated and falls back to normal path, where we'll send as much as possible after rereading the same data again. This patch trims SYN+data skb instead - that way we don't need to copy from the same place twice. 
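A minimal sketch of that short-copy handling, written as a hypothetical helper (the real change sits in tcp_send_syn_data() in the tcp_output.c hunk further down):

#include <linux/skbuff.h>
#include <linux/uio.h>

/* Try to copy 'space' bytes from the message into the skb.  On a short
 * copy, trim the uncopied tail off the skb and keep what we have rather
 * than freeing the skb and re-reading the same bytes later.  Returns the
 * number of bytes kept; 0 means nothing could be copied.
 */
static int copy_or_trim(struct sk_buff *skb, struct iov_iter *from, int space)
{
	int copied = copy_from_iter(skb_put(skb, space), space, from);

	if (copied != space)
		skb_trim(skb, skb->len - (space - copied));
	return copied;
}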
Signed-off-by: Al Viro --- include/net/sock.h | 18 ++-- net/ipv4/tcp.c | 233 +++++++++++++++++++++++--------------------------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 11 ++- 4 files changed, 123 insertions(+), 141 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 15341499786c..1e45e599a3ab 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) } static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, - char __user *from, char *to, + struct iov_iter *from, char *to, int copy, int offset) { if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); - if (err) - return err; + __wsum csum = 0; + if (csum_and_copy_from_iter(to, copy, &csum, from) != copy) + return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, offset); } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { - if (!access_ok(VERIFY_READ, from, copy) || - __copy_from_user_nocache(to, from, copy)) + if (copy_from_iter_nocache(to, copy, from) != copy) return -EFAULT; - } else if (copy_from_user(to, from, copy)) + } else if (copy_from_iter(to, copy, from) != copy) return -EFAULT; return 0; } static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, - char __user *from, int copy) + struct iov_iter *from, int copy) { int err, offset = skb->len; @@ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, return err; } -static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, +static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from, struct sk_buff *skb, struct page *page, int off, int copy) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3075723c729b..9d72a0fcd928 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - const struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags, err, copied = 0; - int mss_now = 0, size_goal, copied_syn = 0, offset = 0; + int flags, err, copied = 0; + int mss_now = 0, size_goal, copied_syn = 0; bool sg; long timeo; @@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; else if (err) goto out_err; - offset = copied_syn; } timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, mss_now = tcp_send_mss(sk, &size_goal, flags); /* Ok commence sending. 
*/ - iovlen = msg->msg_iter.nr_segs; - iov = msg->msg_iter.iov; copied = 0; err = -EPIPE; @@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = !!(sk->sk_route_caps & NETIF_F_SG); - while (--iovlen >= 0) { - size_t seglen = iov->iov_len; - unsigned char __user *from = iov->iov_base; + while (iov_iter_count(&msg->msg_iter)) { + int copy = 0; + int max = size_goal; - iov++; - if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ - if (offset >= seglen) { - offset -= seglen; - continue; - } - seglen -= offset; - from += offset; - offset = 0; + skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; } - while (seglen > 0) { - int copy = 0; - int max = size_goal; - - skb = tcp_write_queue_tail(sk); - if (tcp_send_head(sk)) { - if (skb->ip_summed == CHECKSUM_NONE) - max = mss_now; - copy = max - skb->len; - } - - if (copy <= 0) { + if (copy <= 0) { new_segment: - /* Allocate new segment. If the interface is SG, - * allocate skb fitting to single page. - */ - if (!sk_stream_memory_free(sk)) - goto wait_for_sndbuf; + /* Allocate new segment. If the interface is SG, + * allocate skb fitting to single page. + */ + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, - select_size(sk, sg), - sk->sk_allocation); - if (!skb) - goto wait_for_memory; + skb = sk_stream_alloc_skb(sk, + select_size(sk, sg), + sk->sk_allocation); + if (!skb) + goto wait_for_memory; - /* - * Check whether we can use HW checksum. - */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_PARTIAL; + /* + * Check whether we can use HW checksum. + */ + if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + skb->ip_summed = CHECKSUM_PARTIAL; - skb_entail(sk, skb); - copy = size_goal; - max = size_goal; + skb_entail(sk, skb); + copy = size_goal; + max = size_goal; - /* All packets are restored as if they have - * already been sent. skb_mstamp isn't set to - * avoid wrong rtt estimation. - */ - if (tp->repair) - TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; - } + /* All packets are restored as if they have + * already been sent. skb_mstamp isn't set to + * avoid wrong rtt estimation. + */ + if (tp->repair) + TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; + } - /* Try to append data to the end of skb. */ - if (copy > seglen) - copy = seglen; - - /* Where to copy to? */ - if (skb_availroom(skb) > 0) { - /* We have some space in skb head. Superb! */ - copy = min_t(int, copy, skb_availroom(skb)); - err = skb_add_data_nocache(sk, skb, from, copy); - if (err) - goto do_fault; - } else { - bool merge = true; - int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); - - if (!sk_page_frag_refill(sk, pfrag)) - goto wait_for_memory; - - if (!skb_can_coalesce(skb, i, pfrag->page, - pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { - tcp_mark_push(tp, skb); - goto new_segment; - } - merge = false; - } + /* Try to append data to the end of skb. */ + if (copy > iov_iter_count(&msg->msg_iter)) + copy = iov_iter_count(&msg->msg_iter); + + /* Where to copy to? */ + if (skb_availroom(skb) > 0) { + /* We have some space in skb head. Superb! 
*/ + copy = min_t(int, copy, skb_availroom(skb)); + err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); + if (err) + goto do_fault; + } else { + bool merge = true; + int i = skb_shinfo(skb)->nr_frags; + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + goto wait_for_memory; - copy = min_t(int, copy, pfrag->size - pfrag->offset); - - if (!sk_wmem_schedule(sk, copy)) - goto wait_for_memory; - - err = skb_copy_to_page_nocache(sk, from, skb, - pfrag->page, - pfrag->offset, - copy); - if (err) - goto do_error; - - /* Update the skb. */ - if (merge) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else { - skb_fill_page_desc(skb, i, pfrag->page, - pfrag->offset, copy); - get_page(pfrag->page); + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + if (i == MAX_SKB_FRAGS || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; } - pfrag->offset += copy; + merge = false; } - if (!copied) - TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + copy = min_t(int, copy, pfrag->size - pfrag->offset); - tp->write_seq += copy; - TCP_SKB_CB(skb)->end_seq += copy; - tcp_skb_pcount_set(skb, 0); + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_memory; - from += copy; - copied += copy; - if ((seglen -= copy) == 0 && iovlen == 0) { - tcp_tx_timestamp(sk, skb); - goto out; + err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, + pfrag->page, + pfrag->offset, + copy); + if (err) + goto do_error; + + /* Update the skb. */ + if (merge) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else { + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); } + pfrag->offset += copy; + } - if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) - continue; + if (!copied) + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; + tcp_skb_pcount_set(skb, 0); + + copied += copy; + if (!iov_iter_count(&msg->msg_iter)) { + tcp_tx_timestamp(sk, skb); + goto out; + } - if (forced_push(tp)) { - tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); - } else if (skb == tcp_send_head(sk)) - tcp_push_one(sk, mss_now); + if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) continue; + if (forced_push(tp)) { + tcp_mark_push(tp, skb); + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); + } else if (skb == tcp_send_head(sk)) + tcp_push_one(sk, mss_now); + continue; + wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); + if (copied) + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); - if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) - goto do_error; + if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) + goto do_error; - mss_now = tcp_send_mss(sk, &size_goal, flags); - } + mss_now = tcp_send_mss(sk, &size_goal, flags); } out: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71fb37c70581..93c74829cbce 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (memcpy_from_msg(skb_put(skb, size), msg, size)) + if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; diff --git 
a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20ab06b228ac..722c8bceaf9a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, err = 0; + int syn_loss = 0, space, err = 0, copied; unsigned long last_syn_loss = 0; struct sk_buff *syn_data; @@ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); - if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), - fo->data->msg_iter.iov, 0, space))) { + copied = copy_from_iter(skb_put(syn_data, space), space, + &fo->data->msg_iter); + if (unlikely(!copied)) { kfree_skb(syn_data); goto fallback; } + if (copied != space) { + skb_trim(syn_data, copied); + space = copied; + } /* No more data pending in inet_wait_for_connect() */ if (space == fo->size) -- cgit v1.2.3 From 21226abb4e9f14d88238964d89b279e461ddc30c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:48:29 -0500 Subject: net: switch memcpy_fromiovec()/memcpy_fromiovecend() users to copy_from_iter() That takes care of the majority of ->sendmsg() instances - most of them via memcpy_to_msg() or assorted getfrag() callbacks. One place where we still keep memcpy_fromiovecend() is tipc - there we potentially read the same data over and over; separate patch, that... Signed-off-by: Al Viro --- include/linux/skbuff.h | 3 +-- include/net/udplite.h | 3 +-- net/ipv4/ip_output.c | 6 ++---- net/ipv4/ping.c | 14 +++++++------- net/ipv4/raw.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv6/raw.c | 2 +- 7 files changed, 14 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9a8bafee1b67..b349c96dc80a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2692,8 +2692,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { - /* XXX: stripping const */ - return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len); + return copy_from_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) diff --git a/include/net/udplite.h b/include/net/udplite.h index ae7c8d1fbcad..80761938b9a7 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -20,8 +20,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct msghdr *msg = from; - /* XXX: stripping const */ - return memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len); + return copy_from_iter(to, len, &msg->msg_iter) != len ? 
-EFAULT : 0; } /* Designate sk as UDP-Lite socket */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b50861b22b6b..f998bc87ae38 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -755,13 +755,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk struct msghdr *msg = from; if (skb->ip_summed == CHECKSUM_PARTIAL) { - /* XXX: stripping const */ - if (memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len) < 0) + if (copy_from_iter(to, len, &msg->msg_iter) != len) return -EFAULT; } else { __wsum csum = 0; - /* XXX: stripping const */ - if (csum_partial_copy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len, &csum) < 0) + if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9e15ba701401..e9f66e1cda50 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -599,18 +599,18 @@ int ping_getfrag(void *from, char *to, struct pingfakehdr *pfh = (struct pingfakehdr *)from; if (offset == 0) { - if (fraglen < sizeof(struct icmphdr)) + fraglen -= sizeof(struct icmphdr); + if (fraglen < 0) BUG(); - if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr), - pfh->msg->msg_iter.iov, 0, fraglen - sizeof(struct icmphdr), - &pfh->wcheck)) + if (csum_and_copy_from_iter(to + sizeof(struct icmphdr), + fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { - if (csum_partial_copy_fromiovecend - (to, pfh->msg->msg_iter.iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) + if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c9d252072a2..f027a708b7e0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -382,7 +382,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->transport_header = skb->network_header; err = -EFAULT; - if (memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length)) + if (memcpy_from_msg(iph, msg, length)) goto error_free; iphlen = iph->ihl * 4; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 93c74829cbce..71fb37c70581 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size) + if (memcpy_from_msg(skb_put(skb, size), msg, size)) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0dbb328fa688..dae7f1a1e464 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -648,7 +648,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length); + err = memcpy_from_msg(iph, msg, length); if (err) goto error_fault; -- cgit v1.2.3 From f25dcc7687d42a72de18aa41b04990a24c9e77c7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:52:29 -0500 Subject: tipc: tipc ->sendmsg() conversion This one needs to copy the same data from user potentially more than once. 
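The conversion copes with that by taking a snapshot of msg->msg_iter before a packet chain is built and restoring the snapshot before every retry, because copy_from_iter() consumes the iterator as it copies. Below is a minimal userspace model of that snapshot-and-rewind idea; the struct and helper names are invented for illustration and are not the kernel's API.

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Toy stand-ins: toy_iter loosely models an iov_iter cursor and
 * build_packets() plays the role of a packetizer such as tipc_msg_build();
 * all names here are illustrative only.
 */
struct toy_iter {
    const char *data;
    size_t off, len;
};

static void toy_copy(char *dst, size_t n, struct toy_iter *it)
{
    size_t c = it->len - it->off < n ? it->len - it->off : n;

    memcpy(dst, it->data + it->off, c);
    it->off += c;                       /* copying consumes the iterator */
}

static int build_packets(struct toy_iter *it, size_t mtu, size_t max_pkts)
{
    char pkt[64];
    size_t pkts = 0;

    while (it->off < it->len) {
        if (++pkts > max_pkts)
            return -EMSGSIZE;           /* fails after partial consumption */
        toy_copy(pkt, mtu > sizeof(pkt) ? sizeof(pkt) : mtu, it);
    }
    return 0;
}

int main(void)
{
    struct toy_iter iter = { "0123456789abcdef", 0, 16 };
    struct toy_iter save = iter;        /* snapshot before the first attempt */
    size_t mtu = 4;

    while (build_packets(&iter, mtu, 2) == -EMSGSIZE) {
        iter = save;                    /* rewind: the same bytes get copied again */
        mtu *= 2;                       /* pretend a larger usable MTU was found */
    }
    printf("sent %zu bytes using mtu %zu\n", iter.len, mtu);
    return 0;
}

On a retry the rewound iterator hands out the same user bytes again, which is exactly what the tipc send paths need.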
Sadly, MTU changes can trigger that ;-/ Cc: Jon Maloy Signed-off-by: Al Viro --- net/tipc/msg.c | 7 ++----- net/tipc/socket.c | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 18aba9e99345..da67c8d3edc6 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -189,7 +189,6 @@ err: * tipc_msg_build - create buffer chain containing specified header and data * @mhdr: Message header, to be prepended to data * @m: User message - * @offset: Posision in iov to start copying from * @dsz: Total length of user data * @pktmax: Max packet size that can be used * @list: Buffer or chain of buffers to be returned to caller @@ -221,8 +220,7 @@ int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, __skb_queue_tail(list, skb); skb_copy_to_linear_data(skb, mhdr, mhsz); pktpos = skb->data + mhsz; - if (!dsz || !memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, - dsz)) + if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz) return dsz; rc = -EFAULT; goto error; @@ -252,12 +250,11 @@ int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, if (drem < pktrem) pktrem = drem; - if (memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, pktrem)) { + if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) { rc = -EFAULT; goto error; } drem -= pktrem; - offset += pktrem; if (!drem) break; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 679a22082fcb..caa4d663fd90 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -733,6 +733,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; struct sk_buff_head head; + struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -758,8 +759,10 @@ new_mtu: rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + msg->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tipc_sk(sk)->link_cong = 1; @@ -895,6 +898,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff_head head; struct sk_buff *skb; struct tipc_name_seq *seq = &dest->addr.nameseq; + struct iov_iter save; u32 mtu; long timeo; int rc; @@ -963,6 +967,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, msg_set_hdr_sz(mhdr, BASIC_H_SIZE); } + save = m->msg_iter; new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); @@ -980,8 +985,10 @@ new_mtu: rc = dsz; break; } - if (rc == -EMSGSIZE) + if (rc == -EMSGSIZE) { + m->msg_iter = save; goto new_mtu; + } if (rc != -ELINKCONG) break; tsk->link_cong = 1; @@ -1052,6 +1059,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, long timeo; u32 dnode; uint mtu, send, sent = 0; + struct iov_iter save; /* Handle implied connection establishment */ if (unlikely(dest)) { @@ -1078,6 +1086,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, dnode = tsk_peer_node(tsk); next: + save = m->msg_iter; mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); __skb_queue_head_init(&head); @@ -1097,6 +1106,7 @@ next: if (rc == -EMSGSIZE) { tsk->max_pkt = tipc_node_get_mtu(net, dnode, portid); + m->msg_iter = save; goto next; } if (rc != -ELINKCONG) -- cgit v1.2.3 From 31a25fae85956e3a9c778141d29e5e803fb0b124 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:53:57 -0500 Subject: net: bury net/core/iovec.c - nothing in there is used anymore Signed-off-by: Al Viro --- 
include/linux/socket.h | 7 --- net/core/Makefile | 2 +- net/core/iovec.c | 137 ------------------------------------------------- 3 files changed, 1 insertion(+), 145 deletions(-) delete mode 100644 net/core/iovec.c (limited to 'net') diff --git a/include/linux/socket.h b/include/linux/socket.h index 6e49a14365dc..5c19cba34dce 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -318,13 +318,6 @@ struct ucred { /* IPX options */ #define IPX_TYPE 1 -extern int csum_partial_copy_fromiovecend(unsigned char *kdata, - struct iovec *iov, - int offset, - unsigned int len, __wsum *csump); -extern unsigned long iov_pages(const struct iovec *iov, int offset, - unsigned long nr_segs); - extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); diff --git a/net/core/Makefile b/net/core/Makefile index 235e6c50708d..fec0856dd6c0 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -2,7 +2,7 @@ # Makefile for the Linux networking core. # -obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ +obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/iovec.c b/net/core/iovec.c deleted file mode 100644 index dcbe98b3726a..000000000000 --- a/net/core/iovec.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * iovec manipulation routines. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Fixes: - * Andrew Lunn : Errors in iovec copying. - * Pedro Roque : Added memcpy_fromiovecend and - * csum_..._fromiovecend. - * Andi Kleen : fixed error handling for 2.1 - * Alexey Kuznetsov: 2.1 optimisations - * Andi Kleen : Fix csum*fromiovecend for IPv6. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * And now for the all-in-one: copy and checksum from a user iovec - * directly to a datagram - * Calls to csum_partial but the last must be in 32 bit chunks - * - * ip_build_xmit must ensure that when fragmenting only the last - * call to this function will be unaligned also. - */ -int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, - int offset, unsigned int len, __wsum *csump) -{ - __wsum csum = *csump; - int partial_cnt = 0, err = 0; - - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - - /* There is a remnant from previous iov. */ - if (partial_cnt) { - int par_len = 4 - partial_cnt; - - /* iov component is too short ... 
*/ - if (par_len > copy) { - if (copy_from_user(kdata, base, copy)) - goto out_fault; - kdata += copy; - base += copy; - partial_cnt += copy; - len -= copy; - iov++; - if (len) - continue; - *csump = csum_partial(kdata - partial_cnt, - partial_cnt, csum); - goto out; - } - if (copy_from_user(kdata, base, par_len)) - goto out_fault; - csum = csum_partial(kdata - partial_cnt, 4, csum); - kdata += par_len; - base += par_len; - copy -= par_len; - len -= par_len; - partial_cnt = 0; - } - - if (len > copy) { - partial_cnt = copy % 4; - if (partial_cnt) { - copy -= partial_cnt; - if (copy_from_user(kdata + copy, base + copy, - partial_cnt)) - goto out_fault; - } - } - - if (copy) { - csum = csum_and_copy_from_user(base, kdata, copy, - csum, &err); - if (err) - goto out; - } - len -= copy + partial_cnt; - kdata += copy + partial_cnt; - iov++; - } - *csump = csum; -out: - return err; - -out_fault: - err = -EFAULT; - goto out; -} -EXPORT_SYMBOL(csum_partial_copy_fromiovecend); - -unsigned long iov_pages(const struct iovec *iov, int offset, - unsigned long nr_segs) -{ - unsigned long seg, base; - int pages = 0, len, size; - - while (nr_segs && (offset >= iov->iov_len)) { - offset -= iov->iov_len; - ++iov; - --nr_segs; - } - - for (seg = 0; seg < nr_segs; seg++) { - base = (unsigned long)iov[seg].iov_base + offset; - len = iov[seg].iov_len - offset; - size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; - pages += size; - offset = 0; - } - - return pages; -} -EXPORT_SYMBOL(iov_pages); -- cgit v1.2.3 From 6d65233020765ea25541952276217d49e3ecbf9e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Jan 2015 16:12:56 -0500 Subject: net/socket.c: fold do_sock_{read,write} into callers Signed-off-by: Al Viro --- net/socket.c | 56 +++++++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 3326d67482ac..4d08b50cad65 100644 --- a/net/socket.c +++ b/net/socket.c @@ -845,25 +845,11 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) -{ - struct socket *sock = file->private_data; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, iocb->ki_nbytes); - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - - return __sock_recvmsg(iocb, sock, msg, iocb->ki_nbytes, msg->msg_flags); -} - static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { + struct file *file = iocb->ki_filp; + struct socket *sock = file->private_data; struct msghdr msg; if (pos != 0) @@ -872,36 +858,36 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - return do_sock_read(&msg, iocb, iocb->ki_filp, iov, nr_segs); -} - -static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) -{ - struct socket *sock = file->private_data; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes); - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; - if (sock->type == SOCK_SEQPACKET) - msg->msg_flags |= MSG_EOR; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_controllen = 0; + iov_iter_init(&msg.msg_iter, READ, iov, nr_segs, iocb->ki_nbytes); + msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - return __sock_sendmsg(iocb, sock, msg, iocb->ki_nbytes); + return __sock_recvmsg(iocb, sock, &msg, iocb->ki_nbytes, msg.msg_flags); } static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { + struct file *file = iocb->ki_filp; + struct socket *sock = file->private_data; struct msghdr msg; if (pos != 0) return -ESPIPE; - return do_sock_write(&msg, iocb, iocb->ki_filp, iov, nr_segs); + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_controllen = 0; + iov_iter_init(&msg.msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes); + msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + if (sock->type == SOCK_SEQPACKET) + msg.msg_flags |= MSG_EOR; + + return __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); } /* -- cgit v1.2.3 From 8ae5e030f30e50a81df1b269d5a5c32d023aa66d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 19:40:50 -0500 Subject: net: switch sockets to ->read_iter/->write_iter Signed-off-by: Al Viro --- net/socket.c | 56 +++++++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 4d08b50cad65..bbedbfcb42c2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -113,10 +113,8 @@ unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to); +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); @@ -142,8 +140,10 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, static const struct file_operations socket_file_ops = { .owner = THIS_MODULE, .llseek = no_llseek, - .aio_read = sock_aio_read, - .aio_write = sock_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = sock_read_iter, + .write_iter = sock_write_iter, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, #ifdef CONFIG_COMPAT @@ -845,49 +845,47 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg; + struct msghdr msg = {.msg_iter = *to}; + ssize_t res; + + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - iov_iter_init(&msg.msg_iter, READ, iov, nr_segs, iocb->ki_nbytes); - msg.msg_flags = (file->f_flags 
& O_NONBLOCK) ? MSG_DONTWAIT : 0; - - return __sock_recvmsg(iocb, sock, &msg, iocb->ki_nbytes, msg.msg_flags); + res = __sock_recvmsg(iocb, sock, &msg, + iocb->ki_nbytes, msg.msg_flags); + *to = msg.msg_iter; + return res; } -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - struct msghdr msg; + struct msghdr msg = {.msg_iter = *from}; + ssize_t res; - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - iov_iter_init(&msg.msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes); - msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; + if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - return __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + *from = msg.msg_iter; + return res; } /* -- cgit v1.2.3 From 0508c07f5e0c94f38afd5434e8b2a55b84553077 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 3 Feb 2015 16:36:15 -0500 Subject: ipv6: Select fragment id during UFO segmentation if not set. If the IPv6 fragment id has not been set and we perform fragmentation due to UFO, select a new fragment id. We now consider a fragment id of 0 as unset, and if the id selection process returns 0 (after all the perturbations), we set it to 0x80000000, thus giving us ample space not to create collisions with the next packet we may have to fragment. When doing UFO integrity checking, we also select the fragment id if it has not been set yet. This is stored in skb_shinfo(), thus allowing UFO to function correctly. This patch also removes duplicate fragment id generation code and moves ipv6_select_ident() into the header as it may be used during GSO. Signed-off-by: Vladislav Yasevich Signed-off-by: David S.
Miller --- include/net/ipv6.h | 3 +++ net/ipv6/ip6_output.c | 14 -------------- net/ipv6/output_core.c | 41 +++++++++++++++++++++++++++++++++++------ net/ipv6/udp_offload.c | 10 +++++++++- 4 files changed, 47 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4292929392b0..9bf85d34c024 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,6 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } +u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, + struct in6_addr *src); +void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); void ipv6_proxy_select_ident(struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ce69a12ae48c..d28f2a2efb32 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -537,20 +537,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static u32 ip6_idents_hashrnd __read_mostly; - u32 hash, id; - - net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - - hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); - hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); - - id = ip_idents_reserve(hash, 1); - fhdr->identification = htonl(id); -} - int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 97f41a3e68d9..54520a0bd5e3 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,6 +9,24 @@ #include #include +u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, struct in6_addr *src) +{ + u32 hash, id; + + hash = __ipv6_addr_jhash(dst, hashrnd); + hash = __ipv6_addr_jhash(src, hash); + + /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, + * set the hight order instead thus minimizing possible future + * collisions. + */ + id = ip_idents_reserve(hash, 1); + if (unlikely(!id)) + id = 1 << 31; + + return id; +} + /* This function exists only for tap drivers that must support broken * clients requesting UFO without specifying an IPv6 fragment ID. 
* @@ -22,7 +40,7 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; struct in6_addr *addrs; - u32 hash, id; + u32 id; addrs = skb_header_pointer(skb, skb_network_offset(skb) + @@ -34,14 +52,25 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) net_get_random_once(&ip6_proxy_idents_hashrnd, sizeof(ip6_proxy_idents_hashrnd)); - hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); - hash = __ipv6_addr_jhash(&addrs[0], hash); - - id = ip_idents_reserve(hash, 1); - skb_shinfo(skb)->ip6_frag_id = htonl(id); + id = __ipv6_select_ident(ip6_proxy_idents_hashrnd, + &addrs[1], &addrs[0]); + skb_shinfo(skb)->ip6_frag_id = id; } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); +void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static u32 ip6_idents_hashrnd __read_mostly; + u32 id; + + net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + + id = __ipv6_select_ident(ip6_idents_hashrnd, &rt->rt6i_dst.addr, + &rt->rt6i_src.addr); + fhdr->identification = htonl(id); +} +EXPORT_SYMBOL(ipv6_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b6aa8ed18257..a56276996b72 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -52,6 +52,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + /* Set the IPv6 fragment id if not set yet */ + if (!skb_shinfo(skb)->ip6_frag_id) + ipv6_proxy_select_ident(skb); + segs = NULL; goto out; } @@ -108,7 +112,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - fptr->identification = skb_shinfo(skb)->ip6_frag_id; + if (skb_shinfo(skb)->ip6_frag_id) + fptr->identification = skb_shinfo(skb)->ip6_frag_id; + else + ipv6_select_ident(fptr, + (struct rt6_info *)skb_dst(skb)); /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() -- cgit v1.2.3 From 12bdf27d46c9d5e490fa164551642e065105db78 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 3 Feb 2015 19:48:04 +0100 Subject: NFC: nci: Add reference to the RF logical connection The NCI_STATIC_RF_CONN_ID logical connection is the most used connection. Keeping it directly accessible in the nci_dev structure will simplify and optimize the access. 
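In effect the hot path trades a linear walk of conn_info_list for a single pointer dereference. Here is a self-contained sketch of that caching pattern; the types and field names are invented for illustration and are not the NCI structures.

#include <stdio.h>
#include <stddef.h>

struct conn_info {                      /* illustrative, not the NCI struct */
    int conn_id;
    struct conn_info *next;
};

struct dev {
    struct conn_info *conn_list;        /* all logical connections */
    struct conn_info *rf_conn;          /* cached pointer to the busiest one */
};

/* Slow path: linear search, still fine for rarely used connections. */
static struct conn_info *lookup(struct dev *d, int id)
{
    struct conn_info *c;

    for (c = d->conn_list; c; c = c->next)
        if (c->conn_id == id)
            return c;
    return NULL;
}

int main(void)
{
    struct conn_info other = { 1, NULL };
    struct conn_info rf = { 0, &other };
    struct dev d = { &rf, NULL };

    d.rf_conn = lookup(&d, 0);          /* resolve once, when the connection is set up */

    /* Hot path: direct dereference instead of walking the list. */
    printf("rf conn id: %d\n", d.rf_conn ? d.rf_conn->conn_id : -1);
    return 0;
}

The diff keeps the cached pointer coherent by assigning it at the one place where the RF connection is created.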
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 1 + net/nfc/nci/core.c | 2 +- net/nfc/nci/ntf.c | 6 ++---- net/nfc/nci/rsp.c | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index be858870dace..731fa5be9989 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -189,6 +189,7 @@ struct nci_dev { __u8 cur_conn_id; struct list_head conn_info_list; + struct nci_conn_info *rf_conn_info; struct timer_list cmd_timer; struct timer_list data_timer; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index f74d420e2ead..17ff5f83393c 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -803,7 +803,7 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, int rc; struct nci_conn_info *conn_info; - conn_info = nci_get_conn_info_by_conn_id(ndev, NCI_STATIC_RF_CONN_ID); + conn_info = ndev->rf_conn_info; if (!conn_info) return -EPROTO; diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 33f5f00ecf4c..6bbbf6fdacc0 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -625,8 +625,7 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, exit: if (err == NCI_STATUS_OK) { - conn_info = nci_get_conn_info_by_conn_id(ndev, - NCI_STATIC_RF_CONN_ID); + conn_info = ndev->rf_conn_info; if (!conn_info) return; @@ -684,8 +683,7 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason); - conn_info = - nci_get_conn_info_by_conn_id(ndev, NCI_STATIC_RF_CONN_ID); + conn_info = ndev->rf_conn_info; if (!conn_info) return; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 31ccf7d05e82..05268eb473df 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -148,8 +148,7 @@ static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) if (status == NCI_STATUS_OK) { atomic_set(&ndev->state, NCI_DISCOVERY); - conn_info = nci_get_conn_info_by_conn_id(ndev, - NCI_STATIC_RF_CONN_ID); + conn_info = ndev->rf_conn_info; if (!conn_info) { conn_info = devm_kzalloc(&ndev->nfc_dev->dev, sizeof(struct nci_conn_info), @@ -161,6 +160,7 @@ static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) conn_info->conn_id = NCI_STATIC_RF_CONN_ID; INIT_LIST_HEAD(&conn_info->list); list_add(&conn_info->list, &ndev->conn_info_list); + ndev->rf_conn_info = conn_info; } } -- cgit v1.2.3 From b16ae7160a836c4a1e443ea6efca31421e86bae1 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 3 Feb 2015 19:48:05 +0100 Subject: NFC: nci: Support all destinations type when creating a connection The current implementation limits nci_core_conn_create_req() to only manage NCI_DESTINATION_NFCEE. Add new parameters to nci_core_conn_create() to support all destination types described in the NCI specification. Because there are some parameters with variable size dynamic buffer allocation is needed. 
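Concretely, the fixed-size struct dest_spec_params member of the command becomes an open-ended value[0] array, so the total command length is only known at run time and the command has to be assembled in a heap buffer. A self-contained sketch of that variable-length-command pattern in plain C follows; the struct names are illustrative rather than the real NCI definitions, and a C99 flexible array member stands in for the older [0] idiom.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct conn_create_cmd {                /* illustrative, not the NCI struct */
    uint8_t destination_type;
    uint8_t number_destination_params;
    uint8_t params[];                   /* destination params packed back to back */
};

static struct conn_create_cmd *build_cmd(uint8_t dest_type, const void *params,
                                         size_t params_len, size_t *cmd_len)
{
    struct conn_create_cmd *cmd;

    *cmd_len = sizeof(*cmd) + params_len;   /* header plus run-time payload */
    cmd = calloc(1, *cmd_len);              /* userspace stand-in for kzalloc() */
    if (!cmd)
        return NULL;
    cmd->destination_type = dest_type;
    cmd->number_destination_params = 1;
    memcpy(cmd->params, params, params_len);
    return cmd;
}

int main(void)
{
    uint8_t payload[] = { 0x01, 2, 0x80, 0x01 };    /* one type/length/value triple */
    size_t len;
    struct conn_create_cmd *cmd = build_cmd(0x03, payload, sizeof(payload), &len);

    if (!cmd)
        return 1;
    printf("command is %zu bytes (%zu header + %zu params)\n",
           len, sizeof(*cmd), sizeof(payload));
    free(cmd);
    return 0;
}

Building the command this way lets a caller pass any number of destination-specific parameters without the core having to know their layout in advance.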
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/st21nfcb/st21nfcb_se.c | 38 ++++++++++++++++++++++-------- include/net/nfc/nci.h | 18 +++++++------- include/net/nfc/nci_core.h | 4 +++- net/nfc/nci/core.c | 48 ++++++++++++++++++++++++++------------ 4 files changed, 74 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/drivers/nfc/st21nfcb/st21nfcb_se.c b/drivers/nfc/st21nfcb/st21nfcb_se.c index 9f4d8b744f32..3b465e4c85e3 100644 --- a/drivers/nfc/st21nfcb/st21nfcb_se.c +++ b/drivers/nfc/st21nfcb/st21nfcb_se.c @@ -494,7 +494,8 @@ EXPORT_SYMBOL_GPL(st21nfcb_nci_enable_se); static int st21nfcb_hci_network_init(struct nci_dev *ndev) { - struct core_conn_create_dest_spec_params dest_params; + struct core_conn_create_dest_spec_params *dest_params; + struct dest_spec_params spec_params; struct nci_conn_info *conn_info; int r, dev_num; @@ -502,17 +503,29 @@ static int st21nfcb_hci_network_init(struct nci_dev *ndev) if (r != NCI_STATUS_OK) goto exit; - dest_params.type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; - dest_params.length = sizeof(struct dest_spec_params); - dest_params.value.id = ndev->hci_dev->conn_info->id; - dest_params.value.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; - r = nci_core_conn_create(ndev, &dest_params); - if (r != NCI_STATUS_OK) + dest_params = + kzalloc(sizeof(struct core_conn_create_dest_spec_params) + + sizeof(struct dest_spec_params), GFP_KERNEL); + if (dest_params == NULL) { + r = -ENOMEM; goto exit; + } + + dest_params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; + dest_params->length = sizeof(struct dest_spec_params); + spec_params.id = ndev->hci_dev->conn_info->id; + spec_params.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; + memcpy(dest_params->value, &spec_params, sizeof(struct dest_spec_params)); + r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCEE, 1, + sizeof(struct core_conn_create_dest_spec_params) + + sizeof(struct dest_spec_params), + dest_params); + if (r != NCI_STATUS_OK) + goto free_dest_params; conn_info = ndev->hci_dev->conn_info; if (!conn_info) - goto exit; + goto free_dest_params; memcpy(ndev->hci_dev->init_data.gates, st21nfcb_gates, sizeof(st21nfcb_gates)); @@ -522,8 +535,10 @@ static int st21nfcb_hci_network_init(struct nci_dev *ndev) * persistent info to discriminate 2 identical chips */ dev_num = find_first_zero_bit(dev_mask, ST21NFCB_NUM_DEVICES); - if (dev_num >= ST21NFCB_NUM_DEVICES) - return -ENODEV; + if (dev_num >= ST21NFCB_NUM_DEVICES) { + r = -ENODEV; + goto free_dest_params; + } scnprintf(ndev->hci_dev->init_data.session_id, sizeof(ndev->hci_dev->init_data.session_id), @@ -540,6 +555,9 @@ static int st21nfcb_hci_network_init(struct nci_dev *ndev) return 0; +free_dest_params: + kfree(dest_params); + exit: return r; } diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 6c1beb2704b1..695d33cb75e8 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -244,21 +244,23 @@ struct nci_core_set_config_cmd { } __packed; #define NCI_OP_CORE_CONN_CREATE_CMD nci_opcode_pack(NCI_GID_CORE, 0x04) +#define DEST_SPEC_PARAMS_ID_INDEX 0 +#define DEST_SPEC_PARAMS_PROTOCOL_INDEX 1 struct dest_spec_params { - __u8 id; - __u8 protocol; + __u8 id; + __u8 protocol; } __packed; struct core_conn_create_dest_spec_params { - __u8 type; - __u8 length; - struct dest_spec_params value; + __u8 type; + __u8 length; + __u8 value[0]; } __packed; struct nci_core_conn_create_cmd { - __u8 destination_type; - __u8 number_destination_params; - struct core_conn_create_dest_spec_params params; + __u8 
destination_type; + __u8 number_destination_params; + struct core_conn_create_dest_spec_params params[0]; } __packed; #define NCI_OP_CORE_CONN_CLOSE_CMD nci_opcode_pack(NCI_GID_CORE, 0x05) diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 731fa5be9989..d34c1b2295d7 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -263,7 +263,9 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); int nci_nfcee_discover(struct nci_dev *ndev, u8 action); int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode); -int nci_core_conn_create(struct nci_dev *ndev, +int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, + u8 number_destination_params, + size_t params_len, struct core_conn_create_dest_spec_params *params); int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 17ff5f83393c..ddfe91e43c88 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -41,6 +41,11 @@ #include #include +struct core_conn_create_data { + int length; + struct nci_core_conn_create_cmd *cmd; +}; + static void nci_cmd_work(struct work_struct *work); static void nci_rx_work(struct work_struct *work); static void nci_tx_work(struct work_struct *work); @@ -509,25 +514,38 @@ EXPORT_SYMBOL(nci_nfcee_mode_set); static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_core_conn_create_cmd cmd; - struct core_conn_create_dest_spec_params *params = - (struct core_conn_create_dest_spec_params *)opt; - - cmd.destination_type = NCI_DESTINATION_NFCEE; - cmd.number_destination_params = 1; - memcpy(&cmd.params.type, params, - sizeof(struct core_conn_create_dest_spec_params)); - nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, - sizeof(struct nci_core_conn_create_cmd), &cmd); + struct core_conn_create_data *data = + (struct core_conn_create_data *)opt; + + nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); } -int nci_core_conn_create(struct nci_dev *ndev, +int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, + u8 number_destination_params, + size_t params_len, struct core_conn_create_dest_spec_params *params) { - ndev->cur_id = params->value.id; - return nci_request(ndev, nci_core_conn_create_req, - (unsigned long)params, - msecs_to_jiffies(NCI_CMD_TIMEOUT)); + int r; + struct nci_core_conn_create_cmd *cmd; + struct core_conn_create_data data; + + data.length = params_len + sizeof(struct nci_core_conn_create_cmd); + cmd = kzalloc(data.length, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->destination_type = destination_type; + cmd->number_destination_params = number_destination_params; + memcpy(cmd->params, params, params_len); + + data.cmd = cmd; + ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; + + r = __nci_request(ndev, nci_core_conn_create_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); + kfree(cmd); + return r; } EXPORT_SYMBOL(nci_core_conn_create); -- cgit v1.2.3 From 3ba5c8466b320c3fd5d5861b34aa8a31dd0cf6b3 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 3 Feb 2015 19:48:06 +0100 Subject: NFC: nci: Change credits field to credits_cnt For consistency sake change nci_core_conn_create_rsp structure credits field to credits_cnt. 
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 2 +- net/nfc/nci/rsp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 695d33cb75e8..a2f2f3d3196d 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -353,7 +353,7 @@ struct nci_core_set_config_rsp { struct nci_core_conn_create_rsp { __u8 status; __u8 max_ctrl_pkt_payload_len; - __u8 credits; + __u8 credits_cnt; __u8 conn_id; } __packed; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 05268eb473df..b419fed77ea3 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -245,7 +245,7 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, conn_info->conn_id = rsp->conn_id; conn_info->max_pkt_payload_len = rsp->max_ctrl_pkt_payload_len; - atomic_set(&conn_info->credits_cnt, rsp->credits); + atomic_set(&conn_info->credits_cnt, rsp->credits_cnt); } exit: -- cgit v1.2.3 From 15d4a8da0e440faf589a26346c8287e1ed0abe6c Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 3 Feb 2015 19:48:07 +0100 Subject: NFC: nci: Move logical connection structure allocation conn_info is currently allocated only after nfcee_discovery_ntf which is not generic enough for logical connection other than NFCEE. The corresponding conn_info is now created in nci_core_conn_create_rsp(). Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/st21nfcb/st21nfcb_se.c | 2 +- include/net/nfc/nci_core.h | 1 + net/nfc/nci/hci.c | 8 ++++++++ net/nfc/nci/ntf.c | 23 ++--------------------- net/nfc/nci/rsp.c | 29 ++++++++++++++++++++++++----- 5 files changed, 36 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/drivers/nfc/st21nfcb/st21nfcb_se.c b/drivers/nfc/st21nfcb/st21nfcb_se.c index 3b465e4c85e3..d23e8f27c4aa 100644 --- a/drivers/nfc/st21nfcb/st21nfcb_se.c +++ b/drivers/nfc/st21nfcb/st21nfcb_se.c @@ -513,7 +513,7 @@ static int st21nfcb_hci_network_init(struct nci_dev *ndev) dest_params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; dest_params->length = sizeof(struct dest_spec_params); - spec_params.id = ndev->hci_dev->conn_info->id; + spec_params.id = ndev->hci_dev->nfcee_id; spec_params.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; memcpy(dest_params->value, &spec_params, sizeof(struct dest_spec_params)); r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCEE, 1, diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index d34c1b2295d7..ff87f8611fa3 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -159,6 +159,7 @@ struct nci_hci_init_data { #define NCI_HCI_MAX_GATES 256 struct nci_hci_dev { + u8 nfcee_id; struct nci_dev *ndev; struct nci_conn_info *conn_info; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ecf253942606..ed54ec533836 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -615,12 +615,20 @@ static int nci_hci_dev_connect_gates(struct nci_dev *ndev, int nci_hci_dev_session_init(struct nci_dev *ndev) { + struct nci_conn_info *conn_info; struct sk_buff *skb; int r; ndev->hci_dev->count_pipes = 0; ndev->hci_dev->expected_pipes = 0; + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + conn_info->data_exchange_cb = nci_hci_data_received_cb; + conn_info->data_exchange_cb_context = ndev; + nci_hci_reset_pipes(ndev->hci_dev); if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 6bbbf6fdacc0..3218071072ac 100644 --- 
a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -723,7 +723,6 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { u8 status = NCI_STATUS_OK; - struct nci_conn_info *conn_info; struct nci_nfcee_discover_ntf *nfcee_ntf = (struct nci_nfcee_discover_ntf *)skb->data; @@ -734,27 +733,9 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, * and only one, NFCEE_DISCOVER_NTF with a Protocol type of * “HCI Access”, even if the HCI Network contains multiple NFCEEs. */ - if (!ndev->hci_dev->conn_info) { - conn_info = devm_kzalloc(&ndev->nfc_dev->dev, - sizeof(*conn_info), GFP_KERNEL); - if (!conn_info) { - status = NCI_STATUS_REJECTED; - goto exit; - } - - conn_info->id = nfcee_ntf->nfcee_id; - conn_info->conn_id = NCI_INVALID_CONN_ID; - - conn_info->data_exchange_cb = nci_hci_data_received_cb; - conn_info->data_exchange_cb_context = ndev; + ndev->hci_dev->nfcee_id = nfcee_ntf->nfcee_id; + ndev->cur_id = nfcee_ntf->nfcee_id; - INIT_LIST_HEAD(&conn_info->list); - list_add(&conn_info->list, &ndev->conn_info_list); - - ndev->hci_dev->conn_info = conn_info; - } - -exit: nci_req_complete(ndev, status); } diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index b419fed77ea3..02486bc2ceea 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -233,16 +233,27 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, if (status == NCI_STATUS_OK) { rsp = (struct nci_core_conn_create_rsp *)skb->data; - list_for_each_entry(conn_info, &ndev->conn_info_list, list) { - if (conn_info->id == ndev->cur_id) - break; - } - if (!conn_info || conn_info->id != ndev->cur_id) { + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(*conn_info), GFP_KERNEL); + if (!conn_info) { status = NCI_STATUS_REJECTED; goto exit; } + conn_info->id = ndev->cur_id; + conn_info->conn_id = rsp->conn_id; + + /* Note: data_exchange_cb and data_exchange_cb_context need to + * be specify out of nci_core_conn_create_rsp_packet + */ + + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + + if (ndev->cur_id == ndev->hci_dev->nfcee_id) + ndev->hci_dev->conn_info = conn_info; + conn_info->conn_id = rsp->conn_id; conn_info->max_pkt_payload_len = rsp->max_ctrl_pkt_payload_len; atomic_set(&conn_info->credits_cnt, rsp->credits_cnt); @@ -255,9 +266,17 @@ exit: static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; __u8 status = skb->data[0]; pr_debug("status 0x%x\n", status); + if (status == NCI_STATUS_OK) { + conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_id); + if (conn_info) { + list_del(&conn_info->list); + devm_kfree(&ndev->nfc_dev->dev, conn_info); + } + } nci_req_complete(ndev, status); } -- cgit v1.2.3 From fa00e8fed457841cb24219dbe3cfba7d56de6317 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 3 Feb 2015 19:48:08 +0100 Subject: NFC: nci: Move NFCEE discovery logic NFCEE_DISCOVER_CMD is a specified NCI command used to discover NFCEE IDs. Move nci_nfcee_discover() call to nci_discover_se() in order to guarantee: - NFCEE_DISCOVER_CMD run when the NCI state machine is initialized - NFCEE_DISCOVER_CMD is not run in case there is not discover_se hook defined by a NFC device driver. 
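Put differently, the NFCEE discovery command is treated as a prerequisite that the core only issues when a driver actually supplies a discover_se hook to make use of its results. A short sketch of that common-prerequisite-plus-optional-hook shape is below, with invented names rather than the real NCI symbols.

#include <stdio.h>

struct dev;

struct dev_ops {
    int (*discover_se)(struct dev *d);  /* optional driver hook */
};

struct dev {
    const struct dev_ops *ops;
    int nfcee_discovery_done;
};

static int nfcee_discover(struct dev *d)
{
    d->nfcee_discovery_done = 1;        /* stands in for NFCEE_DISCOVER_CMD */
    return 0;
}

static int discover_se(struct dev *d)
{
    if (!d->ops->discover_se)           /* no hook: skip the prerequisite too */
        return 0;
    if (nfcee_discover(d))              /* run it only when it will be used */
        return -1;
    return d->ops->discover_se(d);
}

static int my_driver_discover_se(struct dev *d)
{
    printf("driver discovery, NFCEE discovery done: %d\n",
           d->nfcee_discovery_done);
    return 0;
}

int main(void)
{
    const struct dev_ops ops = { my_driver_discover_se };
    struct dev d = { &ops, 0 };

    return discover_se(&d);
}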
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/st21nfcb/st21nfcb_se.c | 4 ---- net/nfc/nci/core.c | 8 +++++++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/drivers/nfc/st21nfcb/st21nfcb_se.c b/drivers/nfc/st21nfcb/st21nfcb_se.c index d23e8f27c4aa..7c82e9d87a65 100644 --- a/drivers/nfc/st21nfcb/st21nfcb_se.c +++ b/drivers/nfc/st21nfcb/st21nfcb_se.c @@ -499,10 +499,6 @@ static int st21nfcb_hci_network_init(struct nci_dev *ndev) struct nci_conn_info *conn_info; int r, dev_num; - r = nci_nfcee_discover(ndev, NCI_NFCEE_DISCOVERY_ACTION_ENABLE); - if (r != NCI_STATUS_OK) - goto exit; - dest_params = kzalloc(sizeof(struct core_conn_create_dest_spec_params) + sizeof(struct dest_spec_params), GFP_KERNEL); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index ddfe91e43c88..9575a1892607 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -880,10 +880,16 @@ static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) static int nci_discover_se(struct nfc_dev *nfc_dev) { + int r; struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); - if (ndev->ops->discover_se) + if (ndev->ops->discover_se) { + r = nci_nfcee_discover(ndev, NCI_NFCEE_DISCOVERY_ACTION_ENABLE); + if (r != NCI_STATUS_OK) + return -EPROTO; + return ndev->ops->discover_se(ndev); + } return 0; } -- cgit v1.2.3 From 2bd82484bb4c5db1d5dc983ac7c409b2782e0154 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Feb 2015 23:48:24 -0800 Subject: xps: fix xps for stacked devices A typical qdisc setup is the following : bond0 : bonding device, using HTB hierarchy eth1/eth2 : slaves, multiqueue NIC, using MQ + FQ qdisc XPS allows to spread packets on specific tx queues, based on the cpu doing the send. Problem is that dequeues from bond0 qdisc can happen on random cpus, due to the fact that qdisc_run() can dequeue a batch of packets. CPUA -> queue packet P1 on bond0 qdisc, P1->ooo_okay=1 CPUA -> queue packet P2 on bond0 qdisc, P2->ooo_okay=0 CPUB -> dequeue packet P1 from bond0 enqueue packet on eth1/eth2 CPUC -> dequeue packet P2 from bond0 enqueue packet on eth1/eth2 using sk cache (ooo_okay is 0) get_xps_queue() then might select wrong queue for P1, since current cpu might be different than CPUA. P2 might be sent on the old queue (stored in sk->sk_tx_queue_mapping), if CPUC runs a bit faster (or CPUB spins a bit on qdisc lock) Effect of this bug is TCP reorders, and more generally not optimal TX queue placement. (A victim bulk flow can be migrated to the wrong TX queue for a while) To fix this, we have to record sender cpu number the first time dev_queue_xmit() is called for one tx skb. We can union napi_id (used on receive path) and sender_cpu, granted we clear sender_cpu in skb_scrub_packet() (credit to Willem for this union idea) Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Nandita Dukkipati Cc: Yuchung Cheng Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 7 +++++-- net/core/flow_dissector.c | 7 ++++++- net/core/skbuff.c | 4 ++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 85ab7d72b54c..2748ff639144 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -626,8 +626,11 @@ struct sk_buff { __u32 hash; __be16 vlan_proto; __u16 vlan_tci; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int napi_id; +#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) + union { + unsigned int napi_id; + unsigned int sender_cpu; + }; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index beb83d1ac1c6..2c35c02a931e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -422,7 +422,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { map = rcu_dereference( - dev_maps->cpu_map[raw_smp_processor_id()]); + dev_maps->cpu_map[skb->sender_cpu - 1]); if (map) { if (map->len == 1) queue_index = map->queues[0]; @@ -468,6 +468,11 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, { int queue_index = 0; +#ifdef CONFIG_XPS + if (skb->sender_cpu == 0) + skb->sender_cpu = raw_smp_processor_id() + 1; +#endif + if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a5bff2767f15..88c613eab142 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -825,6 +825,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif +#ifdef CONFIG_XPS + CHECK_SKB_FIELD(sender_cpu); +#endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -4169,6 +4172,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; + skb->sender_cpu = 0; skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); -- cgit v1.2.3 From 9a05dde59a35eee5643366d3d1e1f43fc9069adb Mon Sep 17 00:00:00 2001 From: Siva Mannem Date: Mon, 2 Feb 2015 22:51:54 +0530 Subject: bridge: Let bridge not age 'externally' learnt FDB entries, they are removed when 'external' entity notifies the aging When 'learned_sync' flag is turned on, the offloaded switch port syncs learned MAC addresses to bridge's FDB via switchdev notifier (NETDEV_SWITCH_FDB_ADD). Currently, FDB entries learnt via this mechanism are wrongly being deleted by bridge aging logic. This patch ensures that FDB entries synced from offloaded switch ports are not deleted by bridging logic. Such entries can only be deleted via switchdev notifier (NETDEV_SWITCH_FDB_DEL). Signed-off-by: Siva Mannem Signed-off-by: David S. 
Miller --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 08bf04bdac58..6eb94b58637a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -280,7 +280,7 @@ void br_fdb_cleanup(unsigned long _data) hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; - if (f->is_static) + if (f->is_static || f->added_by_external_learn) continue; this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) -- cgit v1.2.3 From dcdc8994697faa789669c3fdaca1a8bc27a8f356 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 2 Feb 2015 16:07:34 -0800 Subject: net: add skb functions to process remote checksum offload This patch adds skb_remcsum_process and skb_gro_remcsum_process to perform the appropriate adjustments to the skb when receiving remote checksum offload. Updated vxlan and gue to use these functions. Tested: Ran TCP_RR and TCP_STREAM netperf for VXLAN and GUE, did not see any change in performance. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 18 ++---------------- include/linux/netdevice.h | 15 +++++++++++++++ include/linux/skbuff.h | 21 +++++++++++++++++++++ net/ipv4/fou.c | 18 ++---------------- 4 files changed, 40 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 31bac2a21ce3..c184717e8b28 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -558,7 +558,6 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, u32 data) { size_t start, offset, plen; - __wsum delta; if (skb->remcsum_offload) return vh; @@ -580,12 +579,7 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, return NULL; } - delta = remcsum_adjust((void *)vh + hdrlen, - NAPI_GRO_CB(skb)->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + skb_gro_remcsum_process(skb, (void *)vh + hdrlen, start, offset); skb->remcsum_offload = 1; @@ -1159,7 +1153,6 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, size_t hdrlen, u32 data) { size_t start, offset, plen; - __wsum delta; if (skb->remcsum_offload) { /* Already processed in GRO path */ @@ -1179,14 +1172,7 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) - __skb_checksum_complete(skb); - - delta = remcsum_adjust((void *)vh + hdrlen, - skb->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); + skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset); return vh; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 16251e96e6aa..1347ac50d2af 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2318,6 +2318,21 @@ do { \ compute_pseudo(skb, proto)); \ } while (0) +static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, + int start, int offset) +{ + __wsum delta; + + BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); + + delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); + NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); +} + + static inline int dev_hard_header(struct sk_buff 
*skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2748ff639144..5405dfe02572 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3099,6 +3099,27 @@ do { \ compute_pseudo(skb, proto)); \ } while (0) +/* Update skbuf and packet to reflect the remote checksum offload operation. + * When called, ptr indicates the starting point for skb->csum when + * ip_summed is CHECKSUM_COMPLETE. If we need create checksum complete + * here, skb_postpull_rcsum is done so skb->csum start is ptr. + */ +static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, + int start, int offset) +{ + __wsum delta; + + if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { + __skb_checksum_complete(skb); + skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); + } + + delta = remcsum_adjust(ptr, skb->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); +} + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 3bc0cf07661c..92ddea1e6457 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -70,7 +70,6 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) { /* Already processed in GRO path */ @@ -82,14 +81,7 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) - __skb_checksum_complete(skb); - - delta = remcsum_adjust((void *)guehdr + hdrlen, - skb->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); + skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); return guehdr; } @@ -228,7 +220,6 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) return guehdr; @@ -243,12 +234,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return NULL; } - delta = remcsum_adjust((void *)guehdr + hdrlen, - NAPI_GRO_CB(skb)->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); skb->remcsum_offload = 1; -- cgit v1.2.3 From db27ebb111e9f69efece08e4cb6a34ff980f8896 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 3 Feb 2015 08:55:58 -0500 Subject: net: rds: use correct size for max unacked packets and bytes Max unacked packets/bytes is an int while sizeof(long) was used in the sysctl table. This means that when they were getting read we'd also leak kernel memory to userspace along with the timeout values. Signed-off-by: Sasha Levin Signed-off-by: David S. 
Miller --- net/rds/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index c3b0cd43eb56..c173f69e1479 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -71,14 +71,14 @@ static struct ctl_table rds_sysctl_rds_table[] = { { .procname = "max_unacked_packets", .data = &rds_sysctl_max_unacked_packets, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "max_unacked_bytes", .data = &rds_sysctl_max_unacked_bytes, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, -- cgit v1.2.3 From 2d72d49553d8de113d4eb1f69b2291f449a4c6bc Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 3 Feb 2015 08:59:17 -0500 Subject: tipc: add reference count to struct tipc_link When a bearer is disabled, all pertaining links will be reset and deleted. However, if there is a second active link towards a killed link's destination, the delete has to be postponed until the failover is finished. During this interval, we currently put the link in zombie mode, i.e., we take it out of traffic, delete its timer, but leave it attached to the owner node structure until all missing packets have been received. When this is done, we detach the link from its node and delete it, assuming that the synchronous timer deletion that was initiated earlier in a different thread has finished. This is unsafe, as the failover may finish before del_timer_sync() has returned in the other thread. We fix this by adding an atomic reference counter of type kref in struct tipc_link. The counter keeps track of the references kept to the link by the owner node and the timer. We then do a conditional delete, based on the reference counter, both after the failover has been finished and when the timer expires, if applicable. Whoever comes last, will actually delete the link. This approach also implies that we can make the deletion of the timer asynchronous. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/link.c | 79 +++++++++++++++++++++++++++++++++++---------------------- net/tipc/link.h | 2 ++ 2 files changed, 50 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 2846ad802e43..46aa59955299 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -127,6 +127,21 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } +static void tipc_link_release(struct kref *kref) +{ + kfree(container_of(kref, struct tipc_link, ref)); +} + +static void tipc_link_get(struct tipc_link *l_ptr) +{ + kref_get(&l_ptr->ref); +} + +static void tipc_link_put(struct tipc_link *l_ptr) +{ + kref_put(&l_ptr->ref, tipc_link_release); +} + static void link_init_max_pkt(struct tipc_link *l_ptr) { struct tipc_node *node = l_ptr->owner; @@ -222,11 +237,13 @@ static void link_timeout(unsigned long data) tipc_link_push_packets(l_ptr); tipc_node_unlock(l_ptr->owner); + tipc_link_put(l_ptr); } static void link_set_timer(struct tipc_link *link, unsigned long time) { - mod_timer(&link->timer, jiffies + time); + if (!mod_timer(&link->timer, jiffies + time)) + tipc_link_get(link); } /** @@ -267,7 +284,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, pr_warn("Link creation failed, no memory\n"); return NULL; } - + kref_init(&l_ptr->ref); l_ptr->addr = peer; if_name = strchr(b_ptr->name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", @@ -305,46 +322,48 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, skb_queue_head_init(&l_ptr->waiting_sks); link_reset_statistics(l_ptr); - tipc_node_attach_link(n_ptr, l_ptr); - setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); - link_state_event(l_ptr, STARTING_EVT); return l_ptr; } +/** + * link_delete - Conditional deletion of link. 
+ * If timer still running, real delete is done when it expires + * @link: link to be deleted + */ +void tipc_link_delete(struct tipc_link *link) +{ + tipc_link_reset_fragments(link); + tipc_node_detach_link(link->owner, link); + tipc_link_put(link); +} + void tipc_link_delete_list(struct net *net, unsigned int bearer_id, bool shutting_down) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; + struct tipc_link *link; + struct tipc_node *node; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->links[bearer_id]; - if (l_ptr) { - tipc_link_reset(l_ptr); - if (shutting_down || !tipc_node_is_up(n_ptr)) { - tipc_node_detach_link(l_ptr->owner, l_ptr); - tipc_link_reset_fragments(l_ptr); - tipc_node_unlock(n_ptr); - - /* Nobody else can access this link now: */ - del_timer_sync(&l_ptr->timer); - kfree(l_ptr); - } else { - /* Detach/delete when failover is finished: */ - l_ptr->flags |= LINK_STOPPED; - tipc_node_unlock(n_ptr); - del_timer_sync(&l_ptr->timer); - } + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_lock(node); + link = node->links[bearer_id]; + if (!link) { + tipc_node_unlock(node); continue; } - tipc_node_unlock(n_ptr); + tipc_link_reset(link); + if (del_timer(&link->timer)) + tipc_link_put(link); + link->flags |= LINK_STOPPED; + /* Delete link now, or when failover is finished: */ + if (shutting_down || !tipc_node_is_up(node)) + tipc_link_delete(link); + tipc_node_unlock(node); } rcu_read_unlock(); } @@ -1837,10 +1856,8 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, } } exit: - if ((l_ptr->exp_msg_count == 0) && (l_ptr->flags & LINK_STOPPED)) { - tipc_node_detach_link(l_ptr->owner, l_ptr); - kfree(l_ptr); - } + if ((!l_ptr->exp_msg_count) && (l_ptr->flags & LINK_STOPPED)) + tipc_link_delete(l_ptr); return buf; } diff --git a/net/tipc/link.h b/net/tipc/link.h index 9df7fa4d3bdd..f06b779c9f75 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -103,6 +103,7 @@ struct tipc_stats { * @media_addr: media address to use when sending messages over link * @timer: link timer * @owner: pointer to peer node + * @refcnt: reference counter for permanent references (owner node & timer) * @flags: execution state flags for link endpoint instance * @checkpoint: reference point for triggering link continuity checking * @peer_session: link session # being used by peer end of link @@ -142,6 +143,7 @@ struct tipc_link { struct tipc_media_addr media_addr; struct timer_list timer; struct tipc_node *owner; + struct kref ref; /* Management and link supervision data */ unsigned int flags; -- cgit v1.2.3 From 7d24dcdb3f3132e0ec36f19c49bd004bc874b8aa Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 3 Feb 2015 08:59:18 -0500 Subject: tipc: avoid stale link after aborted failover During link failover it may happen that the remaining link goes down while it is still in the process of taking over traffic from a previously failed link. When this happens, we currently abort the failover procedure and reset the first failed link to non-failover mode, so that it will be ready to re-establish contact with its peer when it comes available. However, if the first link goes down because its bearer was manually disabled, it is not enough to reset it; it must also be deleted; which is supposed to happen when the failover procedure is finished. 
Otherwise it will remain a zombie link: attached to the owner node structure, in mode LINK_STOPPED, and permanently blocking any re-establishing of the link to the peer via the interface in question. We fix this by amending the failover abort procedure. Apart from resetting the link to non-failover state, we test if the link is also in LINK_STOPPED mode. If so, we delete it, using the conditional tipc_link_delete() function introduced in the previous commit. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.h | 1 + net/tipc/node.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/net/tipc/link.h b/net/tipc/link.h index f06b779c9f75..3e3432b3044e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -202,6 +202,7 @@ struct tipc_port; struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); +void tipc_link_delete(struct tipc_link *link); void tipc_link_delete_list(struct net *net, unsigned int bearer_id, bool shutting_down); void tipc_link_failover_send_queue(struct tipc_link *l_ptr); diff --git a/net/tipc/node.c b/net/tipc/node.c index ee5d33cfcf80..d4cb8c127063 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -406,6 +406,10 @@ static void node_lost_contact(struct tipc_node *n_ptr) l_ptr->reset_checkpoint = l_ptr->next_in_no; l_ptr->exp_msg_count = 0; tipc_link_reset_fragments(l_ptr); + + /* Link marked for deletion after failover? => do it now */ + if (l_ptr->flags & LINK_STOPPED) + tipc_link_delete(l_ptr); } n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; -- cgit v1.2.3 From b45db71b525d75e520d7ef46c796f49c5d26c07c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 3 Feb 2015 08:59:19 -0500 Subject: tipc: eliminate race during node creation Instances of struct tipc_node are created in the function tipc_disc_rcv() under the assumption that there is no race between received discovery messages arriving from the same node. This assumption is wrong. When we use more than one bearer, it is possible that discovery messages from the same node arrive at the same moment, resulting in the creation of two instances of struct tipc_node. This may later cause confusion during link establishment, and may result in one of the links never becoming activated. We fix this by making the lookup and potential creation of nodes atomic. Instead of first looking up the node and, in case of failure, creating it, we now start with looking up the node inside tipc_node_create(), and return a reference to that one if found. Otherwise, we go ahead and create the node as we did before. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/discover.c | 9 ++------- net/tipc/node.c | 11 +++++------ 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 5b40cb89ff0a..a580a40d0208 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -1,7 +1,7 @@ /* * net/tipc/discover.c * - * Copyright (c) 2003-2006, 2014, Ericsson AB + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved.
* @@ -47,7 +47,6 @@ /* indicates no timer in use */ #define TIPC_LINK_REQ_INACTIVE 0xffffffff - /** * struct tipc_link_req - information about an ongoing link setup request * @bearer_id: identity of bearer issuing requests @@ -163,13 +162,9 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, if (!tipc_in_scope(bearer->domain, onode)) return; - /* Locate, or if necessary, create, node: */ - node = tipc_node_find(net, onode); - if (!node) - node = tipc_node_create(net, onode); + node = tipc_node_create(net, onode); if (!node) return; - tipc_node_lock(node); link = node->links[bearer->identity]; diff --git a/net/tipc/node.c b/net/tipc/node.c index d4cb8c127063..842bd7ad4b17 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -96,14 +96,14 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) struct tipc_node *n_ptr, *temp_node; spin_lock_bh(&tn->node_list_lock); - + n_ptr = tipc_node_find(net, addr); + if (n_ptr) + goto exit; n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); if (!n_ptr) { - spin_unlock_bh(&tn->node_list_lock); pr_warn("Node creation failed, no memory\n"); - return NULL; + goto exit; } - n_ptr->addr = addr; n_ptr->net = net; spin_lock_init(&n_ptr->lock); @@ -123,9 +123,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; - tn->num_nodes++; - +exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -- cgit v1.2.3 From af9946fde9983e1312e5bcda7d1658fee2a3cb1d Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 3 Feb 2015 08:59:20 -0500 Subject: tipc: separate link starting event from link timeout event When a new link instance is created, it is triggered to start by sending it a TIPC_STARTING_EVT, whereafter a regular link reset is applied to it. The starting event is codewise treated as a timeout event, and prompts a link RESET message to be sent to the peer node, carrying a link session identifier. The later link_reset() call nudges this session identifier, whereafter all subsequent RESET messages will be sent out with the new identifier. The latter session number overrides the former, causing the peer to unconditionally accept it irrespective of its current working state. We don't think that this causes any problem, but it is not in accordance with the protocol spec, and may cause confusion when debugging TIPC sessions. To avoid this, we make the starting event distinct from the subsequent timeout events, by not allowing the former to send out any RESET message. This eliminates the described problem. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/link.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 46aa59955299..77c7ccd492b5 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -649,7 +649,9 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) break; case STARTING_EVT: l_ptr->flags |= LINK_STARTED; - /* fall through */ + l_ptr->fsm_msg_cnt++; + link_set_timer(l_ptr, cont_intv); + break; case TIMEOUT_EVT: tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; -- cgit v1.2.3 From 61bd3857ff2c7daf756d49b41e6277bbdaa8f789 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:29 +0200 Subject: net/core: Add event for a change in slave state Add event which provides an indication on a change in the state of a bonding slave. The event handler should cast the pointer to the appropriate type (struct netdev_bonding_info) in order to get the full info about the slave. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++++++++ net/core/dev.c | 20 ++++++++++++++++++++ net/core/rtnetlink.c | 1 + 3 files changed, 36 insertions(+) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1347ac50d2af..ce784d5018e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -2056,6 +2057,7 @@ struct pcpu_sw_netstats { #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ #define NETDEV_CHANGEINFODATA 0x0018 +#define NETDEV_BONDING_INFO 0x0019 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -3494,6 +3496,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, netdev_features_t features); +struct netdev_bonding_info { + ifslave slave; + ifbond master; +}; + +struct netdev_notifier_bonding_info { + struct netdev_notifier_info info; /* must be first */ + struct netdev_bonding_info bonding_info; +}; + +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info); + static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { diff --git a/net/core/dev.c b/net/core/dev.c index 1d564d68e31a..ede0b161b115 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5355,6 +5355,26 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); +/** + * netdev_bonding_info_change - Dispatch event about slave change + * @dev: device + * @netdev_bonding_info: info to dispatch + * + * Send NETDEV_BONDING_INFO to netdev notifiers with info. + * The caller must hold the RTNL lock. 
+ */ +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info) +{ + struct netdev_notifier_bonding_info info; + + memcpy(&info.bonding_info, bonding_info, + sizeof(struct netdev_bonding_info)); + call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, + &info.info); +} +EXPORT_SYMBOL(netdev_bonding_info_change); + void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 673cb4c6f391..4cd5e350d129 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3180,6 +3180,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_UNREGISTER_FINAL: case NETDEV_RELEASE: case NETDEV_JOIN: + case NETDEV_BONDING_INFO: break; default: rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); -- cgit v1.2.3 From 3725a269815ba6dbb415feddc47da5af7d1fac58 Mon Sep 17 00:00:00 2001 From: Kenneth Klette Jonassen Date: Tue, 3 Feb 2015 17:49:18 +0100 Subject: pkt_sched: fq: avoid hang when quantum 0 Configuring fq with quantum 0 hangs the system, presumably because of a non-interruptible infinite loop. Either way quantum 0 does not make sense. Reproduce with: sudo tc qdisc add dev lo root fq quantum 0 initial_quantum 0 ping 127.0.0.1 Signed-off-by: Kenneth Klette Jonassen Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 9b05924cc386..333cd94ba381 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -670,8 +670,14 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_FLOW_PLIMIT]) q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]); - if (tb[TCA_FQ_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); + if (tb[TCA_FQ_QUANTUM]) { + u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); + + if (quantum > 0) + q->quantum = quantum; + else + err = -EINVAL; + } if (tb[TCA_FQ_INITIAL_QUANTUM]) q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); -- cgit v1.2.3 From b057df24a7536cce6c372efe9d0e3d1558afedf4 Mon Sep 17 00:00:00 2001 From: Ignacy Gawędzki Date: Tue, 3 Feb 2015 19:05:18 +0100 Subject: cls_api.c: Fix dumping of non-existing actions' stats. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In tcf_exts_dump_stats(), ensure that exts->actions is not empty before accessing the first element of that list and calling tcf_action_copy_stats() on it. This fixes some random segvs when adding filters of type "basic" with no particular action. This also fixes the dumping of those "no-action" filters, which more often than not made calls to tcf_action_copy_stats() fail and consequently netlink attributes added by the caller to be removed by a call to nla_nest_cancel(). Fixes: 33be62715991 ("net_sched: act: use standard struct list_head") Signed-off-by: Ignacy Gawędzki Acked-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/cls_api.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index aad6a679fb13..baef987fe2c0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -556,8 +556,9 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, } EXPORT_SYMBOL(tcf_exts_change); -#define tcf_exts_first_act(ext) \ - list_first_entry(&(exts)->actions, struct tc_action, list) +#define tcf_exts_first_act(ext) \ + list_first_entry_or_null(&(exts)->actions, \ + struct tc_action, list) int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { @@ -603,7 +604,7 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct tc_action *a = tcf_exts_first_act(exts); - if (tcf_action_copy_stats(skb, a, 1) < 0) + if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0) return -1; #endif return 0; -- cgit v1.2.3 From 56d28b1e921b57b918a0ae6b13a1671115fe788d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 4 Feb 2015 07:33:24 +1100 Subject: netlink: Use rhashtable walk iterator This patch gets rid of the manual rhashtable walk in netlink which touches rhashtable internals that should not be exposed. It does so by using the rhashtable iterator primitives. In fact the existing code was very buggy. Some sockets weren't shown at all while others were shown more than once. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 130 +++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a36777b7cfb6..155854802d44 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2886,99 +2886,97 @@ EXPORT_SYMBOL(nlmsg_notify); #ifdef CONFIG_PROC_FS struct nl_seq_iter { struct seq_net_private p; + struct rhashtable_iter hti; int link; - int hash_idx; }; -static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) +static int netlink_walk_start(struct nl_seq_iter *iter) { - struct nl_seq_iter *iter = seq->private; - int i, j; - struct netlink_sock *nlk; - struct sock *s; - loff_t off = 0; - - for (i = 0; i < MAX_LINKS; i++) { - struct rhashtable *ht = &nl_table[i].hash; - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - for (j = 0; j < tbl->size; j++) { - struct rhash_head *node; - - rht_for_each_entry_rcu(nlk, node, tbl, j, node) { - s = (struct sock *)nlk; + int err; - if (sock_net(s) != seq_file_net(seq)) - continue; - if (off == pos) { - iter->link = i; - iter->hash_idx = j; - return s; - } - ++off; - } - } + err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti); + if (err) { + iter->link = MAX_LINKS; + return err; } - return NULL; + + err = rhashtable_walk_start(&iter->hti); + return err == -EAGAIN ? 0 : err; } -static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) +static void netlink_walk_stop(struct nl_seq_iter *iter) { - rcu_read_lock(); - return *pos ? 
netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; + rhashtable_walk_stop(&iter->hti); + rhashtable_walk_exit(&iter->hti); } -static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *__netlink_seq_next(struct seq_file *seq) { - struct rhashtable *ht; - const struct bucket_table *tbl; - struct rhash_head *node; + struct nl_seq_iter *iter = seq->private; struct netlink_sock *nlk; - struct nl_seq_iter *iter; - struct net *net; - int i, j; - ++*pos; + do { + for (;;) { + int err; - if (v == SEQ_START_TOKEN) - return netlink_seq_socket_idx(seq, 0); + nlk = rhashtable_walk_next(&iter->hti); - net = seq_file_net(seq); - iter = seq->private; - nlk = v; + if (IS_ERR(nlk)) { + if (PTR_ERR(nlk) == -EAGAIN) + continue; - i = iter->link; - ht = &nl_table[i].hash; - tbl = rht_dereference_rcu(ht->tbl, ht); - rht_for_each_entry_rcu_continue(nlk, node, nlk->node.next, tbl, iter->hash_idx, node) - if (net_eq(sock_net((struct sock *)nlk), net)) - return nlk; + return nlk; + } - j = iter->hash_idx + 1; + if (nlk) + break; - do { + netlink_walk_stop(iter); + if (++iter->link >= MAX_LINKS) + return NULL; - for (; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, node, tbl, j, node) { - if (net_eq(sock_net((struct sock *)nlk), net)) { - iter->link = i; - iter->hash_idx = j; - return nlk; - } - } + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); } + } while (sock_net(&nlk->sk) != seq_file_net(seq)); - j = 0; - } while (++i < MAX_LINKS); + return nlk; +} - return NULL; +static void *netlink_seq_start(struct seq_file *seq, loff_t *posp) +{ + struct nl_seq_iter *iter = seq->private; + void *obj = SEQ_START_TOKEN; + loff_t pos; + int err; + + iter->link = 0; + + err = netlink_walk_start(iter); + if (err) + return ERR_PTR(err); + + for (pos = *posp; pos && obj && !IS_ERR(obj); pos--) + obj = __netlink_seq_next(seq); + + return obj; +} + +static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return __netlink_seq_next(seq); } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) { - rcu_read_unlock(); + struct nl_seq_iter *iter = seq->private; + + if (iter->link >= MAX_LINKS) + return; + + netlink_walk_stop(iter); } -- cgit v1.2.3 From 9a7766288274ef765245ed65e6176a2727b96706 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 4 Feb 2015 07:33:25 +1100 Subject: netfilter: Use rhashtable walk iterator This patch gets rid of the manual rhashtable walk in nft_hash which touches rhashtable internals that should not be exposed. It does so by using the rhashtable iterator primitives. Note that I'm leaving nft_hash_destroy alone since it's only invoked on shutdown and it shouldn't be affected by changes to rhashtable internals (or at least not what I'm planning to change). Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/netfilter/nft_hash.c | 53 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 75887d7d2c6a..61e6c407476a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -130,31 +130,50 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; const struct nft_hash_elem *he; + struct rhashtable_iter hti; struct nft_set_elem elem; - unsigned int i; + int err; - tbl = rht_dereference_rcu(priv->tbl, priv); - for (i = 0; i < tbl->size; i++) { - struct rhash_head *pos; + err = rhashtable_walk_init(priv, &hti); + iter->err = err; + if (err) + return; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) { + iter->err = err; + goto out; + } - rht_for_each_entry_rcu(he, pos, tbl, i, node) { - if (iter->count < iter->skip) - goto cont; + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + err = PTR_ERR(he); + if (err != -EAGAIN) { + iter->err = err; + goto out; + } + } + + if (iter->count < iter->skip) + goto cont; + + memcpy(&elem.key, &he->key, sizeof(elem.key)); + if (set->flags & NFT_SET_MAP) + memcpy(&elem.data, he->data, sizeof(elem.data)); + elem.flags = 0; - memcpy(&elem.key, &he->key, sizeof(elem.key)); - if (set->flags & NFT_SET_MAP) - memcpy(&elem.data, he->data, sizeof(elem.data)); - elem.flags = 0; + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + goto out; - iter->err = iter->fn(ctx, set, iter, &elem); - if (iter->err < 0) - return; cont: - iter->count++; - } + iter->count++; } + +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); } static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) -- cgit v1.2.3 From 9878196578286c5ed494778ada01da094377a686 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Feb 2015 18:31:53 -0800 Subject: tcp: do not pace pure ack packets When we added pacing to TCP, we decided to let sch_fq take care of actual pacing. All TCP had to do was to compute sk->pacing_rate using a simple formula: sk->pacing_rate = 2 * cwnd * mss / rtt It works well for senders (bulk flows), but not very well for receivers or even RPC: cwnd on the receiver can be less than 10, rtt can be around 100ms, so we can end up pacing ACK packets, slowing down the sender. Really, only the sender should pace, according to its own logic. Instead of adding a new bit in skb, or calling yet another flow dissection, we tweak skb->truesize to a small value (2), and we instruct sch_fq to use the new helper and not pace pure acks. Note this also helps TCP small queue, as ack packets present in qdisc/NIC do not prevent sending a data packet (RPC workload). This helps to reduce tx completion overhead, since ack packets can use regular sock_wfree() instead of tcp_wfree() which is a bit more expensive. This has no impact when packets are sent to the loopback interface, as we do not coalesce ack packets (where we would detect the skb->truesize lie). In case netem (with a delay) is used, skb_orphan_partial() also sets skb->truesize to 1. This patch is a combination of two patches we used for about one year at Google. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/tcp.h | 15 +++++++++++++++ net/ipv4/tcp_output.c | 10 +++++++++- net/sched/sch_fq.c | 10 ++++++++-- 3 files changed, 32 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index b8fdc6bab3f3..637ee490ec81 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1713,4 +1713,19 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) return dopt; } +/* locally generated TCP pure ACKs have skb->truesize == 2 + * (check tcp_send_ack() in net/ipv4/tcp_output.c ) + * This is much faster than dissecting the packet to find out. + * (Think of GRE encapsulations, IPv4, IPv6, ...) + */ +static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb) +{ + return skb->truesize == 2; +} + +static inline void skb_set_tcp_pure_ack(struct sk_buff *skb) +{ + skb->truesize = 2; +} + #endif /* _TCP_H */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20ab06b228ac..1b326ed46f7b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -948,7 +948,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = tcp_wfree; + skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); @@ -3265,6 +3265,14 @@ void tcp_send_ack(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); + /* We do not want pure acks influencing TCP Small Queues or fq/pacing + * too much. + * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784 + * We also avoid tcp_wfree() overhead (cache line miss accessing + * tp->tsq_flags) by using regular sock_wfree() + */ + skb_set_tcp_pure_ack(buff); + /* Send it off, this clears delayed acks for us. */ skb_mstamp_get(&buff->skb_mstamp); tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 2a50f5c62070..69a3dbf55c60 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -52,6 +52,7 @@ #include #include #include +#include /* * Per flow structure, dynamically allocated @@ -445,7 +446,9 @@ begin: goto begin; } - if (unlikely(f->head && now < f->time_next_packet)) { + skb = f->head; + if (unlikely(skb && now < f->time_next_packet && + !skb_is_tcp_pure_ack(skb))) { head->first = f->next; fq_flow_set_throttled(q, f); goto begin; @@ -464,12 +467,15 @@ begin: goto begin; } prefetch(&skb->end); - f->time_next_packet = now; f->credit -= qdisc_pkt_len(skb); if (f->credit > 0 || !q->rate_enable) goto out; + /* Do not pace locally generated ack packets */ + if (skb_is_tcp_pure_ack(skb)) + goto out; + rate = q->flow_max_rate; if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); -- cgit v1.2.3 From 06eb395fa9856b5a87cf7d80baee2a0ed3cdb9d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Feb 2015 21:30:40 -0800 Subject: pkt_sched: fq: better control of DDOS traffic FQ has a fast path for skb attached to a socket, as it does not have to compute a flow hash. But for other packets, FQ being non-stochastic means that hosts exposed to random Internet traffic can allocate millions of flow structures (104 bytes each) pretty easily. Not only can the host OOM, but lookups in the RB trees can also consume too much cpu and memory. This patch adds a new attribute, orphan_mask, which adds the possibility of using a stochastic hash for orphaned skbs.
Its default value is 1024 slots, to mimic SFQ behavior. Note: This does not apply to locally generated TCP traffic, and no locally generated traffic will share a flow structure with another perfect or stochastic flow. This patch also handles the specific case of SYNACK messages: They are attached to the listener socket, and therefore all map to a single hash bucket. If listener have set SO_MAX_PACING_RATE, hoping to have new accepted socket inherit this rate, SYNACK might be paced and even dropped. This is very similar to an internal patch Google have used more than one year. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ net/sched/sch_fq.c | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index d62316baae94..534b84710745 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -774,6 +774,8 @@ enum { TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + __TCA_FQ_MAX }; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 69a3dbf55c60..a00c43043001 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -93,6 +93,7 @@ struct fq_sched_data { u32 flow_refill_delay; u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ + u32 orphan_mask; /* mask for orphaned skb */ struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -223,11 +224,20 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) return &q->internal; - if (unlikely(!sk)) { + /* SYNACK messages are attached to a listener socket. + * 1) They are not part of a 'flow' yet + * 2) We do not want to rate limit them (eg SYNFLOOD attack), + * especially if the listener set SO_MAX_PACING_RATE + * 3) We pretend they are orphaned + */ + if (!sk || sk->sk_state == TCP_LISTEN) { + unsigned long hash = skb_get_hash(skb) & q->orphan_mask; + /* By forcing low order bit to 1, we make sure to not * collide with a local flow (socket pointers are word aligned) */ - sk = (struct sock *)(skb_get_hash(skb) | 1L); + sk = (struct sock *)((hash << 1) | 1UL); + skb_orphan(skb); } root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; @@ -704,6 +714,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } + if (tb[TCA_FQ_ORPHAN_MASK]) + q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); + if (!err) { sch_tree_unlock(sch); err = fq_resize(sch, fq_log); @@ -749,6 +762,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->delayed = RB_ROOT; q->fq_root = NULL; q->fq_trees_log = ilog2(1024); + q->orphan_mask = 1024 - 1; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -778,6 +792,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || + nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; -- cgit v1.2.3 From 3fcf9011188755c883f377764c463b4271d594a2 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Wed, 4 Feb 2015 23:52:44 -0800 Subject: Revert "bridge: Let bridge not age 'externally' learnt FDB entries, they are removed when 'external' entity notifies the aging" This reverts commit 9a05dde59a35eee5643366d3d1e1f43fc9069adb. Requested by Scott Feldman. Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6eb94b58637a..08bf04bdac58 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -280,7 +280,7 @@ void br_fdb_cleanup(unsigned long _data) hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; - if (f->is_static || f->added_by_external_learn) + if (f->is_static) continue; this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) -- cgit v1.2.3 From d1e158e2d7a0a91110b206653f0e02376e809150 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 4 Feb 2015 15:25:09 +0100 Subject: ip6_gre: fix endianness errors in ip6gre_err info is in network byte order, change it back to host byte order before use. In particular, the current code sets the MTU of the tunnel to a wrong (too big) value. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Signed-off-by: Sabrina Dubroca Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 13cda4c6313b..01ccc28a686f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -417,7 +417,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (code == ICMPV6_HDR_FIELD) teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); - if (teli && teli == info - 2) { + if (teli && teli == be32_to_cpu(info) - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", @@ -429,7 +429,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } break; case ICMPV6_PKT_TOOBIG: - mtu = info - offset; + mtu = be32_to_cpu(info) - offset; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; -- cgit v1.2.3 From 2ce1ee1780564ba06ab4c1434aa03e347dc9169f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Feb 2015 13:37:44 -0800 Subject: net: remove some sparse warnings netdev_adjacent_add_links() and netdev_adjacent_del_links() are static. queue->qdisc has __rcu annotation, need to use RCU_INIT_POINTER() Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index c87a2264a02b..7fe82929f509 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5294,7 +5294,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); -void netdev_adjacent_add_links(struct net_device *dev) +static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; @@ -5319,7 +5319,7 @@ void netdev_adjacent_add_links(struct net_device *dev) } } -void netdev_adjacent_del_links(struct net_device *dev) +static void netdev_adjacent_del_links(struct net_device *dev) { struct netdev_adjacent *iter; @@ -6627,7 +6627,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) if (!queue) return NULL; netdev_init_one_queue(dev, queue, NULL); - queue->qdisc = &noop_qdisc; + RCU_INIT_POINTER(queue->qdisc, &noop_qdisc); queue->qdisc_sleeping = &noop_qdisc; rcu_assign_pointer(dev->ingress_queue, queue); #endif -- cgit v1.2.3 From a409caecb2e17fc475533738dd1c69b32e13fe09 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Feb 2015 15:12:04 -0800 Subject: sit: fix some __be16/u16 mismatches Fixes following sparse warnings : net/ipv6/sit.c:1509:32: warning: incorrect type in assignment (different base types) net/ipv6/sit.c:1509:32: expected restricted __be16 [usertype] sport net/ipv6/sit.c:1509:32: got unsigned short net/ipv6/sit.c:1514:32: warning: incorrect type in assignment (different base types) net/ipv6/sit.c:1514:32: expected restricted __be16 [usertype] dport net/ipv6/sit.c:1514:32: got unsigned short net/ipv6/sit.c:1711:38: warning: incorrect type in argument 3 (different base types) net/ipv6/sit.c:1711:38: expected unsigned short [unsigned] [usertype] value net/ipv6/sit.c:1711:38: got restricted __be16 [usertype] sport net/ipv6/sit.c:1713:38: warning: incorrect type in argument 3 (different base types) net/ipv6/sit.c:1713:38: expected unsigned short [unsigned] [usertype] value net/ipv6/sit.c:1713:38: got restricted __be16 [usertype] dport Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/sit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 213546bd6d5d..cdbfe5af6187 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1506,12 +1506,12 @@ static bool ipip6_netlink_encap_parms(struct nlattr *data[], if (data[IFLA_IPTUN_ENCAP_SPORT]) { ret = true; - ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]); + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); } if (data[IFLA_IPTUN_ENCAP_DPORT]) { ret = true; - ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]); + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); } return ret; @@ -1707,9 +1707,9 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT, + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) -- cgit v1.2.3 From 233c96fc077d310772375d47522fb444ff546905 Mon Sep 17 00:00:00 2001 From: Miroslav Urbanek Date: Thu, 5 Feb 2015 16:36:50 +0100 Subject: flowcache: Fix kernel panic in flow_cache_flush_task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit flow_cache_flush_task references a structure member flow_cache_gc_work where it should reference flow_cache_flush_work instead. Kernel panic occurs on kernels using IPsec during XFRM garbage collection. The garbage collection interval can be shortened using the following sysctl settings: net.ipv4.xfrm4_gc_thresh=4 net.ipv6.xfrm6_gc_thresh=4 With the default settings, our production servers crash approximately once a week. With the settings above, they crash immediately. Fixes: ca925cf1534e ("flowcache: Make flow cache name space aware") Reported-by: Tomáš Charvát Tested-by: Jan Hejl Signed-off-by: Miroslav Urbanek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index a0348fde1fdf..1033725be40b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -379,7 +379,7 @@ done: static void flow_cache_flush_task(struct work_struct *work) { struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_gc_work); + flow_cache_flush_work); struct net *net = container_of(xfrm, struct net, xfrm); flow_cache_flush(net); -- cgit v1.2.3 From c58da4c659803ac12eca5275c8a7064222adb4c7 Mon Sep 17 00:00:00 2001 From: Erik Kline Date: Wed, 4 Feb 2015 20:01:23 +0900 Subject: net: ipv6: allow explicitly choosing optimistic addresses RFC 4429 ("Optimistic DAD") states that optimistic addresses should be treated as deprecated addresses. From section 2.1: Unless noted otherwise, components of the IPv6 protocol stack should treat addresses in the Optimistic state equivalently to those in the Deprecated state, indicating that the address is available for use but should not be used if another suitable address is available. Optimistic addresses are indeed avoided when other addresses are available (i.e. at source address selection time), but they have not heretofore been available for things like explicit bind() and sendmsg() with struct in6_pktinfo, etc. This change makes optimistic addresses treated more like deprecated addresses than tentative ones.
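As a rough usage sketch (not part of this patch; the surrounding caller and use_address() are hypothetical, only ipv6_chk_addr_and_flags() and the IFA_F_* flags come from the change), a caller now states explicitly whether optimistic addresses are acceptable by choosing the banned_flags mask:

	/* optimistic is acceptable: treat it like deprecated, ban only tentative */
	if (ipv6_chk_addr_and_flags(net, addr, dev, 1, IFA_F_TENTATIVE))
		use_address(addr);

	/* optimistic is not acceptable, e.g. when validating a probe source address */
	if (ipv6_chk_addr_and_flags(net, addr, dev, 1,
				    IFA_F_TENTATIVE | IFA_F_OPTIMISTIC))
		use_address(addr);

The ndisc_solicit() hunk below uses the second form, while the old ipv6_chk_addr() keeps its behaviour by becoming a wrapper that bans only IFA_F_TENTATIVE.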
Signed-off-by: Erik Kline Acked-by: Lorenzo Colitti Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/addrconf.h | 3 +++ net/ipv6/addrconf.c | 19 +++++++++++++++++-- net/ipv6/ndisc.c | 4 +++- 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index d13573bb879e..80456f72d70a 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -62,6 +62,9 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg); int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); +int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict, + u32 banned_flags); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f7c8bbeb27b7..62900aee4c58 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1518,16 +1518,31 @@ static int ipv6_count_addresses(struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) +{ + return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE); +} +EXPORT_SYMBOL(ipv6_chk_addr); + +int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict, + u32 banned_flags) { struct inet6_ifaddr *ifp; unsigned int hash = inet6_addr_hash(addr); + u32 ifp_flags; rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; + /* Decouple optimistic from tentative for evaluation here. + * Ban optimistic addresses explicitly, when required. + */ + ifp_flags = (ifp->flags&IFA_F_OPTIMISTIC) + ? (ifp->flags&~IFA_F_TENTATIVE) + : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && - !(ifp->flags&IFA_F_TENTATIVE) && + !(ifp_flags&banned_flags) && (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock_bh(); @@ -1538,7 +1553,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, rcu_read_unlock_bh(); return 0; } -EXPORT_SYMBOL(ipv6_chk_addr); +EXPORT_SYMBOL(ipv6_chk_addr_and_flags); static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 682866777d53..113fc6cd5a0c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -655,7 +655,9 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) + if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, + dev, 1, + IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { -- cgit v1.2.3 From 7744b5f3693cc06695cb9d6667671c790282730f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 4 Feb 2015 23:08:50 +0100 Subject: pktgen: fix UDP checksum computation This patch fixes two issues in UDP checksum computation in pktgen. First, the pseudo-header uses the source and destination IP addresses. Currently, the ports are used for IPv4. Second, the UDP checksum covers both header and data. 
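For reference, a sketch of the IPv4 pseudo-header that the UDP checksum is computed over (an illustrative struct only, not a kernel definition); it contains the addresses and length, never the ports:

	struct udp_ipv4_pseudo_hdr {	/* illustrative, per RFC 768 */
		__be32	saddr;		/* source IP address */
		__be32	daddr;		/* destination IP address */
		__u8	zero;
		__u8	protocol;	/* IPPROTO_UDP */
		__be16	length;		/* UDP header + payload length */
	};

That is why the fix below feeds iph->saddr/iph->daddr (not udph->source/udph->dest) into udp4_hwcsum() and csum_tcpudp_magic(), and since the checksum also covers the payload, the payload has to exist before the checksum is computed.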
So we need to generate the data earlier (move pktgen_finalize_skb up), and compute the checksum for UDP header + data. Fixes: c26bf4a51308c ("pktgen: Add UDPCSUM flag to support UDP checksums") Signed-off-by: Sabrina Dubroca Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/pktgen.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index da934fc3faa8..9fa25b0ea145 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2842,25 +2842,25 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; + pktgen_finalize_skb(pkt_dev, skb, datalen); + if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; } else if (odev->features & NETIF_F_V4_CSUM) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - udp4_hwcsum(skb, udph->source, udph->dest); + udp4_hwcsum(skb, iph->saddr, iph->daddr); } else { - __wsum csum = udp_csum(skb); + __wsum csum = skb_checksum(skb, skb_transport_offset(skb), datalen + 8, 0); /* add protocol-dependent pseudo-header */ - udph->check = csum_tcpudp_magic(udph->source, udph->dest, + udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen + 8, IPPROTO_UDP, csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; } - pktgen_finalize_skb(pkt_dev, skb, datalen); - #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) return NULL; @@ -2976,6 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; + pktgen_finalize_skb(pkt_dev, skb, datalen); + if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; } else if (odev->features & NETIF_F_V6_CSUM) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, 0); } else { - __wsum csum = udp_csum(skb); + __wsum csum = skb_checksum(skb, skb_transport_offset(skb), udplen, 0); /* add protocol-dependent pseudo-header */ udph->check = csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, csum); @@ -2993,8 +2995,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph->check = CSUM_MANGLED_0; } - pktgen_finalize_skb(pkt_dev, skb, datalen); - return skb; } -- cgit v1.2.3 From c5898636c440da91d58f10beac00f073e68378df Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:36 -0500 Subject: tipc: reduce usage of context info in socket and link The most common usage of namespace information is when we fetch the own node address from the net structure. This leads to a lot of passing around of a parameter of type 'struct net *' between functions just to make them able to obtain this address. However, in many cases this is unnecessary. The own node address is readily available as a member of both struct tipc_sock and tipc_link, and can be fetched from there instead. The fact that the vast majority of functions in socket.c and link.c are maintaining a pointer to their respective base structures anyway makes this option even more compelling. In this commit, we introduce the inline functions tsk_own_node() and link_own_node() to make it easy for functions to fetch the node address from those structs instead of having to pass along and dereference the namespace struct.
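A minimal sketch of the pattern (the link-side accessor mirrors link_own_addr() as added in the link.h hunk further down; the example function names are hypothetical, and the socket-side tsk_own_node() body is not shown in this excerpt, so treat the idea rather than the exact code as authoritative):

	/* before: a 'struct net *' had to be passed around just for this */
	static u32 example_own_addr_via_net(struct net *net)
	{
		struct tipc_net *tn = net_generic(net, tipc_net_id);

		return tn->own_addr;
	}

	/* after: read it straight from the link's preformatted protocol header */
	static u32 example_own_addr_via_link(struct tipc_link *l)
	{
		return msg_prevnode(l->pmsg);
	}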
In particular, we make calls to the msg_xx() functions in msg.{h,c} context independent by directly passing them the own node address as a parameter when needed. Those functions should be regarded as leaves in the code dependency tree, and it is hence desirable to keep them namespace unaware. Apart from a potential positive effect on cache behavior, these changes make it easier to introduce the changes that will follow later in this series. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 7 ++++-- net/tipc/bcast.h | 2 +- net/tipc/discover.c | 3 ++- net/tipc/link.c | 56 ++++++++++++++++++++---------------------- net/tipc/link.h | 4 +++ net/tipc/msg.c | 33 ++++++++++++------------- net/tipc/msg.h | 10 ++++---- net/tipc/name_distr.c | 5 ++-- net/tipc/node.c | 2 +- net/tipc/socket.c | 67 +++++++++++++++++++++++++++------------------------ 10 files changed, 98 insertions(+), 91 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 53f8bf059fec..3b886eb35c87 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -283,10 +283,11 @@ exit: * * RCU and node lock set */ -void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, +void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) { struct sk_buff *buf; + struct net *net = n_ptr->net; struct tipc_net *tn = net_generic(net, tipc_net_id); /* Ignore "stale" link state info */ @@ -317,7 +318,7 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - tipc_msg_init(net, msg, BCAST_PROTOCOL, STATE_MSG, + tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tn->net_id); @@ -954,6 +955,8 @@ int tipc_bclink_init(struct net *net) bcl->bearer_id = MAX_BEARERS; rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; + bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg; + msg_set_prevnode(bcl->pmsg, tn->own_addr); strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); tn->bcbearer = bcbearer; tn->bclink = bclink; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a4583a109486..6ea190dccfe1 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -139,7 +139,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); u32 tipc_bclink_get_last_sent(struct net *net); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); -void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, +void tipc_bclink_update_link_state(struct tipc_node *node, u32 last_sent); int tipc_bclink_stats(struct net *net, char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(struct net *net); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index a580a40d0208..feef3753615d 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -85,7 +85,8 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, u32 dest_domain = b_ptr->domain; msg = buf_msg(buf); - tipc_msg_init(net, msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); + tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, + INT_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tn->random); msg_set_dest_domain(msg, dest_domain); diff --git a/net/tipc/link.c b/net/tipc/link.c index
77c7ccd492b5..41cb09aa41de 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -101,13 +101,12 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { */ #define START_CHANGEOVER 100000u -static void link_handle_out_of_seq_msg(struct net *net, - struct tipc_link *l_ptr, - struct sk_buff *buf); -static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, - struct sk_buff *buf); -static int tipc_link_tunnel_rcv(struct net *net, struct tipc_node *n_ptr, - struct sk_buff **buf); +static void link_handle_out_of_seq_msg(struct tipc_link *link, + struct sk_buff *skb); +static void tipc_link_proto_rcv(struct tipc_link *link, + struct sk_buff *skb); +static int tipc_link_tunnel_rcv(struct tipc_node *node, + struct sk_buff **skb); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); @@ -303,7 +302,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; - tipc_msg_init(n_ptr->net, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, + tipc_msg_init(tn->own_addr, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); msg_set_session(msg, (tn->random & 0xffff)); @@ -379,12 +378,11 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, static bool link_schedule_user(struct tipc_link *link, u32 oport, uint chain_sz, uint imp) { - struct net *net = link->owner->net; - struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf; - buf = tipc_msg_create(net, SOCK_WAKEUP, 0, INT_H_SIZE, 0, tn->own_addr, - tn->own_addr, oport, 0, 0); + buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + link_own_addr(link), link_own_addr(link), + oport, 0, 0); if (!buf) return false; TIPC_SKB_CB(buf)->chain_sz = chain_sz; @@ -778,7 +776,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, } else if (tipc_msg_bundle(outqueue, skb, mtu)) { link->stats.sent_bundled++; continue; - } else if (tipc_msg_make_bundle(net, outqueue, skb, mtu, + } else if (tipc_msg_make_bundle(outqueue, skb, mtu, link->addr)) { link->stats.sent_bundled++; link->stats.sent_bundles++; @@ -877,7 +875,7 @@ static void tipc_link_sync_xmit(struct tipc_link *link) return; msg = buf_msg(skb); - tipc_msg_init(link->owner->net, msg, BCAST_PROTOCOL, STATE_MSG, + tipc_msg_init(link_own_addr(link), msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, link->addr); msg_set_last_bcast(msg, link->owner->bclink.acked); __tipc_link_xmit_skb(link, skb); @@ -1207,7 +1205,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { - tipc_link_proto_rcv(net, l_ptr, skb); + tipc_link_proto_rcv(l_ptr, skb); link_retrieve_defq(l_ptr, &head); tipc_node_unlock(n_ptr); continue; @@ -1227,7 +1225,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Link is now in state WORKING_WORKING */ if (unlikely(seq_no != mod(l_ptr->next_in_no))) { - link_handle_out_of_seq_msg(net, l_ptr, skb); + link_handle_out_of_seq_msg(l_ptr, skb); link_retrieve_defq(l_ptr, &head); tipc_node_unlock(n_ptr); continue; @@ -1275,7 +1273,7 @@ static int tipc_link_prepare_input(struct net *net, struct tipc_link *l, msg = buf_msg(*buf); switch (msg_user(msg)) { case CHANGEOVER_PROTOCOL: - if 
(tipc_link_tunnel_rcv(net, n, buf)) + if (tipc_link_tunnel_rcv(n, buf)) res = 0; break; case MSG_FRAGMENTER: @@ -1375,14 +1373,13 @@ u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) /* * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet */ -static void link_handle_out_of_seq_msg(struct net *net, - struct tipc_link *l_ptr, +static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, struct sk_buff *buf) { u32 seq_no = buf_seqno(buf); if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) { - tipc_link_proto_rcv(net, l_ptr, buf); + tipc_link_proto_rcv(l_ptr, buf); return; } @@ -1507,10 +1504,9 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, * Note that network plane id propagates through the network, and may * change at any time. The node with lowest address rules */ -static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, +static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) { - struct tipc_net *tn = net_generic(net, tipc_net_id); u32 rec_gap = 0; u32 max_pkt_info; u32 max_pkt_ack; @@ -1522,7 +1518,7 @@ static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, goto exit; if (l_ptr->net_plane != msg_net_plane(msg)) - if (tn->own_addr > msg_prevnode(msg)) + if (link_own_addr(l_ptr) > msg_prevnode(msg)) l_ptr->net_plane = msg_net_plane(msg); switch (msg_type(msg)) { @@ -1625,7 +1621,7 @@ static void tipc_link_proto_rcv(struct net *net, struct tipc_link *l_ptr, /* Protocol message before retransmits, reduce loss risk */ if (l_ptr->owner->bclink.recv_permitted) - tipc_bclink_update_link_state(net, l_ptr->owner, + tipc_bclink_update_link_state(l_ptr->owner, msg_last_bcast(msg)); if (rec_gap || (msg_probe(msg))) { @@ -1690,7 +1686,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (!tunnel) return; - tipc_msg_init(l_ptr->owner->net, &tunnel_hdr, CHANGEOVER_PROTOCOL, + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); @@ -1748,7 +1744,7 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct sk_buff *skb; struct tipc_msg tunnel_hdr; - tipc_msg_init(l_ptr->owner->net, &tunnel_hdr, CHANGEOVER_PROTOCOL, + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); msg_set_msgcnt(&tunnel_hdr, skb_queue_len(&l_ptr->outqueue)); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); @@ -1802,7 +1798,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) /* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. * Owner node is locked. */ -static void tipc_link_dup_rcv(struct net *net, struct tipc_link *l_ptr, +static void tipc_link_dup_rcv(struct tipc_link *l_ptr, struct sk_buff *t_buf) { struct sk_buff *buf; @@ -1817,7 +1813,7 @@ static void tipc_link_dup_rcv(struct net *net, struct tipc_link *l_ptr, } /* Add buffer to deferred queue, if applicable: */ - link_handle_out_of_seq_msg(net, l_ptr, buf); + link_handle_out_of_seq_msg(l_ptr, buf); } /* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet @@ -1869,7 +1865,7 @@ exit: * returned to the active link for delivery upwards. * Owner node is locked. 
*/ -static int tipc_link_tunnel_rcv(struct net *net, struct tipc_node *n_ptr, +static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf) { struct sk_buff *t_buf = *buf; @@ -1887,7 +1883,7 @@ static int tipc_link_tunnel_rcv(struct net *net, struct tipc_node *n_ptr, goto exit; if (msg_type(t_msg) == DUPLICATE_MSG) - tipc_link_dup_rcv(net, l_ptr, t_buf); + tipc_link_dup_rcv(l_ptr, t_buf); else if (msg_type(t_msg) == ORIGINAL_MSG) *buf = tipc_link_failover_rcv(l_ptr, t_buf); else diff --git a/net/tipc/link.h b/net/tipc/link.h index 3e3432b3044e..5b9a17f26280 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -278,6 +278,10 @@ static inline u32 lesser(u32 left, u32 right) return less_eq(left, right) ? left : right; } +static inline u32 link_own_addr(struct tipc_link *l) +{ + return msg_prevnode(l->pmsg); +} /* * Link status checking routines diff --git a/net/tipc/msg.c b/net/tipc/msg.c index da67c8d3edc6..940d74197b8c 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -70,25 +70,23 @@ struct sk_buff *tipc_buf_acquire(u32 size) return skb; } -void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode) +void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 dnode) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - memset(m, 0, hsize); msg_set_version(m); msg_set_user(m, user); msg_set_hdr_sz(m, hsize); msg_set_size(m, hsize); - msg_set_prevnode(m, tn->own_addr); + msg_set_prevnode(m, own_node); msg_set_type(m, type); if (hsize > SHORT_H_SIZE) { - msg_set_orignode(m, tn->own_addr); - msg_set_destnode(m, destnode); + msg_set_orignode(m, own_node); + msg_set_destnode(m, dnode); } } -struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type, +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode) { @@ -100,9 +98,8 @@ struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type, return NULL; msg = buf_msg(buf); - tipc_msg_init(net, msg, user, type, hdr_sz, dnode); + tipc_msg_init(onode, msg, user, type, hdr_sz, dnode); msg_set_size(msg, hdr_sz + data_sz); - msg_set_prevnode(msg, onode); msg_set_origport(msg, oport); msg_set_destport(msg, dport); msg_set_errcode(msg, errcode); @@ -195,7 +192,7 @@ err: * * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int pktmax, struct sk_buff_head *list) { int mhsz = msg_hdr_sz(mhdr); @@ -227,8 +224,8 @@ int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, } /* Prepare reusable fragment header */ - tipc_msg_init(net, &pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE, - msg_destnode(mhdr)); + tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER, + FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr)); msg_set_size(&pkthdr, pktmax); msg_set_fragm_no(&pkthdr, pktno); @@ -338,7 +335,7 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) * Replaces buffer if successful * Returns true if success, otherwise false */ -bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, +bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu, u32 dnode) { struct sk_buff *bskb; @@ -362,7 +359,8 @@ bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, skb_trim(bskb, INT_H_SIZE); bmsg = buf_msg(bskb); - 
tipc_msg_init(net, bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); + tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, + INT_H_SIZE, dnode); msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); @@ -379,10 +377,9 @@ bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, * Consumes buffer if failure * Returns true if success, otherwise false */ -bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode, +bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg = buf_msg(buf); uint imp = msg_importance(msg); struct tipc_msg ohdr; @@ -402,7 +399,7 @@ bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode, msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); - msg_set_prevnode(msg, tn->own_addr); + msg_set_prevnode(msg, own_addr); if (!msg_short(msg)) { msg_set_orignode(msg, msg_destnode(&ohdr)); msg_set_destnode(msg, msg_orignode(&ohdr)); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 526ef345b70e..f7ea95458c6f 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -748,19 +748,19 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) } struct sk_buff *tipc_buf_acquire(u32 size); -bool tipc_msg_reverse(struct net *net, struct sk_buff *buf, u32 *dnode, +bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err); int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode); -void tipc_msg_init(struct net *net, struct tipc_msg *m, u32 user, u32 type, +void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); -struct sk_buff *tipc_msg_create(struct net *net, uint user, uint type, +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); -bool tipc_msg_make_bundle(struct net *net, struct sk_buff_head *list, +bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu, u32 dnode); -int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m, +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 7f31cd4badc4..dd8564cd9dbb 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -71,13 +71,14 @@ static void publ_to_item(struct distr_item *i, struct publication *p) static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, u32 dest) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); struct tipc_msg *msg; if (buf != NULL) { msg = buf_msg(buf); - tipc_msg_init(net, msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, - dest); + tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type, + INT_H_SIZE, dest); msg_set_size(msg, INT_H_SIZE + size); } return buf; diff --git a/net/tipc/node.c b/net/tipc/node.c index 842bd7ad4b17..1c409c45f0fe 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -204,7 +204,7 @@ void tipc_node_abort_sock_conns(struct net *net, struct list_head *conns) struct sk_buff *buf; list_for_each_entry_safe(conn, safe, conns, 
list) { - buf = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, tn->own_addr, conn->peer_node, conn->port, conn->peer_port, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index caa4d663fd90..b384e658dfeb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -177,6 +177,11 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { * - port reference */ +static u32 tsk_own_node(struct tipc_sock *tsk) +{ + return msg_prevnode(&tsk->phdr); +} + static u32 tsk_peer_node(struct tipc_sock *tsk) { return msg_destnode(&tsk->phdr); @@ -249,11 +254,11 @@ static void tsk_rej_rx_queue(struct sock *sk) { struct sk_buff *skb; u32 dnode; - struct net *net = sock_net(sk); + u32 own_node = tsk_own_node(tipc_sk(sk)); while ((skb = __skb_dequeue(&sk->sk_receive_queue))) { - if (tipc_msg_reverse(net, skb, &dnode, TIPC_ERR_NO_PORT)) - tipc_link_xmit_skb(net, skb, dnode, 0); + if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0); } } @@ -305,6 +310,7 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, int kern) { + struct tipc_net *tn; const struct proto_ops *ops; socket_state state; struct sock *sk; @@ -346,7 +352,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->max_pkt = MAX_PKT_DEFAULT; INIT_LIST_HEAD(&tsk->publications); msg = &tsk->phdr; - tipc_msg_init(net, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + tn = net_generic(sock_net(sk), tipc_net_id); + tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); /* Finish initializing socket data structures */ @@ -471,7 +478,6 @@ static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; struct net *net; - struct tipc_net *tn; struct tipc_sock *tsk; struct sk_buff *skb; u32 dnode, probing_state; @@ -484,8 +490,6 @@ static int tipc_release(struct socket *sock) return 0; net = sock_net(sk); - tn = net_generic(net, tipc_net_id); - tsk = tipc_sk(sk); lock_sock(sk); @@ -507,7 +511,7 @@ static int tipc_release(struct socket *sock) tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } - if (tipc_msg_reverse(net, skb, &dnode, + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, TIPC_ERR_NO_PORT)) tipc_link_xmit_skb(net, skb, dnode, 0); } @@ -520,9 +524,9 @@ static int tipc_release(struct socket *sock) sock_put(sk); tipc_sk_remove(tsk); if (tsk->connected) { - skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, - tn->own_addr, tsk_peer_port(tsk), + tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, TIPC_ERR_NO_PORT); if (skb) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); @@ -730,8 +734,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct msghdr *msg, size_t dsz, long timeo) { struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); - struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; + struct tipc_msg *mhdr = &tsk->phdr; struct sk_buff_head head; struct iov_iter save = msg->msg_iter; uint mtu; @@ -749,7 +754,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); __skb_queue_head_init(&head); - rc = tipc_msg_build(net, mhdr, msg, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, 
&head); if (unlikely(rc < 0)) return rc; @@ -836,7 +841,7 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (!tipc_msg_reverse(sock_net(&tsk->sk), buf, dnode, TIPC_OK)) + if (!tipc_msg_reverse(tsk_own_node(tsk), buf, dnode, TIPC_OK)) return TIPC_OK; msg_set_type(msg, CONN_PROBE_REPLY); return TIPC_FWD_MSG; @@ -971,7 +976,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); __skb_queue_head_init(&head); - rc = tipc_msg_build(net, mhdr, m, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); if (rc < 0) goto exit; @@ -1090,7 +1095,7 @@ next: mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); __skb_queue_head_init(&head); - rc = tipc_msg_build(net, mhdr, m, sent, send, mtu, &head); + rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head); if (unlikely(rc < 0)) goto exit; do { @@ -1263,7 +1268,6 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) { struct net *net = sock_net(&tsk->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); @@ -1271,9 +1275,9 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!tsk->connected) return; - skb = tipc_msg_create(net, CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, - dnode, tn->own_addr, peer_port, tsk->portid, - TIPC_OK); + skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tsk_own_node(tsk), peer_port, + tsk->portid, TIPC_OK); if (!skb) return; msg = buf_msg(skb); @@ -1756,7 +1760,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } - if ((rc < 0) && !tipc_msg_reverse(net, skb, &onode, -rc)) + if ((rc < 0) && !tipc_msg_reverse(tsk_own_node(tsk), skb, &onode, -rc)) return 0; tipc_link_xmit_skb(net, skb, onode, 0); @@ -1773,6 +1777,7 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) int tipc_sk_rcv(struct net *net, struct sk_buff *skb) { struct tipc_sock *tsk; + struct tipc_net *tn; struct sock *sk; u32 dport = msg_destport(buf_msg(skb)); int rc = TIPC_OK; @@ -1804,7 +1809,8 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) if (likely(!rc)) return 0; exit: - if ((rc < 0) && !tipc_msg_reverse(net, skb, &dnode, -rc)) + tn = net_generic(net, tipc_net_id); + if ((rc < 0) && !tipc_msg_reverse(tn->own_addr, skb, &dnode, -rc)) return -EHOSTUNREACH; tipc_link_xmit_skb(net, skb, dnode, 0); @@ -2065,7 +2071,6 @@ static int tipc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_sock *tsk = tipc_sk(sk); struct sk_buff *skb; u32 dnode; @@ -2088,16 +2093,17 @@ restart: kfree_skb(skb); goto restart; } - if (tipc_msg_reverse(net, skb, &dnode, + if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, TIPC_CONN_SHUTDOWN)) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); - skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, - 0, dnode, tn->own_addr, + 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), tsk->portid, TIPC_CONN_SHUTDOWN); tipc_link_xmit_skb(net, skb, dnode, tsk->portid); @@ -2129,10 +2135,9 @@ static 
void tipc_sk_timeout(unsigned long data) { struct tipc_sock *tsk = (struct tipc_sock *)data; struct sock *sk = &tsk->sk; - struct net *net = sock_net(sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb = NULL; u32 peer_port, peer_node; + u32 own_node = tsk_own_node(tsk); bh_lock_sock(sk); if (!tsk->connected) { @@ -2144,13 +2149,13 @@ static void tipc_sk_timeout(unsigned long data) if (tsk->probing_state == TIPC_CONN_PROBING) { /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(net, TIPC_CRITICAL_IMPORTANCE, + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, - tn->own_addr, peer_node, tsk->portid, + own_node, peer_node, tsk->portid, peer_port, TIPC_ERR_NO_PORT); } else { - skb = tipc_msg_create(net, CONN_MANAGER, CONN_PROBE, INT_H_SIZE, - 0, peer_node, tn->own_addr, + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, + INT_H_SIZE, 0, peer_node, own_node, peer_port, tsk->portid, TIPC_OK); tsk->probing_state = TIPC_CONN_PROBING; sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); -- cgit v1.2.3 From 1186adf7df04e3b4298943fe89d9741ab42e30ff Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:37 -0500 Subject: tipc: simplify message forwarding and rejection in socket layer Despite recent improvements, the handling of error codes and return values at reception of messages in the socket layer is still confusing. In this commit, we try to make it more comprehensible. First, we separate between the return values coming from the functions called by tipc_sk_rcv(), -those are TIPC specific error codes, and the return values returned by tipc_sk_rcv() itself. Second, we don't use the returned TIPC error code as indication for whether a buffer should be forwarded/rejected or not; instead we use the buffer pointer passed along with filter_msg(). This separation is necessary because we sometimes want to forward messages even when there is no error (i.e., protocol messages and successfully secondary looked up data messages). Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 120 ++++++++++++++++++++++++++---------------------------- 1 file changed, 58 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b384e658dfeb..f9cd587e4090 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -818,17 +818,14 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) /** * tipc_sk_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket - * @dnode: node to send response message to, if any - * @buf: buffer containing protocol message - * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if - * (CONN_PROBE_REPLY) message should be forwarded. + * @skb: pointer to message buffer. Set to NULL if buffer is consumed. 
*/ -static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, - struct sk_buff *buf) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb) { - struct tipc_msg *msg = buf_msg(buf); + struct tipc_msg *msg = buf_msg(*skb); int conn_cong; - + u32 dnode; + u32 own_node = tsk_own_node(tsk); /* Ignore if connection cannot be validated: */ if (!tsk_peer_msg(tsk, msg)) goto exit; @@ -841,15 +838,15 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, if (conn_cong) tsk->sk.sk_write_space(&tsk->sk); } else if (msg_type(msg) == CONN_PROBE) { - if (!tipc_msg_reverse(tsk_own_node(tsk), buf, dnode, TIPC_OK)) - return TIPC_OK; - msg_set_type(msg, CONN_PROBE_REPLY); - return TIPC_FWD_MSG; + if (tipc_msg_reverse(own_node, *skb, &dnode, TIPC_OK)) { + msg_set_type(msg, CONN_PROBE_REPLY); + return; + } } /* Do nothing if msg_type() == CONN_PROBE_REPLY */ exit: - kfree_skb(buf); - return TIPC_OK; + kfree_skb(*skb); + *skb = NULL; } static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) @@ -1568,16 +1565,16 @@ static void tipc_data_ready(struct sock *sk) /** * filter_connect - Handle all incoming messages for a connection-based socket * @tsk: TIPC socket - * @msg: message + * @skb: pointer to message buffer. Set to NULL if buffer is consumed * * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise */ -static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) +static int filter_connect(struct tipc_sock *tsk, struct sk_buff **skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct socket *sock = sk->sk_socket; - struct tipc_msg *msg = buf_msg(*buf); + struct tipc_msg *msg = buf_msg(*skb); int retval = -TIPC_ERR_NO_PORT; if (msg_mcast(msg)) @@ -1627,8 +1624,8 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) * connect() routine if sleeping. */ if (msg_data_sz(msg) == 0) { - kfree_skb(*buf); - *buf = NULL; + kfree_skb(*skb); + *skb = NULL; if (waitqueue_active(sk_sleep(sk))) wake_up_interruptible(sk_sleep(sk)); } @@ -1680,32 +1677,33 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) /** * filter_rcv - validate incoming message * @sk: socket - * @buf: message + * @skb: pointer to message. Set to NULL if buffer is consumed. * * Enqueues message on receive queue if acceptable; optionally handles * disconnect indication for a connected socket. * - * Called with socket lock already taken; port lock may also be taken. 
+ * Called with socket lock already taken * - * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message - * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded + * Returns 0 (TIPC_OK) if message was ok, -TIPC error code if rejected */ -static int filter_rcv(struct sock *sk, struct sk_buff *buf) +static int filter_rcv(struct sock *sk, struct sk_buff **skb) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_msg *msg = buf_msg(buf); - unsigned int limit = rcvbuf_limit(sk, buf); - u32 onode; + struct tipc_msg *msg = buf_msg(*skb); + unsigned int limit = rcvbuf_limit(sk, *skb); int rc = TIPC_OK; - if (unlikely(msg_user(msg) == CONN_MANAGER)) - return tipc_sk_proto_rcv(tsk, &onode, buf); + if (unlikely(msg_user(msg) == CONN_MANAGER)) { + tipc_sk_proto_rcv(tsk, skb); + return TIPC_OK; + } if (unlikely(msg_user(msg) == SOCK_WAKEUP)) { - kfree_skb(buf); + kfree_skb(*skb); tsk->link_cong = 0; sk->sk_write_space(sk); + *skb = NULL; return TIPC_OK; } @@ -1717,21 +1715,22 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) if (msg_connected(msg)) return -TIPC_ERR_NO_PORT; } else { - rc = filter_connect(tsk, &buf); - if (rc != TIPC_OK || buf == NULL) + rc = filter_connect(tsk, skb); + if (rc != TIPC_OK || !*skb) return rc; } /* Reject message if there isn't room to queue it */ - if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) + if (sk_rmem_alloc_get(sk) + (*skb)->truesize >= limit) return -TIPC_ERR_OVERLOAD; /* Enqueue message */ - TIPC_SKB_CB(buf)->handle = NULL; - __skb_queue_tail(&sk->sk_receive_queue, buf); - skb_set_owner_r(buf, sk); + TIPC_SKB_CB(*skb)->handle = NULL; + __skb_queue_tail(&sk->sk_receive_queue, *skb); + skb_set_owner_r(*skb, sk); sk->sk_data_ready(sk); + *skb = NULL; return TIPC_OK; } @@ -1746,25 +1745,22 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf) */ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - int rc; - u32 onode; + int err; + atomic_t *dcnt; + u32 dnode; struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); uint truesize = skb->truesize; - rc = filter_rcv(sk, skb); - - if (likely(!rc)) { - if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(truesize, &tsk->dupl_rcvcnt); + err = filter_rcv(sk, &skb); + if (likely(!skb)) { + dcnt = &tsk->dupl_rcvcnt; + if (atomic_read(dcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, dcnt); return 0; } - - if ((rc < 0) && !tipc_msg_reverse(tsk_own_node(tsk), skb, &onode, -rc)) - return 0; - - tipc_link_xmit_skb(net, skb, onode, 0); - + if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err)) + tipc_link_xmit_skb(net, skb, dnode, tsk->portid); return 0; } @@ -1780,14 +1776,14 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) struct tipc_net *tn; struct sock *sk; u32 dport = msg_destport(buf_msg(skb)); - int rc = TIPC_OK; + int err = TIPC_OK; uint limit; u32 dnode; /* Validate destination and message */ tsk = tipc_sk_lookup(net, dport); if (unlikely(!tsk)) { - rc = tipc_msg_eval(net, skb, &dnode); + err = tipc_msg_eval(net, skb, &dnode); goto exit; } sk = &tsk->sk; @@ -1796,25 +1792,25 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) spin_lock_bh(&sk->sk_lock.slock); if (!sock_owned_by_user(sk)) { - rc = filter_rcv(sk, skb); + err = filter_rcv(sk, &skb); } else { if (sk->sk_backlog.len == 0) atomic_set(&tsk->dupl_rcvcnt, 0); limit = rcvbuf_limit(sk, skb) + atomic_read(&tsk->dupl_rcvcnt); - if (sk_add_backlog(sk, skb, limit)) - rc = 
-TIPC_ERR_OVERLOAD; + if (likely(!sk_add_backlog(sk, skb, limit))) + skb = NULL; + else + err = -TIPC_ERR_OVERLOAD; } spin_unlock_bh(&sk->sk_lock.slock); sock_put(sk); - if (likely(!rc)) - return 0; exit: - tn = net_generic(net, tipc_net_id); - if ((rc < 0) && !tipc_msg_reverse(tn->own_addr, skb, &dnode, -rc)) - return -EHOSTUNREACH; - - tipc_link_xmit_skb(net, skb, dnode, 0); - return (rc < 0) ? -EHOSTUNREACH : 0; + if (unlikely(skb)) { + tn = net_generic(net, tipc_net_id); + if (!err || tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + tipc_link_xmit_skb(net, skb, dnode, 0); + } + return err ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) -- cgit v1.2.3 From d570d86497eeb11410b1c096d82ade11bcdd966c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:38 -0500 Subject: tipc: enqueue arrived buffers in socket in separate function The code for enqueuing arriving buffers in the function tipc_sk_rcv() contains long code lines and currently goes to two indentation levels. As a cosmetic preparation for the next commits, we break it out into a separate function. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9cd587e4090..1d98bfcda6f6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1764,6 +1764,35 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) return 0; } +/** + * tipc_sk_enqueue_skb - enqueue buffer to socket or backlog queue + * @sk: socket + * @skb: pointer to message. Set to NULL if buffer is consumed. + * @dnode: if buffer should be forwarded/returned, send to this node + * + * Caller must hold socket lock + * + * Returns TIPC_OK (0) or -tipc error code + */ +static int tipc_sk_enqueue_skb(struct sock *sk, struct sk_buff **skb) +{ + unsigned int lim; + atomic_t *dcnt; + + if (unlikely(!*skb)) + return TIPC_OK; + if (!sock_owned_by_user(sk)) + return filter_rcv(sk, skb); + dcnt = &tipc_sk(sk)->dupl_rcvcnt; + if (sk->sk_backlog.len) + atomic_set(dcnt, 0); + lim = rcvbuf_limit(sk, *skb) + atomic_read(dcnt); + if (unlikely(sk_add_backlog(sk, *skb, lim))) + return -TIPC_ERR_OVERLOAD; + *skb = NULL; + return TIPC_OK; +} + /** * tipc_sk_rcv - handle incoming message * @skb: buffer containing arriving message @@ -1776,8 +1805,7 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) struct tipc_net *tn; struct sock *sk; u32 dport = msg_destport(buf_msg(skb)); - int err = TIPC_OK; - uint limit; + int err; u32 dnode; /* Validate destination and message */ @@ -1788,20 +1816,8 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) } sk = &tsk->sk; - /* Queue message */ spin_lock_bh(&sk->sk_lock.slock); - - if (!sock_owned_by_user(sk)) { - err = filter_rcv(sk, &skb); - } else { - if (sk->sk_backlog.len == 0) - atomic_set(&tsk->dupl_rcvcnt, 0); - limit = rcvbuf_limit(sk, skb) + atomic_read(&tsk->dupl_rcvcnt); - if (likely(!sk_add_backlog(sk, skb, limit))) - skb = NULL; - else - err = -TIPC_ERR_OVERLOAD; - } + err = tipc_sk_enqueue_skb(sk, &skb); spin_unlock_bh(&sk->sk_lock.slock); sock_put(sk); exit: -- cgit v1.2.3 From e3a77561e7d326e18881ef3cb84807892b353459 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:39 -0500 Subject: tipc: split up function tipc_msg_eval() The function tipc_msg_eval() is in reality doing two related, but different tasks.
First it tries to find a new destination for named messages, in case there was no first lookup, or if the first lookup failed. Second, it does what its name suggests, evaluating the validity of the message and its destination, and returning an appropriate error code depending on the result. This is confusing, and in this commit we choose to break it up into two functions. A new function, tipc_msg_lookup_dest(), first attempts to find a new destination, if the message is of the right type. If this lookup fails, or if the message should not be subject to a second lookup, the already existing tipc_msg_reverse() is called. This function prepares the message for rejection, if applicable. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 42 +++++++++++++++++++++--------------------- net/tipc/msg.h | 9 +++++---- net/tipc/socket.c | 40 ++++++++++++++++++++++------------------ 3 files changed, 48 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 940d74197b8c..697223a21240 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -411,43 +411,43 @@ bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, return true; exit: kfree_skb(buf); + *dnode = 0; return false; } /** - * tipc_msg_eval: determine fate of message that found no destination - * @buf: the buffer containing the message. - * @dnode: return value: next-hop node, if message to be forwarded - * @err: error code to use, if message to be rejected - * + * tipc_msg_lookup_dest(): try to find new destination for named message + * @skb: the buffer containing the message. + * @dnode: return value: next-hop node, if destination found + * @err: return value: error code to use, if message to be rejected * Does not consume buffer - * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error - * code if message to be rejected + * Returns true if a destination is found, false otherwise */ -int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode) +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, + u32 *dnode, int *err) { - struct tipc_msg *msg = buf_msg(buf); + struct tipc_msg *msg = buf_msg(skb); u32 dport; - if (msg_type(msg) != TIPC_NAMED_MSG) - return -TIPC_ERR_NO_PORT; - if (skb_linearize(buf)) - return -TIPC_ERR_NO_NAME; - if (msg_data_sz(msg) > MAX_FORWARD_SIZE) - return -TIPC_ERR_NO_NAME; + if (!msg_isdata(msg)) + return false; + if (!msg_named(msg)) + return false; + *err = -TIPC_ERR_NO_NAME; + if (skb_linearize(skb)) + return false; if (msg_reroute_cnt(msg) > 0) - return -TIPC_ERR_NO_NAME; - + return false; *dnode = addr_domain(net, msg_lookup_scope(msg)); dport = tipc_nametbl_translate(net, msg_nametype(msg), - msg_nameinst(msg), - dnode); + msg_nameinst(msg), dnode); if (!dport) - return -TIPC_ERR_NO_NAME; + return false; msg_incr_reroute_cnt(msg); msg_set_destnode(msg, *dnode); msg_set_destport(msg, dport); - return TIPC_OK; + *err = TIPC_OK; + return true; } /* tipc_msg_reassemble() - clone a buffer chain of fragments and diff --git a/net/tipc/msg.h b/net/tipc/msg.h index f7ea95458c6f..60702992933d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -750,18 +750,19 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err); -int tipc_msg_eval(struct net *net, struct sk_buff *buf, u32 *dnode); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32
hsize, u32 destnode); -struct sk_buff *tipc_msg_create(uint user, uint type, - uint hdr_sz, uint data_sz, u32 dnode, - u32 onode, u32 dport, u32 oport, int errcode); +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, + uint data_sz, u32 dnode, u32 onode, + u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu, u32 dnode); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, + int *err); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); #endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1d98bfcda6f6..e14b2aedb212 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1739,7 +1739,7 @@ static int filter_rcv(struct sock *sk, struct sk_buff **skb) * @sk: socket * @skb: message * - * Caller must hold socket lock, but not port lock. + * Caller must hold socket lock * * Returns 0 */ @@ -1805,27 +1805,31 @@ int tipc_sk_rcv(struct net *net, struct sk_buff *skb) struct tipc_net *tn; struct sock *sk; u32 dport = msg_destport(buf_msg(skb)); - int err; + int err = -TIPC_ERR_NO_PORT; u32 dnode; - /* Validate destination and message */ + /* Find destination */ tsk = tipc_sk_lookup(net, dport); - if (unlikely(!tsk)) { - err = tipc_msg_eval(net, skb, &dnode); - goto exit; - } - sk = &tsk->sk; - - spin_lock_bh(&sk->sk_lock.slock); - err = tipc_sk_enqueue_skb(sk, &skb); - spin_unlock_bh(&sk->sk_lock.slock); - sock_put(sk); -exit: - if (unlikely(skb)) { - tn = net_generic(net, tipc_net_id); - if (!err || tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) - tipc_link_xmit_skb(net, skb, dnode, 0); + if (likely(tsk)) { + sk = &tsk->sk; + spin_lock_bh(&sk->sk_lock.slock); + err = tipc_sk_enqueue_skb(sk, &skb); + spin_unlock_bh(&sk->sk_lock.slock); + sock_put(sk); } + if (likely(!skb)) + return 0; + if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) + goto xmit; + if (!err) { + dnode = msg_destnode(buf_msg(skb)); + goto xmit; + } + tn = net_generic(net, tipc_net_id); + if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + return -EHOSTUNREACH; +xmit: + tipc_link_xmit_skb(net, skb, dnode, dport); return err ? -EHOSTUNREACH : 0; } -- cgit v1.2.3 From 94153e36e709e78fc4e1f93dc4e4da785690c7d1 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:40 -0500 Subject: tipc: use existing sk_write_queue for outgoing packet chain The list for outgoing traffic buffers from a socket is currently allocated on the stack. This forces us to initialize the queue for each sent message, something costing extra CPU cycles in the most critical data path. Later in this series we will introduce a new safe input buffer queue, something that would force us to initialize even the spinlock of the outgoing queue. A closer analysis reveals that the queue always is filled and emptied within the same lock_sock() session. It is therefore safe to use a queue aggregated in the socket itself for this purpose. Since there already exists a queue for this in struct sock, sk_write_queue, we introduce use of that queue in this commit. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e14b2aedb212..611a04fb0ddc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -69,8 +69,6 @@ * @pub_count: total # of publications port has made during its lifetime * @probing_state: * @probing_intv: - * @port: port - interacts with 'sk' and with the rest of the TIPC stack - * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @link_cong: non-zero if owner must sleep because of link congestion @@ -737,7 +735,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -753,13 +751,12 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, new_mtu: mtu = tipc_bclink_get_mtu(); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bclink_xmit(net, &head); + rc = tipc_bclink_xmit(net, pktchain); if (likely(rc >= 0)) { rc = dsz; break; @@ -773,7 +770,7 @@ new_mtu: tipc_sk(sk)->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); return rc; } @@ -897,7 +894,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; struct sk_buff *skb; struct tipc_name_seq *seq = &dest->addr.nameseq; struct iov_iter save; @@ -972,15 +969,14 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, save = m->msg_iter; new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &head); + rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); if (rc < 0) goto exit; do { - skb = skb_peek(&head); + skb = skb_peek(pktchain); TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; - rc = tipc_link_xmit(net, &head, dnode, tsk->portid); + rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid); if (likely(rc >= 0)) { if (sock->state != SS_READY) sock->state = SS_CONNECTING; @@ -996,7 +992,7 @@ new_mtu: tsk->link_cong = 1; rc = tipc_wait_for_sndmsg(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); exit: if (iocb) @@ -1054,7 +1050,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, struct net *net = sock_net(sk); struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head head; + struct sk_buff_head *pktchain = &sk->sk_write_queue; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); u32 portid = tsk->portid; int rc = -EINVAL; @@ -1091,13 +1087,12 @@ next: save = m->msg_iter; mtu = tsk->max_pkt; send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); - __skb_queue_head_init(&head); - rc = tipc_msg_build(mhdr, m, sent, send, mtu, &head); + rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); if (unlikely(rc < 0)) goto 
exit; do { if (likely(!tsk_conn_cong(tsk))) { - rc = tipc_link_xmit(net, &head, dnode, portid); + rc = tipc_link_xmit(net, pktchain, dnode, portid); if (likely(!rc)) { tsk->sent_unacked++; sent += send; @@ -1117,7 +1112,7 @@ next: } rc = tipc_wait_for_sndpkt(sock, &timeo); if (rc) - __skb_queue_purge(&head); + __skb_queue_purge(pktchain); } while (!rc); exit: if (iocb) -- cgit v1.2.3 From c637c1035534867b85b78b453c38c495b58e2c5a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:41 -0500 Subject: tipc: resolve race problem at unicast message reception TIPC handles message cardinality and sequencing at the link layer, before passing messages upwards to the destination sockets. During the upcall from link to socket no locks are held. It is therefore possible, and we see it happen occasionally, that messages arriving in different threads and delivered in sequence still bypass each other before they reach the destination socket. This must not happen, since it violates the sequentiality guarantee. We solve this by adding a new input buffer queue to the link structure. Arriving messages are added safely to the tail of that queue by the link, while the head of the queue is consumed, also safely, by the receiving socket. Sequentiality is secured per socket by only allowing buffers to be dequeued inside the socket lock. Since there may be multiple simultaneous readers of the queue, we use a 'filter' parameter to reduce the risk that they peek the same buffer from the queue, hence also reducing the risk of contention on the receiving socket locks. This solves the sequentiality problem, and seems to cause no measurable performance degradation. A nice side effect of this change is that lock handling in the functions tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that will enable future simplifications of those functions. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 20 ++-- net/tipc/link.c | 247 +++++++++++++++++++++----------------------------- net/tipc/link.h | 10 +- net/tipc/msg.c | 34 +++++++ net/tipc/msg.h | 73 +++++++++++++++ net/tipc/name_distr.c | 33 ++++--- net/tipc/name_distr.h | 2 +- net/tipc/node.c | 43 +++++---- net/tipc/node.h | 17 ++-- net/tipc/socket.c | 132 +++++++++++++++++---------- net/tipc/socket.h | 2 +- 11 files changed, 372 insertions(+), 241 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3b886eb35c87..2dfaf272928a 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -189,10 +189,8 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) void tipc_bclink_wakeup_users(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *skb; - while ((skb = skb_dequeue(&tn->bclink->link.waiting_sks))) - tipc_sk_rcv(net, skb); + tipc_sk_rcv(net, &tn->bclink->link.wakeupq); } /** @@ -271,9 +269,8 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) tipc_link_push_packets(tn->bcl); bclink_set_last_sent(net); } - if (unlikely(released && !skb_queue_empty(&tn->bcl->waiting_sks))) + if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq))) n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; - exit: tipc_bclink_unlock(net); } @@ -450,6 +447,9 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) u32 next_in; u32 seqno; int deferred = 0; + int pos = 0; + struct sk_buff *iskb; + struct sk_buff_head msgs; /* Screen out unwanted broadcast messages */ if (msg_mc_netid(msg) != tn->net_id) @@ -506,7 +506,8 @@ receive: bcl->stats.recv_bundled += msg_msgcnt(msg); tipc_bclink_unlock(net); tipc_node_unlock(node); - tipc_link_bundle_rcv(net, buf); + while (tipc_msg_extract(buf, &iskb, &pos)) + tipc_sk_mcast_rcv(net, iskb); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) @@ -527,7 +528,9 @@ receive: bclink_accept_pkt(node, seqno); tipc_bclink_unlock(net); tipc_node_unlock(node); - tipc_named_rcv(net, buf); + skb_queue_head_init(&msgs); + skb_queue_tail(&msgs, buf); + tipc_named_rcv(net, &msgs); } else { tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); @@ -944,10 +947,9 @@ int tipc_bclink_init(struct net *net) spin_lock_init(&bclink->lock); __skb_queue_head_init(&bcl->outqueue); __skb_queue_head_init(&bcl->deferred_queue); - skb_queue_head_init(&bcl->waiting_sks); + skb_queue_head_init(&bcl->wakeupq); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); - __skb_queue_head_init(&bclink->node.waiting_sks); bcl->owner = &bclink->node; bcl->owner->net = net; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; diff --git a/net/tipc/link.c b/net/tipc/link.c index 41cb09aa41de..942491234099 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -113,10 +113,8 @@ static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); -static int tipc_link_input(struct net *net, struct tipc_link *l, - struct sk_buff *buf); -static int tipc_link_prepare_input(struct net *net, struct tipc_link *l, - struct sk_buff **buf); +static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); +static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); /* * Simple link routines @@ -318,8 +316,9 @@ struct tipc_link *tipc_link_create(struct tipc_node 
*n_ptr, l_ptr->next_out_no = 1; __skb_queue_head_init(&l_ptr->outqueue); __skb_queue_head_init(&l_ptr->deferred_queue); - skb_queue_head_init(&l_ptr->waiting_sks); - + skb_queue_head_init(&l_ptr->wakeupq); + skb_queue_head_init(&l_ptr->inputq); + skb_queue_head_init(&l_ptr->namedq); link_reset_statistics(l_ptr); tipc_node_attach_link(n_ptr, l_ptr); setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr); @@ -387,7 +386,7 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, return false; TIPC_SKB_CB(buf)->chain_sz = chain_sz; TIPC_SKB_CB(buf)->chain_imp = imp; - skb_queue_tail(&link->waiting_sks, buf); + skb_queue_tail(&link->wakeupq, buf); link->stats.link_congs++; return true; } @@ -398,17 +397,19 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, * Move a number of waiting users, as permitted by available space in * the send queue, from link wait queue to node wait queue for wakeup */ -static void link_prepare_wakeup(struct tipc_link *link) +void link_prepare_wakeup(struct tipc_link *link) { uint pend_qsz = skb_queue_len(&link->outqueue); struct sk_buff *skb, *tmp; - skb_queue_walk_safe(&link->waiting_sks, skb, tmp) { + skb_queue_walk_safe(&link->wakeupq, skb, tmp) { if (pend_qsz >= link->queue_limit[TIPC_SKB_CB(skb)->chain_imp]) break; pend_qsz += TIPC_SKB_CB(skb)->chain_sz; - skb_unlink(skb, &link->waiting_sks); - skb_queue_tail(&link->owner->waiting_sks, skb); + skb_unlink(skb, &link->wakeupq); + skb_queue_tail(&link->inputq, skb); + link->owner->inputq = &link->inputq; + link->owner->action_flags |= TIPC_MSG_EVT; } } @@ -461,13 +462,13 @@ void tipc_link_reset(struct tipc_link *l_ptr) l_ptr->exp_msg_count = START_CHANGEOVER; } - /* Clean up all queues: */ + /* Clean up all queues, except inputq: */ __skb_queue_purge(&l_ptr->outqueue); __skb_queue_purge(&l_ptr->deferred_queue); - if (!skb_queue_empty(&l_ptr->waiting_sks)) { - skb_queue_splice_init(&l_ptr->waiting_sks, &owner->waiting_sks); - owner->action_flags |= TIPC_WAKEUP_USERS; - } + skb_queue_splice_init(&l_ptr->wakeupq, &l_ptr->inputq); + if (!skb_queue_empty(&l_ptr->inputq)) + owner->action_flags |= TIPC_MSG_EVT; + owner->inputq = &l_ptr->inputq; l_ptr->next_out = NULL; l_ptr->unacked_window = 0; l_ptr->checkpoint = 1; @@ -795,7 +796,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, static void skb2list(struct sk_buff *skb, struct sk_buff_head *list) { - __skb_queue_head_init(list); + skb_queue_head_init(list); __skb_queue_tail(list, skb); } @@ -841,19 +842,13 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); } - if (link) return rc; - if (likely(in_own_node(net, dnode))) { - /* As a node local message chain never contains more than one - * buffer, we just need to dequeue one SKB buffer from the - * head list. 
- */ - return tipc_sk_rcv(net, __skb_dequeue(list)); - } - __skb_queue_purge(list); + if (likely(in_own_node(net, dnode))) + return tipc_sk_rcv(net, list); + __skb_queue_purge(list); return rc; } @@ -1162,7 +1157,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; if (unlikely(!l_ptr)) - goto unlock_discard; + goto unlock; /* Verify that communication with node is currently allowed */ if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) && @@ -1173,7 +1168,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN; if (tipc_node_blocked(n_ptr)) - goto unlock_discard; + goto unlock; /* Validate message sequence number info */ seq_no = msg_seqno(msg); @@ -1197,18 +1192,16 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) if (unlikely(l_ptr->next_out)) tipc_link_push_packets(l_ptr); - if (released && !skb_queue_empty(&l_ptr->waiting_sks)) { + if (released && !skb_queue_empty(&l_ptr->wakeupq)) link_prepare_wakeup(l_ptr); - l_ptr->owner->action_flags |= TIPC_WAKEUP_USERS; - } /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { tipc_link_proto_rcv(l_ptr, skb); link_retrieve_defq(l_ptr, &head); - tipc_node_unlock(n_ptr); - continue; + skb = NULL; + goto unlock; } /* Traffic message. Conditionally activate link */ @@ -1217,18 +1210,18 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) if (link_working_working(l_ptr)) { /* Re-insert buffer in front of queue */ __skb_queue_head(&head, skb); - tipc_node_unlock(n_ptr); - continue; + skb = NULL; + goto unlock; } - goto unlock_discard; + goto unlock; } /* Link is now in state WORKING_WORKING */ if (unlikely(seq_no != mod(l_ptr->next_in_no))) { link_handle_out_of_seq_msg(l_ptr, skb); link_retrieve_defq(l_ptr, &head); - tipc_node_unlock(n_ptr); - continue; + skb = NULL; + goto unlock; } l_ptr->next_in_no++; if (unlikely(!skb_queue_empty(&l_ptr->deferred_queue))) @@ -1238,97 +1231,102 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) l_ptr->stats.sent_acks++; tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } - - if (tipc_link_prepare_input(net, l_ptr, &skb)) { - tipc_node_unlock(n_ptr); - continue; - } - tipc_node_unlock(n_ptr); - - if (tipc_link_input(net, l_ptr, skb) != 0) - goto discard; - continue; -unlock_discard: + tipc_link_input(l_ptr, skb); + skb = NULL; +unlock: tipc_node_unlock(n_ptr); discard: - kfree_skb(skb); + if (unlikely(skb)) + kfree_skb(skb); } } -/** - * tipc_link_prepare_input - process TIPC link messages - * - * returns nonzero if the message was consumed +/* tipc_data_input - deliver data and name distr msgs to upper layer * + * Consumes buffer if message is of right type * Node lock must be held */ -static int tipc_link_prepare_input(struct net *net, struct tipc_link *l, - struct sk_buff **buf) +static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) { - struct tipc_node *n; - struct tipc_msg *msg; - int res = -EINVAL; + struct tipc_node *node = link->owner; + struct tipc_msg *msg = buf_msg(skb); + u32 dport = msg_destport(msg); - n = l->owner; - msg = buf_msg(*buf); switch (msg_user(msg)) { - case CHANGEOVER_PROTOCOL: - if (tipc_link_tunnel_rcv(n, buf)) - res = 0; - break; - case MSG_FRAGMENTER: - l->stats.recv_fragments++; - if 
(tipc_buf_append(&l->reasm_buf, buf)) { - l->stats.recv_fragmented++; - res = 0; - } else if (!l->reasm_buf) { - tipc_link_reset(l); + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: + if (tipc_skb_queue_tail(&link->inputq, skb, dport)) { + node->inputq = &link->inputq; + node->action_flags |= TIPC_MSG_EVT; } - break; - case MSG_BUNDLER: - l->stats.recv_bundles++; - l->stats.recv_bundled += msg_msgcnt(msg); - res = 0; - break; + return true; case NAME_DISTRIBUTOR: - n->bclink.recv_permitted = true; - res = 0; - break; + node->bclink.recv_permitted = true; + node->namedq = &link->namedq; + skb_queue_tail(&link->namedq, skb); + if (skb_queue_len(&link->namedq) == 1) + node->action_flags |= TIPC_NAMED_MSG_EVT; + return true; + case MSG_BUNDLER: + case CHANGEOVER_PROTOCOL: + case MSG_FRAGMENTER: case BCAST_PROTOCOL: - tipc_link_sync_rcv(n, *buf); - break; + return false; default: - res = 0; - } - return res; + pr_warn("Dropping received illegal msg type\n"); + kfree_skb(skb); + return false; + }; } -/** - * tipc_link_input - Deliver message too higher layers + +/* tipc_link_input - process packet that has passed link protocol check + * + * Consumes buffer + * Node lock must be held */ -static int tipc_link_input(struct net *net, struct tipc_link *l, - struct sk_buff *buf) +static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(buf); - int res = 0; + struct tipc_node *node = link->owner; + struct tipc_msg *msg = buf_msg(skb); + struct sk_buff *iskb; + int pos = 0; + + if (likely(tipc_data_input(link, skb))) + return; switch (msg_user(msg)) { - case TIPC_LOW_IMPORTANCE: - case TIPC_MEDIUM_IMPORTANCE: - case TIPC_HIGH_IMPORTANCE: - case TIPC_CRITICAL_IMPORTANCE: - case CONN_MANAGER: - tipc_sk_rcv(net, buf); + case CHANGEOVER_PROTOCOL: + if (!tipc_link_tunnel_rcv(node, &skb)) + break; + if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { + tipc_data_input(link, skb); + break; + } + case MSG_BUNDLER: + link->stats.recv_bundles++; + link->stats.recv_bundled += msg_msgcnt(msg); + + while (tipc_msg_extract(skb, &iskb, &pos)) + tipc_data_input(link, iskb); break; - case NAME_DISTRIBUTOR: - tipc_named_rcv(net, buf); + case MSG_FRAGMENTER: + link->stats.recv_fragments++; + if (tipc_buf_append(&link->reasm_buf, &skb)) { + link->stats.recv_fragmented++; + tipc_data_input(link, skb); + } else if (!link->reasm_buf) { + tipc_link_reset(link); + } break; - case MSG_BUNDLER: - tipc_link_bundle_rcv(net, buf); + case BCAST_PROTOCOL: + tipc_link_sync_rcv(node, skb); break; default: - res = -EINVAL; - } - return res; + break; + }; } /** @@ -1779,7 +1777,7 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, * @from_pos: offset to extract from * * Returns a new message buffer containing an embedded message. The - * encapsulating message itself is left unchanged. + * encapsulating buffer is left unchanged. */ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) { @@ -1793,8 +1791,6 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) return eb; } - - /* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. * Owner node is locked. 
*/ @@ -1893,41 +1889,6 @@ exit: return *buf != NULL; } -/* - * Bundler functionality: - */ -void tipc_link_bundle_rcv(struct net *net, struct sk_buff *buf) -{ - u32 msgcount = msg_msgcnt(buf_msg(buf)); - u32 pos = INT_H_SIZE; - struct sk_buff *obuf; - struct tipc_msg *omsg; - - while (msgcount--) { - obuf = buf_extract(buf, pos); - if (obuf == NULL) { - pr_warn("Link unable to unbundle message(s)\n"); - break; - } - omsg = buf_msg(obuf); - pos += align(msg_size(omsg)); - if (msg_isdata(omsg)) { - if (unlikely(msg_type(omsg) == TIPC_MCAST_MSG)) - tipc_sk_mcast_rcv(net, obuf); - else - tipc_sk_rcv(net, obuf); - } else if (msg_user(omsg) == CONN_MANAGER) { - tipc_sk_rcv(net, obuf); - } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { - tipc_named_rcv(net, obuf); - } else { - pr_warn("Illegal bundled msg: %u\n", msg_user(omsg)); - kfree_skb(obuf); - } - } - kfree_skb(buf); -} - static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) { unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; diff --git a/net/tipc/link.h b/net/tipc/link.h index 5b9a17f26280..34d3f55c4cea 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -131,8 +131,10 @@ struct tipc_stats { * @next_in_no: next sequence number to expect for inbound messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @unacked_window: # of inbound messages rx'd without ack'ing back to peer + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards * @next_out: ptr to first unsent outbound message in queue - * @waiting_sks: linked list of sockets waiting for link congestion to abate + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments * @stats: collects statistics regarding link activity @@ -184,10 +186,12 @@ struct tipc_link { u32 next_in_no; struct sk_buff_head deferred_queue; u32 unacked_window; + struct sk_buff_head inputq; + struct sk_buff_head namedq; /* Congestion handling */ struct sk_buff *next_out; - struct sk_buff_head waiting_sks; + struct sk_buff_head wakeupq; /* Fragmentation/reassembly */ u32 long_msg_seq_no; @@ -228,7 +232,6 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, u32 selector); int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list); -void tipc_link_bundle_rcv(struct net *net, struct sk_buff *buf); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); void tipc_link_push_packets(struct tipc_link *l_ptr); @@ -244,6 +247,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); +void link_prepare_wakeup(struct tipc_link *l); /* * Link sequence number manipulation routines (uses modulo 2**16 arithmetic) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 697223a21240..b6eb90cd3ef7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -326,6 +326,40 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) return true; } +/** + * tipc_msg_extract(): extract bundled inner packet from buffer + * @skb: linear outer buffer, to be extracted from. 
+ * @iskb: extracted inner buffer, to be returned + * @pos: position of msg to be extracted. Returns with pointer of next msg + * Consumes outer buffer when last packet extracted + * Returns true when when there is an extracted buffer, otherwise false + */ +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) +{ + struct tipc_msg *msg = buf_msg(skb); + int imsz; + struct tipc_msg *imsg = (struct tipc_msg *)(msg_data(msg) + *pos); + + /* Is there space left for shortest possible message? */ + if (*pos > (msg_data_sz(msg) - SHORT_H_SIZE)) + goto none; + imsz = msg_size(imsg); + + /* Is there space left for current message ? */ + if ((*pos + imsz) > msg_data_sz(msg)) + goto none; + *iskb = tipc_buf_acquire(imsz); + if (!*iskb) + goto none; + skb_copy_to_linear_data(*iskb, imsg, imsz); + *pos += align(imsz); + return true; +none: + kfree_skb(skb); + *iskb = NULL; + return false; +} + /** * tipc_msg_make_bundle(): Create bundle buf and append message to its tail * @list: the buffer chain diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 60702992933d..ab467261bd9d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -45,6 +45,7 @@ * Note: Some items are also used with TIPC internal message headers */ #define TIPC_VERSION 2 +struct plist; /* * Payload message users are defined in TIPC's public API: @@ -759,10 +760,82 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); bool tipc_msg_make_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu, u32 dnode); +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, int *err); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); +/* tipc_skb_peek_port(): find a destination port, ignoring all destinations + * up to and including 'filter'. + * Note: ignoring previously tried destinations minimizes the risk of + * contention on the socket lock + * @list: list to be peeked in + * @filter: last destination to be ignored from search + * Returns a destination port number, of applicable. + */ +static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter) +{ + struct sk_buff *skb; + u32 dport = 0; + bool ignore = true; + + spin_lock_bh(&list->lock); + skb_queue_walk(list, skb) { + dport = msg_destport(buf_msg(skb)); + if (!filter || skb_queue_is_last(list, skb)) + break; + if (dport == filter) + ignore = false; + else if (!ignore) + break; + } + spin_unlock_bh(&list->lock); + return dport; +} + +/* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list + * @list: list to be unlinked from + * @dport: selection criteria for buffer to unlink + */ +static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, + u32 dport) +{ + struct sk_buff *_skb, *tmp, *skb = NULL; + + spin_lock_bh(&list->lock); + skb_queue_walk_safe(list, _skb, tmp) { + if (msg_destport(buf_msg(_skb)) == dport) { + __skb_unlink(_skb, list); + skb = _skb; + break; + } + } + spin_unlock_bh(&list->lock); + return skb; +} + +/* tipc_skb_queue_tail(): add buffer to tail of list; + * @list: list to be appended to + * @skb: buffer to append. 
Always appended + * @dport: the destination port of the buffer + * returns true if dport differs from previous destination + */ +static inline bool tipc_skb_queue_tail(struct sk_buff_head *list, + struct sk_buff *skb, u32 dport) +{ + struct sk_buff *_skb = NULL; + bool rv = false; + + spin_lock_bh(&list->lock); + _skb = skb_peek_tail(list); + if (!_skb || (msg_destport(buf_msg(_skb)) != dport) || + (skb_queue_len(list) > 32)) + rv = true; + __skb_queue_tail(list, skb); + spin_unlock_bh(&list->lock); + return rv; +} + #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index dd8564cd9dbb..fcb07915aaac 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -381,25 +381,34 @@ void tipc_named_process_backlog(struct net *net) } /** - * tipc_named_rcv - process name table update message sent by another node + * tipc_named_rcv - process name table update messages sent by another node */ -void tipc_named_rcv(struct net *net, struct sk_buff *buf) +void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_msg *msg = buf_msg(buf); - struct distr_item *item = (struct distr_item *)msg_data(msg); - u32 count = msg_data_sz(msg) / ITEM_SIZE; - u32 node = msg_orignode(msg); + struct tipc_msg *msg; + struct distr_item *item; + uint count; + u32 node; + struct sk_buff *skb; + int mtype; spin_lock_bh(&tn->nametbl_lock); - while (count--) { - if (!tipc_update_nametbl(net, item, node, msg_type(msg))) - tipc_named_add_backlog(item, msg_type(msg), node); - item++; + for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { + msg = buf_msg(skb); + mtype = msg_type(msg); + item = (struct distr_item *)msg_data(msg); + count = msg_data_sz(msg) / ITEM_SIZE; + node = msg_orignode(msg); + while (count--) { + if (!tipc_update_nametbl(net, item, node, mtype)) + tipc_named_add_backlog(item, mtype, node); + item++; + } + kfree_skb(skb); + tipc_named_process_backlog(net); } - tipc_named_process_backlog(net); spin_unlock_bh(&tn->nametbl_lock); - kfree_skb(buf); } /** diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 5ec10b59527b..dd2d9fd80da2 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -71,7 +71,7 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); void named_cluster_distribute(struct net *net, struct sk_buff *buf); void tipc_named_node_up(struct net *net, u32 dnode); -void tipc_named_rcv(struct net *net, struct sk_buff *buf); +void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue); void tipc_named_reinit(struct net *net); void tipc_named_process_backlog(struct net *net); void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr); diff --git a/net/tipc/node.c b/net/tipc/node.c index 1c409c45f0fe..dcb83d9b2193 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -111,11 +111,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->waiting_sks); __skb_queue_head_init(&n_ptr->bclink.deferred_queue); - hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); - list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) break; @@ -201,19 +198,22 @@ void tipc_node_abort_sock_conns(struct net *net, struct list_head *conns) { struct tipc_net *tn = net_generic(net, 
tipc_net_id); struct tipc_sock_conn *conn, *safe; - struct sk_buff *buf; + struct sk_buff *skb; + struct sk_buff_head skbs; + skb_queue_head_init(&skbs); list_for_each_entry_safe(conn, safe, conns, list) { - buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, tn->own_addr, conn->peer_node, conn->port, conn->peer_port, TIPC_ERR_NO_NODE); - if (likely(buf)) - tipc_sk_rcv(net, buf); + if (likely(skb)) + skb_queue_tail(&skbs, skb); list_del(&conn->list); kfree(conn); } + tipc_sk_rcv(net, &skbs); } /** @@ -568,37 +568,36 @@ void tipc_node_unlock(struct tipc_node *node) struct net *net = node->net; LIST_HEAD(nsub_list); LIST_HEAD(conn_sks); - struct sk_buff_head waiting_sks; u32 addr = 0; - int flags = node->action_flags; + u32 flags = node->action_flags; u32 link_id = 0; + struct sk_buff_head *inputq = node->inputq; + struct sk_buff_head *namedq = node->inputq; - if (likely(!flags)) { + if (likely(!flags || (flags == TIPC_MSG_EVT))) { + node->action_flags = 0; spin_unlock_bh(&node->lock); + if (flags == TIPC_MSG_EVT) + tipc_sk_rcv(net, inputq); return; } addr = node->addr; link_id = node->link_id; - __skb_queue_head_init(&waiting_sks); - - if (flags & TIPC_WAKEUP_USERS) - skb_queue_splice_init(&node->waiting_sks, &waiting_sks); + namedq = node->namedq; if (flags & TIPC_NOTIFY_NODE_DOWN) { list_replace_init(&node->publ_list, &nsub_list); list_replace_init(&node->conn_sks, &conn_sks); } - node->action_flags &= ~(TIPC_WAKEUP_USERS | TIPC_NOTIFY_NODE_DOWN | + node->action_flags &= ~(TIPC_MSG_EVT | TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP | TIPC_NOTIFY_LINK_DOWN | - TIPC_WAKEUP_BCAST_USERS); + TIPC_WAKEUP_BCAST_USERS | + TIPC_NAMED_MSG_EVT); spin_unlock_bh(&node->lock); - while (!skb_queue_empty(&waiting_sks)) - tipc_sk_rcv(net, __skb_dequeue(&waiting_sks)); - if (!list_empty(&conn_sks)) tipc_node_abort_sock_conns(net, &conn_sks); @@ -618,6 +617,12 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_NOTIFY_LINK_DOWN) tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); + + if (flags & TIPC_MSG_EVT) + tipc_sk_rcv(net, inputq); + + if (flags & TIPC_NAMED_MSG_EVT) + tipc_named_rcv(net, namedq); } /* Caller should hold node lock for the passed node */ diff --git a/net/tipc/node.h b/net/tipc/node.h index 43ef88ef3035..c2b0fcf4042b 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -55,14 +55,15 @@ * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type */ enum { + TIPC_MSG_EVT = 1, TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2), TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_WAKEUP_USERS = (1 << 5), - TIPC_WAKEUP_BCAST_USERS = (1 << 6), - TIPC_NOTIFY_LINK_UP = (1 << 7), - TIPC_NOTIFY_LINK_DOWN = (1 << 8) + TIPC_WAKEUP_BCAST_USERS = (1 << 5), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7), + TIPC_NAMED_MSG_EVT = (1 << 8) }; /** @@ -92,6 +93,9 @@ struct tipc_node_bclink { * @lock: spinlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain + * @inputq: pointer to input queue containing messages for msg event + * @namedq: pointer to name table input queue with name table messages + * @curr_link: the link holding the node lock, if any * @active_links: pointers to active links to node * @links: pointers to all links to node * @action_flags: bit mask of different types of node actions @@ -109,10 +113,12 @@ struct tipc_node { 
spinlock_t lock; struct net *net; struct hlist_node hash; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; struct tipc_link *active_links[2]; u32 act_mtus[2]; struct tipc_link *links[MAX_BEARERS]; - unsigned int action_flags; + int action_flags; struct tipc_node_bclink bclink; struct list_head list; int link_cnt; @@ -120,7 +126,6 @@ struct tipc_node { u32 signature; u32 link_id; struct list_head publ_list; - struct sk_buff_head waiting_sks; struct list_head conn_sks; struct rcu_head rcu; }; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 611a04fb0ddc..c1a4611649ab 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -41,6 +41,7 @@ #include "node.h" #include "link.h" #include "config.h" +#include "name_distr.h" #include "socket.h" #define SS_LISTENING -1 /* socket is listening */ @@ -785,10 +786,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) struct sk_buff *b; uint i, last, dst = 0; u32 scope = TIPC_CLUSTER_SCOPE; + struct sk_buff_head msgs; if (in_own_node(net, msg_orignode(msg))) scope = TIPC_NODE_SCOPE; + if (unlikely(!msg_mcast(msg))) { + pr_warn("Received non-multicast msg in multicast\n"); + kfree_skb(buf); + goto exit; + } /* Create destination port list: */ tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), msg_nameupper(msg), scope, &dports); @@ -806,9 +813,12 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) continue; } msg_set_destport(msg, item->ports[i]); - tipc_sk_rcv(net, b); + skb_queue_head_init(&msgs); + skb_queue_tail(&msgs, b); + tipc_sk_rcv(net, &msgs); } } +exit: tipc_port_list_free(&dports); } @@ -1760,71 +1770,99 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) } /** - * tipc_sk_enqueue_skb - enqueue buffer to socket or backlog queue - * @sk: socket - * @skb: pointer to message. Set to NULL if buffer is consumed. 
- * @dnode: if buffer should be forwarded/returned, send to this node + * tipc_sk_enqueue - extract all buffers with destination 'dport' from + * inputq and try adding them to socket or backlog queue + * @inputq: list of incoming buffers with potentially different destinations + * @sk: socket where the buffers should be enqueued + * @dport: port number for the socket + * @_skb: returned buffer to be forwarded or rejected, if applicable * * Caller must hold socket lock * - * Returns TIPC_OK (0) or -tipc error code + * Returns TIPC_OK if all buffers enqueued, otherwise -TIPC_ERR_OVERLOAD + * or -TIPC_ERR_NO_PORT */ -static int tipc_sk_enqueue_skb(struct sock *sk, struct sk_buff **skb) +static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, + u32 dport, struct sk_buff **_skb) { unsigned int lim; atomic_t *dcnt; - - if (unlikely(!*skb)) - return TIPC_OK; - if (!sock_owned_by_user(sk)) - return filter_rcv(sk, skb); - dcnt = &tipc_sk(sk)->dupl_rcvcnt; - if (sk->sk_backlog.len) - atomic_set(dcnt, 0); - lim = rcvbuf_limit(sk, *skb) + atomic_read(dcnt); - if (unlikely(sk_add_backlog(sk, *skb, lim))) + int err; + struct sk_buff *skb; + unsigned long time_limit = jiffies + 2; + + while (skb_queue_len(inputq)) { + skb = tipc_skb_dequeue(inputq, dport); + if (unlikely(!skb)) + return TIPC_OK; + /* Return if softirq window exhausted */ + if (unlikely(time_after_eq(jiffies, time_limit))) + return TIPC_OK; + if (!sock_owned_by_user(sk)) { + err = filter_rcv(sk, &skb); + if (likely(!skb)) + continue; + *_skb = skb; + return err; + } + dcnt = &tipc_sk(sk)->dupl_rcvcnt; + if (sk->sk_backlog.len) + atomic_set(dcnt, 0); + lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); + if (likely(!sk_add_backlog(sk, skb, lim))) + continue; + *_skb = skb; return -TIPC_ERR_OVERLOAD; - *skb = NULL; + } return TIPC_OK; } /** - * tipc_sk_rcv - handle incoming message - * @skb: buffer containing arriving message - * Consumes buffer - * Returns 0 if success, or errno: -EHOSTUNREACH + * tipc_sk_rcv - handle a chain of incoming buffers + * @inputq: buffer list containing the buffers + * Consumes all buffers in list until inputq is empty + * Note: may be called in multiple threads referring to the same queue + * Returns 0 if last buffer was accepted, otherwise -EHOSTUNREACH + * Only node local calls check the return value, sending single-buffer queues */ -int tipc_sk_rcv(struct net *net, struct sk_buff *skb) +int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { + u32 dnode, dport = 0; + int err = -TIPC_ERR_NO_PORT; + struct sk_buff *skb; struct tipc_sock *tsk; struct tipc_net *tn; struct sock *sk; - u32 dport = msg_destport(buf_msg(skb)); - int err = -TIPC_ERR_NO_PORT; - u32 dnode; - /* Find destination */ - tsk = tipc_sk_lookup(net, dport); - if (likely(tsk)) { - sk = &tsk->sk; - spin_lock_bh(&sk->sk_lock.slock); - err = tipc_sk_enqueue_skb(sk, &skb); - spin_unlock_bh(&sk->sk_lock.slock); - sock_put(sk); - } - if (likely(!skb)) - return 0; - if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) - goto xmit; - if (!err) { - dnode = msg_destnode(buf_msg(skb)); - goto xmit; - } - tn = net_generic(net, tipc_net_id); - if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) - return -EHOSTUNREACH; + while (skb_queue_len(inputq)) { + skb = NULL; + dport = tipc_skb_peek_port(inputq, dport); + tsk = tipc_sk_lookup(net, dport); + if (likely(tsk)) { + sk = &tsk->sk; + if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { + err = tipc_sk_enqueue(inputq, sk, dport, &skb); + spin_unlock_bh(&sk->sk_lock.slock); + dport = 0; 
+ } + sock_put(sk); + } else { + skb = tipc_skb_dequeue(inputq, dport); + } + if (likely(!skb)) + continue; + if (tipc_msg_lookup_dest(net, skb, &dnode, &err)) + goto xmit; + if (!err) { + dnode = msg_destnode(buf_msg(skb)); + goto xmit; + } + tn = net_generic(net, tipc_net_id); + if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err)) + continue; xmit: - tipc_link_xmit_skb(net, skb, dnode, dport); + tipc_link_xmit_skb(net, skb, dnode, dport); + } return err ? -EHOSTUNREACH : 0; } diff --git a/net/tipc/socket.h b/net/tipc/socket.h index f56c3fded51f..e3dbdc0e1be7 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -49,7 +49,7 @@ int tipc_sock_create_local(struct net *net, int type, struct socket **res); void tipc_sock_release_local(struct socket *sock); int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, int flags); -int tipc_sk_rcv(struct net *net, struct sk_buff *buf); +int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); struct sk_buff *tipc_sk_socks_show(struct net *net); void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf); void tipc_sk_reinit(struct net *net); -- cgit v1.2.3 From 708ac32cb5e1305cf3670e147eedcc699d110ed0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:42 -0500 Subject: tipc: simplify connection abort notifications when links break The new input message queue in struct tipc_link can be used for delivering connection abort messages to subscribing sockets. This makes it possible to simplify the code for such cases. This commit removes the temporary list in tipc_node_unlock() used for transforming abort subscriptions to messages. Instead, the abort messages are now created at the moment of lost contact, and then added to the last failed link's generic input queue for delivery to the sockets concerned. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/node.c | 69 ++++++++++++++++++++++++--------------------------------- 1 file changed, 29 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index dcb83d9b2193..c7fdf3dec92c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -194,28 +194,6 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) tipc_node_unlock(node); } -void tipc_node_abort_sock_conns(struct net *net, struct list_head *conns) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_sock_conn *conn, *safe; - struct sk_buff *skb; - struct sk_buff_head skbs; - - skb_queue_head_init(&skbs); - list_for_each_entry_safe(conn, safe, conns, list) { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, 0, - tn->own_addr, conn->peer_node, - conn->port, conn->peer_port, - TIPC_ERR_NO_NODE); - if (likely(skb)) - skb_queue_tail(&skbs, skb); - list_del(&conn->list); - kfree(conn); - } - tipc_sk_rcv(net, &skbs); -} - /** * tipc_node_link_up - handle addition of link * @@ -377,7 +355,11 @@ static void node_established_contact(struct tipc_node *n_ptr) static void node_lost_contact(struct tipc_node *n_ptr) { char addr_string[16]; - u32 i; + struct tipc_sock_conn *conn, *safe; + struct list_head *conns = &n_ptr->conn_sks; + struct sk_buff *skb; + struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); + uint i; pr_debug("Lost contact with %s\n", tipc_addr_string_fill(addr_string, n_ptr->addr)); @@ -413,11 +395,25 @@ static void node_lost_contact(struct tipc_node *n_ptr) n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; - /* Notify subscribers and prevent re-contact with node until - * cleanup is done. - */ - n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN | - TIPC_NOTIFY_NODE_DOWN; + /* Prevent re-contact with node until cleanup is done */ + n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN; + + /* Notify publications from this node */ + n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; + + /* Notify sockets connected to node */ + list_for_each_entry_safe(conn, safe, conns, list) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, + SHORT_H_SIZE, 0, tn->own_addr, + conn->peer_node, conn->port, + conn->peer_port, TIPC_ERR_NO_NODE); + if (likely(skb)) { + skb_queue_tail(n_ptr->inputq, skb); + n_ptr->action_flags |= TIPC_MSG_EVT; + } + list_del(&conn->list); + kfree(conn); + } } struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area, @@ -566,13 +562,12 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, void tipc_node_unlock(struct tipc_node *node) { struct net *net = node->net; - LIST_HEAD(nsub_list); - LIST_HEAD(conn_sks); u32 addr = 0; u32 flags = node->action_flags; u32 link_id = 0; + struct list_head *publ_list; struct sk_buff_head *inputq = node->inputq; - struct sk_buff_head *namedq = node->inputq; + struct sk_buff_head *namedq; if (likely(!flags || (flags == TIPC_MSG_EVT))) { node->action_flags = 0; @@ -585,11 +580,8 @@ void tipc_node_unlock(struct tipc_node *node) addr = node->addr; link_id = node->link_id; namedq = node->namedq; + publ_list = &node->publ_list; - if (flags & TIPC_NOTIFY_NODE_DOWN) { - list_replace_init(&node->publ_list, &nsub_list); - list_replace_init(&node->conn_sks, &conn_sks); - } node->action_flags &= ~(TIPC_MSG_EVT | TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP | TIPC_NOTIFY_LINK_DOWN | @@ -598,11 +590,8 @@ void tipc_node_unlock(struct tipc_node *node) spin_unlock_bh(&node->lock); - if (!list_empty(&conn_sks)) - 
tipc_node_abort_sock_conns(net, &conn_sks); - - if (!list_empty(&nsub_list)) - tipc_publ_notify(net, &nsub_list, addr); + if (flags & TIPC_NOTIFY_NODE_DOWN) + tipc_publ_notify(net, publ_list, addr); if (flags & TIPC_WAKEUP_BCAST_USERS) tipc_bclink_wakeup_users(net); -- cgit v1.2.3 From 3c724acdd5049907555a831f814bfd5927c3350c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:43 -0500 Subject: tipc: simplify socket multicast reception The structure 'tipc_port_list' is used to collect port numbers representing multicast destination socket on a receiving node. The list is not based on a standard linked list, and is in reality optimized for the uncommon case that there are more than one multicast destinations per node. This makes the list handling unecessarily complex, and as a consequence, even the socket multicast reception becomes more complex. In this commit, we replace 'tipc_port_list' with a new 'struct tipc_plist', which is based on a standard list. We give the new list stack (push/pop) semantics, someting that simplifies the implementation of the function tipc_sk_mcast_rcv(). Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 49 +------------------------------------------------ net/tipc/bcast.h | 24 ++++-------------------- net/tipc/name_table.c | 44 +++++++++++++++++++++++++++++++++++++++++--- net/tipc/name_table.h | 18 ++++++++++++++++-- net/tipc/socket.c | 48 +++++++++++++++++++++--------------------------- net/tipc/socket.h | 2 +- 6 files changed, 84 insertions(+), 101 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 2dfaf272928a..3eaa931e2e8c 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, 2014, Ericsson AB + * Copyright (c) 2004-2006, 2014-2015, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -1037,50 +1037,3 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, } } } - -/** - * tipc_port_list_add - add a port to a port list, ensuring no duplicates - */ -void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port) -{ - struct tipc_port_list *item = pl_ptr; - int i; - int item_sz = PLSIZE; - int cnt = pl_ptr->count; - - for (; ; cnt -= item_sz, item = item->next) { - if (cnt < PLSIZE) - item_sz = cnt; - for (i = 0; i < item_sz; i++) - if (item->ports[i] == port) - return; - if (i < PLSIZE) { - item->ports[i] = port; - pl_ptr->count++; - return; - } - if (!item->next) { - item->next = kmalloc(sizeof(*item), GFP_ATOMIC); - if (!item->next) { - pr_warn("Incomplete multicast delivery, no memory\n"); - return; - } - item->next->next = NULL; - } - } -} - -/** - * tipc_port_list_free - free dynamically created entries in port_list chain - * - */ -void tipc_port_list_free(struct tipc_port_list *pl_ptr) -{ - struct tipc_port_list *item; - struct tipc_port_list *next; - - for (item = pl_ptr->next; item; item = next) { - next = item->next; - kfree(item); - } -} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 6ea190dccfe1..8f4d4dc38e11 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -1,7 +1,7 @@ /* * net/tipc/bcast.h: Include file for TIPC broadcast code * - * Copyright (c) 2003-2006, 2014, Ericsson AB + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. 
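Before reading the diffs below, it may help to see the push/pop semantics described in the commit message as a minimal stand-alone model. This is an illustration only, not the kernel implementation (which lives in name_table.c/name_table.h further down): the first destination is kept inline so the common single-destination case needs no allocation, duplicates are ignored, and pop drains the overflow entries before handing out the inline slot.

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>

	struct plist_node {
		uint32_t port;
		struct plist_node *next;
	};

	struct plist {
		uint32_t port;			/* first destination, kept inline */
		struct plist_node *head;	/* overflow entries, if any */
	};

	static void plist_push(struct plist *pl, uint32_t port)
	{
		struct plist_node *n;

		if (!pl->port) {		/* common case: first destination */
			pl->port = port;
			return;
		}
		if (pl->port == port)		/* ignore duplicates */
			return;
		for (n = pl->head; n; n = n->next)
			if (n->port == port)
				return;
		n = malloc(sizeof(*n));
		if (!n)
			return;			/* dropped silently, like the kernel version */
		n->port = port;
		n->next = pl->head;
		pl->head = n;
	}

	static uint32_t plist_pop(struct plist *pl)
	{
		struct plist_node *n = pl->head;
		uint32_t port;

		if (!n) {			/* overflow drained: hand out inline slot */
			port = pl->port;
			pl->port = 0;
			return port;
		}
		port = n->port;
		pl->head = n->next;
		free(n);
		return port;
	}

	int main(void)
	{
		struct plist pl = { 0, NULL };
		uint32_t port;

		plist_push(&pl, 100);
		plist_push(&pl, 200);
		plist_push(&pl, 100);		/* duplicate, ignored */

		while ((port = plist_pop(&pl)))
			printf("deliver clone to port %u\n", port);
		return 0;
	}

The zero return from plist_pop doubles as the loop terminator, which is why the receive path can simply pop until the list is empty.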
* @@ -41,22 +41,6 @@ #include "link.h" #include "node.h" -#define TIPC_BCLINK_RESET 1 -#define PLSIZE 32 -#define BCBEARER MAX_BEARERS - -/** - * struct tipc_port_list - set of node local destination ports - * @count: # of ports in set (only valid for first entry in list) - * @next: pointer to next entry in list - * @ports: array of port references - */ -struct tipc_port_list { - int count; - struct tipc_port_list *next; - u32 ports[PLSIZE]; -}; - /** * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link * @primary: pointer to primary bearer @@ -71,6 +55,9 @@ struct tipc_bcbearer_pair { struct tipc_bearer *secondary; }; +#define TIPC_BCLINK_RESET 1 +#define BCBEARER MAX_BEARERS + /** * struct tipc_bcbearer - bearer used by broadcast link * @bearer: (non-standard) broadcast bearer structure @@ -126,9 +113,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } -void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); -void tipc_port_list_free(struct tipc_port_list *pl_ptr); - int tipc_bclink_init(struct net *net); void tipc_bclink_stop(struct net *net); void tipc_bclink_set_flags(struct net *tn, unsigned int flags); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index ce09b863528c..18a3d44238bc 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1,7 +1,7 @@ /* * net/tipc/name_table.c: TIPC name table code * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2004-2008, 2010-2014, Wind River Systems * All rights reserved. * @@ -618,7 +618,7 @@ not_found: * Returns non-zero if any off-node ports overlap */ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct tipc_port_list *dports) + u32 limit, struct tipc_plist *dports) { struct name_seq *seq; struct sub_seq *sseq; @@ -643,7 +643,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, info = sseq->info; list_for_each_entry(publ, &info->node_list, node_list) { if (publ->scope <= limit) - tipc_port_list_add(dports, publ->ref); + tipc_plist_push(dports, publ->ref); } if (info->cluster_list_size != info->node_list_size) @@ -1212,3 +1212,41 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } + +void tipc_plist_push(struct tipc_plist *pl, u32 port) +{ + struct tipc_plist *nl; + + if (likely(!pl->port)) { + pl->port = port; + return; + } + if (pl->port == port) + return; + list_for_each_entry(nl, &pl->list, list) { + if (nl->port == port) + return; + } + nl = kmalloc(sizeof(*nl), GFP_ATOMIC); + if (nl) { + nl->port = port; + list_add(&nl->list, &pl->list); + } +} + +u32 tipc_plist_pop(struct tipc_plist *pl) +{ + struct tipc_plist *nl; + u32 port = 0; + + if (likely(list_empty(&pl->list))) { + port = pl->port; + pl->port = 0; + return port; + } + nl = list_first_entry(&pl->list, typeof(*nl), list); + port = nl->port; + list_del(&nl->list); + kfree(nl); + return port; +} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index f67b3d8d4b2f..52501fdaafa5 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -38,7 +38,7 @@ #define _TIPC_NAME_TABLE_H struct tipc_subscription; -struct tipc_port_list; +struct tipc_plist; /* * TIPC name types reserved for internal TIPC use (both current and planned) @@ -101,7 +101,7 @@ struct sk_buff *tipc_nametbl_get(struct net *net, const void *req_tlv_area, int req_tlv_space); u32 tipc_nametbl_translate(struct 
net *net, u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct tipc_port_list *dports); + u32 limit, struct tipc_plist *dports); struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, u32 upper, u32 scope, u32 port_ref, u32 key); @@ -118,4 +118,18 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); void tipc_nametbl_stop(struct net *net); +struct tipc_plist { + struct list_head list; + u32 port; +}; + +static inline void tipc_plist_init(struct tipc_plist *pl) +{ + INIT_LIST_HEAD(&pl->list); + pl->port = 0; +} + +void tipc_plist_push(struct tipc_plist *pl, u32 port); +u32 tipc_plist_pop(struct tipc_plist *pl); + #endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c1a4611649ab..26aec8414ac1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2014, Ericsson AB + * Copyright (c) 2001-2007, 2012-2015, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -778,48 +778,42 @@ new_mtu: /* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets */ -void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf) +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(buf); - struct tipc_port_list dports = {0, NULL, }; - struct tipc_port_list *item; - struct sk_buff *b; - uint i, last, dst = 0; + struct tipc_msg *msg = buf_msg(skb); + struct tipc_plist dports; + struct sk_buff *cskb; + u32 portid; u32 scope = TIPC_CLUSTER_SCOPE; - struct sk_buff_head msgs; + struct sk_buff_head msgq; + uint hsz = skb_headroom(skb) + msg_hdr_sz(msg); + + skb_queue_head_init(&msgq); + tipc_plist_init(&dports); if (in_own_node(net, msg_orignode(msg))) scope = TIPC_NODE_SCOPE; if (unlikely(!msg_mcast(msg))) { pr_warn("Received non-multicast msg in multicast\n"); - kfree_skb(buf); goto exit; } /* Create destination port list: */ tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), msg_nameupper(msg), scope, &dports); - last = dports.count; - if (!last) { - kfree_skb(buf); - return; - } - - for (item = &dports; item; item = item->next) { - for (i = 0; i < PLSIZE && ++dst <= last; i++) { - b = (dst != last) ? skb_clone(buf, GFP_ATOMIC) : buf; - if (!b) { - pr_warn("Failed do clone mcast rcv buffer\n"); - continue; - } - msg_set_destport(msg, item->ports[i]); - skb_queue_head_init(&msgs); - skb_queue_tail(&msgs, b); - tipc_sk_rcv(net, &msgs); + portid = tipc_plist_pop(&dports); + for (; portid; portid = tipc_plist_pop(&dports)) { + cskb = __pskb_copy(skb, hsz, GFP_ATOMIC); + if (!cskb) { + pr_warn("Failed do clone mcast rcv buffer\n"); + continue; } + msg_set_destport(buf_msg(cskb), portid); + skb_queue_tail(&msgq, cskb); } + tipc_sk_rcv(net, &msgq); exit: - tipc_port_list_free(&dports); + kfree_skb(skb); } /** diff --git a/net/tipc/socket.h b/net/tipc/socket.h index e3dbdc0e1be7..95b015909ac1 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -1,6 +1,6 @@ /* net/tipc/socket.h: Include file for TIPC socket code * - * Copyright (c) 2014, Ericsson AB + * Copyright (c) 2014-2015, Ericsson AB * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without -- cgit v1.2.3 From cb1b728096f54e7408d60fb571944bed00c5b771 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:44 -0500 Subject: tipc: eliminate race condition at multicast reception In a previous commit in this series we resolved a race problem during unicast message reception. Here, we resolve the same problem at multicast reception. We apply the same technique: an input queue serializing the delivery of arriving buffers. The main difference is that here we do it in two steps. First, the broadcast link feeds arriving buffers into the tail of an arrival queue, which head is consumed at the socket level, and where destination lookup is performed. Second, if the lookup is successful, the resulting buffer clones are fed into a second queue, the input queue. This queue is consumed at reception in the socket just like in the unicast case. Both queues are protected by the same lock, -the one of the input queue. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 57 ++++++++++++++++++++++++---------------- net/tipc/bcast.h | 3 +++ net/tipc/msg.h | 17 ++++++++++++ net/tipc/name_table.h | 2 +- net/tipc/node.c | 11 +++++--- net/tipc/node.h | 7 +++-- net/tipc/socket.c | 72 +++++++++++++++++++++++++++++++-------------------- net/tipc/socket.h | 4 +-- 8 files changed, 114 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3eaa931e2e8c..81b1fef1f5e0 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -79,6 +79,13 @@ static void tipc_bclink_unlock(struct net *net) tipc_link_reset_all(node); } +void tipc_bclink_input(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq); +} + uint tipc_bclink_get_mtu(void) { return MAX_PKT_DEFAULT_MCAST; @@ -356,7 +363,7 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) tipc_node_unlock(n_ptr); } -/* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster +/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster * and to identified node local sockets * @net: the applicable net namespace * @list: chain of buffers containing message @@ -371,6 +378,8 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) int rc = 0; int bc = 0; struct sk_buff *skb; + struct sk_buff_head arrvq; + struct sk_buff_head inputq; /* Prepare clone of message for local node */ skb = tipc_msg_reassemble(list); @@ -379,7 +388,7 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) return -EHOSTUNREACH; } - /* Broadcast to all other nodes */ + /* Broadcast to all nodes */ if (likely(bclink)) { tipc_bclink_lock(net); if (likely(bclink->bcast_nodes.count)) { @@ -399,12 +408,15 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) if (unlikely(!bc)) __skb_queue_purge(list); - /* Deliver message clone */ - if (likely(!rc)) - tipc_sk_mcast_rcv(net, skb); - else + if (unlikely(rc)) { kfree_skb(skb); - + return rc; + } + /* Deliver message clone */ + __skb_queue_head_init(&arrvq); + skb_queue_head_init(&inputq); + __skb_queue_tail(&arrvq, skb); + tipc_sk_mcast_rcv(net, &arrvq, &inputq); return rc; } @@ -449,7 +461,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) int deferred = 0; int pos = 0; struct sk_buff *iskb; - struct sk_buff_head msgs; + struct sk_buff_head *arrvq, *inputq; /* Screen out unwanted broadcast messages */ if 
(msg_mc_netid(msg) != tn->net_id) @@ -486,6 +498,8 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) /* Handle in-sequence broadcast message */ seqno = msg_seqno(msg); next_in = mod(node->bclink.last_in + 1); + arrvq = &tn->bclink->arrvq; + inputq = &tn->bclink->inputq; if (likely(seqno == next_in)) { receive: @@ -493,21 +507,26 @@ receive: if (likely(msg_isdata(msg))) { tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, buf); + spin_unlock_bh(&inputq->lock); + node->action_flags |= TIPC_BCAST_MSG_EVT; tipc_bclink_unlock(net); tipc_node_unlock(node); - if (likely(msg_mcast(msg))) - tipc_sk_mcast_rcv(net, buf); - else - kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); + pos = 0; + while (tipc_msg_extract(buf, &iskb, &pos)) { + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, iskb); + spin_unlock_bh(&inputq->lock); + } + node->action_flags |= TIPC_BCAST_MSG_EVT; tipc_bclink_unlock(net); tipc_node_unlock(node); - while (tipc_msg_extract(buf, &iskb, &pos)) - tipc_sk_mcast_rcv(net, iskb); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) @@ -523,14 +542,6 @@ receive: } tipc_bclink_unlock(net); tipc_node_unlock(node); - } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(net); - tipc_node_unlock(node); - skb_queue_head_init(&msgs); - skb_queue_tail(&msgs, buf); - tipc_named_rcv(net, &msgs); } else { tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); @@ -950,6 +961,8 @@ int tipc_bclink_init(struct net *net) skb_queue_head_init(&bcl->wakeupq); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); + __skb_queue_head_init(&bclink->arrvq); + skb_queue_head_init(&bclink->inputq); bcl->owner = &bclink->node; bcl->owner->net = net; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 8f4d4dc38e11..a910c0b9f249 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -97,6 +97,8 @@ struct tipc_bclink { struct tipc_link link; struct tipc_node node; unsigned int flags; + struct sk_buff_head arrvq; + struct sk_buff_head inputq; struct tipc_node_map bcast_nodes; struct tipc_node *retransmit_to; }; @@ -134,5 +136,6 @@ uint tipc_bclink_get_mtu(void); int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); void tipc_bclink_wakeup_users(struct net *net); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +void tipc_bclink_input(struct net *net); #endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ab467261bd9d..9ace47f44a69 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -767,6 +767,23 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode, int *err); struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); +/* tipc_skb_peek(): peek and reserve first buffer in list + * @list: list to be peeked in + * Returns pointer to first buffer in list, if any + */ +static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list, + spinlock_t *lock) +{ + struct sk_buff *skb; + + spin_lock_bh(lock); + skb = skb_peek(list); + if (skb) + skb_get(skb); + spin_unlock_bh(lock); + return skb; +} + /* tipc_skb_peek_port(): find a destination port, ignoring all destinations * up to and including 'filter'. 
* Note: ignoring previously tried destinations minimizes the risk of diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 52501fdaafa5..0304ddc6b101 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -1,7 +1,7 @@ /* * net/tipc/name_table.h: Include file for TIPC name table code * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * diff --git a/net/tipc/node.c b/net/tipc/node.c index c7fdf3dec92c..52308498f208 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -582,10 +582,10 @@ void tipc_node_unlock(struct tipc_node *node) namedq = node->namedq; publ_list = &node->publ_list; - node->action_flags &= ~(TIPC_MSG_EVT | TIPC_NOTIFY_NODE_DOWN | - TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP | - TIPC_NOTIFY_LINK_DOWN | - TIPC_WAKEUP_BCAST_USERS | + node->action_flags &= ~(TIPC_MSG_EVT | + TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | + TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | TIPC_NAMED_MSG_EVT); spin_unlock_bh(&node->lock); @@ -612,6 +612,9 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_NAMED_MSG_EVT) tipc_named_rcv(net, namedq); + + if (flags & TIPC_BCAST_MSG_EVT) + tipc_bclink_input(net); } /* Caller should hold node lock for the passed node */ diff --git a/net/tipc/node.h b/net/tipc/node.h index c2b0fcf4042b..20ec13f9bede 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -1,7 +1,7 @@ /* * net/tipc/node.h: Include file for TIPC node management routines * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2014, Wind River Systems * All rights reserved. 
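The serialization scheme described in the commit message is easiest to read as a condensed sketch before diving into the full socket.c diff further down. This is not the patch itself: destination lookup and per-port cloning are elided, and "handoff_one" is a made-up name. The point is that several threads may work on the same arrival queue concurrently, but only the thread that still finds its buffer at the head of 'arrvq' commits its clones to 'inputq'; the others throw their temporary work away.

	#include <linux/skbuff.h>

	static void handoff_one(struct sk_buff_head *arrvq,
				struct sk_buff_head *inputq)
	{
		struct sk_buff_head tmpq;
		struct sk_buff *skb, *_skb;

		__skb_queue_head_init(&tmpq);

		spin_lock_bh(&inputq->lock);		/* one lock guards both queues */
		skb = skb_peek(arrvq);
		if (skb)
			skb_get(skb);			/* hold a reference while unlocked */
		spin_unlock_bh(&inputq->lock);
		if (!skb)
			return;

		_skb = skb_clone(skb, GFP_ATOMIC);	/* per-destination copies go here */
		if (_skb)
			__skb_queue_tail(&tmpq, _skb);

		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {		/* still ours: commit and consume */
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);

		__skb_queue_purge(&tmpq);		/* no-op if the splice happened */
		kfree_skb(skb);				/* drop the skb_get() reference */
	}

The real tipc_sk_mcast_rcv() below follows the same shape, with the clone step replaced by a name-table lookup that produces one copy per destination port.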
* @@ -63,7 +63,8 @@ enum { TIPC_WAKEUP_BCAST_USERS = (1 << 5), TIPC_NOTIFY_LINK_UP = (1 << 6), TIPC_NOTIFY_LINK_DOWN = (1 << 7), - TIPC_NAMED_MSG_EVT = (1 << 8) + TIPC_NAMED_MSG_EVT = (1 << 8), + TIPC_BCAST_MSG_EVT = (1 << 9) }; /** @@ -74,6 +75,7 @@ enum { * @oos_state: state tracker for handling OOS b'cast messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @reasm_buf: broadcast reassembly queue head from node + * @inputq_map: bitmap indicating which inqueues should be kicked * @recv_permitted: true if node is allowed to receive b'cast messages */ struct tipc_node_bclink { @@ -84,6 +86,7 @@ struct tipc_node_bclink { u32 deferred_size; struct sk_buff_head deferred_queue; struct sk_buff *reasm_buf; + int inputq_map; bool recv_permitted; }; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 26aec8414ac1..66666805b53c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -776,44 +776,60 @@ new_mtu: return rc; } -/* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets +/** + * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets + * @arrvq: queue with arriving messages, to be cloned after destination lookup + * @inputq: queue with cloned messages, delivered to socket after dest lookup + * + * Multi-threaded: parallel calls with reference to same queues may occur */ -void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *skb) +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq) { - struct tipc_msg *msg = buf_msg(skb); + struct tipc_msg *msg; struct tipc_plist dports; - struct sk_buff *cskb; u32 portid; u32 scope = TIPC_CLUSTER_SCOPE; - struct sk_buff_head msgq; - uint hsz = skb_headroom(skb) + msg_hdr_sz(msg); + struct sk_buff_head tmpq; + uint hsz; + struct sk_buff *skb, *_skb; - skb_queue_head_init(&msgq); + __skb_queue_head_init(&tmpq); tipc_plist_init(&dports); - if (in_own_node(net, msg_orignode(msg))) - scope = TIPC_NODE_SCOPE; - - if (unlikely(!msg_mcast(msg))) { - pr_warn("Received non-multicast msg in multicast\n"); - goto exit; - } - /* Create destination port list: */ - tipc_nametbl_mc_translate(net, msg_nametype(msg), msg_namelower(msg), - msg_nameupper(msg), scope, &dports); - portid = tipc_plist_pop(&dports); - for (; portid; portid = tipc_plist_pop(&dports)) { - cskb = __pskb_copy(skb, hsz, GFP_ATOMIC); - if (!cskb) { - pr_warn("Failed do clone mcast rcv buffer\n"); - continue; + skb = tipc_skb_peek(arrvq, &inputq->lock); + for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { + msg = buf_msg(skb); + hsz = skb_headroom(skb) + msg_hdr_sz(msg); + + if (in_own_node(net, msg_orignode(msg))) + scope = TIPC_NODE_SCOPE; + + /* Create destination port list and message clones: */ + tipc_nametbl_mc_translate(net, + msg_nametype(msg), msg_namelower(msg), + msg_nameupper(msg), scope, &dports); + portid = tipc_plist_pop(&dports); + for (; portid; portid = tipc_plist_pop(&dports)) { + _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + if (_skb) { + msg_set_destport(buf_msg(_skb), portid); + __skb_queue_tail(&tmpq, _skb); + continue; + } + pr_warn("Failed to clone mcast rcv buffer\n"); } - msg_set_destport(buf_msg(cskb), portid); - skb_queue_tail(&msgq, cskb); + /* Append to inputq if not already done by other thread */ + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + skb_queue_splice_tail_init(&tmpq, inputq); + kfree_skb(__skb_dequeue(arrvq)); + } + spin_unlock_bh(&inputq->lock); + __skb_queue_purge(&tmpq); + kfree_skb(skb); } - tipc_sk_rcv(net, 
&msgq); -exit: - kfree_skb(skb); + tipc_sk_rcv(net, inputq); } /** diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 95b015909ac1..8be0da7df8fc 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -42,7 +42,6 @@ #define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) #define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) - int tipc_socket_init(void); void tipc_socket_stop(void); int tipc_sock_create_local(struct net *net, int type, struct socket **res); @@ -51,7 +50,8 @@ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, int flags); int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); struct sk_buff *tipc_sk_socks_show(struct net *net); -void tipc_sk_mcast_rcv(struct net *net, struct sk_buff *buf); +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq); void tipc_sk_reinit(struct net *net); int tipc_sk_rht_init(struct net *net); void tipc_sk_rht_destroy(struct net *net); -- cgit v1.2.3 From 11b1f8288d4341af5d755281c871bff6c3e270dd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 5 Feb 2015 14:39:11 +0100 Subject: ipv6: addrconf: add missing validate_link_af handler We still need a validate_link_af() handler with an appropriate nla policy, similarly as we have in IPv4 case, otherwise size validations are not being done properly in that case. Fixes: f53adae4eae5 ("net: ipv6: add tokenized interface identifier support") Fixes: bc91b0f07ada ("ipv6: addrconf: implement address generation modes") Cc: Jiri Pirko Signed-off-by: Daniel Borkmann Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 62900aee4c58..754e683240f9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4587,6 +4587,22 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return 0; } +static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = { + [IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 }, + [IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) }, +}; + +static int inet6_validate_link_af(const struct net_device *dev, + const struct nlattr *nla) +{ + struct nlattr *tb[IFLA_INET6_MAX + 1]; + + if (dev && !__in6_dev_get(dev)) + return -EAFNOSUPPORT; + + return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy); +} + static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) { int err = -EINVAL; @@ -5408,6 +5424,7 @@ static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, + .validate_link_af = inet6_validate_link_af, .set_link_af = inet6_set_link_af, }; -- cgit v1.2.3 From e04449fcf2cd63dec176355a028bd28b4d469be9 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Thu, 5 Feb 2015 14:54:09 +0100 Subject: dsa: correctly determine the number of switches in a system The number of connected switches was sourced from the number of children to the DSA node, change it to the number of available children, skipping any disabled switches. Fixes: 5e95329b701c4 ("dsa: add device tree bindings to register DSA switches") Signed-off-by: Tobias Waldekranz Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/dsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 37317149f918..2173402d87e0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -603,7 +603,7 @@ static int dsa_of_probe(struct platform_device *pdev) pdev->dev.platform_data = pd; pd->netdev = ðernet_dev->dev; - pd->nr_chips = of_get_child_count(np); + pd->nr_chips = of_get_available_child_count(np); if (pd->nr_chips > DSA_MAX_SWITCHES) pd->nr_chips = DSA_MAX_SWITCHES; -- cgit v1.2.3 From 364d5716a7adb91b731a35765d369602d68d2881 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 5 Feb 2015 18:44:04 +0100 Subject: rtnetlink: ifla_vf_policy: fix misuses of NLA_BINARY ifla_vf_policy[] is wrong in advertising its individual member types as NLA_BINARY since .type = NLA_BINARY in combination with .len declares the len member as *max* attribute length [0, len]. The issue is that when do_setvfinfo() is being called to set up a VF through ndo handler, we could set corrupted data if the attribute length is less than the size of the related structure itself. The intent is exactly the opposite, namely to make sure to pass at least data of minimum size of len. Fixes: ebc08a6f47ee ("rtnetlink: Add VF config code to rtnetlink") Cc: Mitch Williams Cc: Jeff Kirsher Signed-off-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 446cbaf81185..5daabfda6f6f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1237,18 +1237,12 @@ static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { }; static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { - [IFLA_VF_MAC] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_mac) }, - [IFLA_VF_VLAN] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_vlan) }, - [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_tx_rate) }, - [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_spoofchk) }, - [IFLA_VF_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_rate) }, - [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_link_state) }, + [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) }, + [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, + [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, + [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { -- cgit v1.2.3 From 83d2b9ba1abca241df44a502b6da950a25856b5b Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 5 Feb 2015 13:40:49 -0800 Subject: net: openvswitch: Support masked set actions. OVS userspace already probes the openvswitch kernel module for OVS_ACTION_ATTR_SET_MASKED support. This patch adds the kernel module implementation of masked set actions. The existing set action sets many fields at once. When only a subset of the IP header fields, for example, should be modified, all the IP fields need to be exact matched so that the other field values can be copied to the set action. A masked set action allows modification of an arbitrary subset of the supported header bits without requiring the rest to be matched. 
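In other words, for every field the new value is computed as (key & mask) | (old & ~mask), and because the kernel requires the non-masked key bits to be zero, a plain OR is enough. A small stand-alone C illustration of that arithmetic (not OVS code; the TOS values are arbitrary):

	#include <stdio.h>
	#include <stdint.h>

	/* Same arithmetic as the MASKED() helper added to actions.c below;
	 * the caller guarantees 'key' has no bits set outside 'mask'. */
	#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))

	int main(void)
	{
		uint8_t old_tos = 0x2e;	/* existing IPv4 TOS byte (DSCP + ECN)  */
		uint8_t key     = 0x28;	/* new DSCP bits, already masked        */
		uint8_t mask    = 0xfc;	/* rewrite DSCP only, preserve ECN bits */

		printf("old 0x%02x -> new 0x%02x\n",
		       old_tos, MASKED(old_tos, key, mask));
		return 0;
	}

Here the two ECN bits survive the rewrite (0x2e becomes 0x2a), which is exactly the property that lets a flow avoid exact-matching the fields it does not intend to change.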
Masked set action is now supported for all writeable key types, except for the tunnel key. The set tunnel action is an exception as any input tunnel info is cleared before action processing starts, so there is no tunnel info to mask. The kernel module converts all (non-tunnel) set actions to masked set actions. This makes action processing more uniform, and results in less branching and duplicating the action processing code. When returning actions to userspace, the fully masked set actions are converted back to normal set actions. We use a kernel internal action code to be able to tell the userspace provided and converted masked set actions apart. Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 22 ++- net/openvswitch/actions.c | 373 ++++++++++++++++++++++++--------------- net/openvswitch/flow_netlink.c | 161 +++++++++++++---- 3 files changed, 383 insertions(+), 173 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7a8785a99243..bbd49a0c46c7 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -599,6 +599,12 @@ struct ovs_action_hash { * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its * value. + * @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing header. A + * nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value, + * and a mask. For every bit set in the mask, the corresponding bit value + * is copied from the value to the packet header field, rest of the bits are + * left unchanged. The non-masked value bits must be passed in as zeroes. + * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute. * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the * packet. * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. @@ -617,6 +623,9 @@ struct ovs_action_hash { * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment * type may not be changed. + * + * @OVS_ACTION_ATTR_SET_TO_MASKED: Kernel internal masked set action translated + * from the @OVS_ACTION_ATTR_SET. */ enum ovs_action_attr { @@ -631,8 +640,19 @@ enum ovs_action_attr { OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */ OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ + OVS_ACTION_ATTR_SET_MASKED, /* One nested OVS_KEY_ATTR_* including + * data immediately followed by a mask. + * The data must be zero for the unmasked + * bits. */ + + __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted + * from userspace. */ - __OVS_ACTION_ATTR_MAX +#ifdef __KERNEL__ + OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked + * set action converted from + * OVS_ACTION_ATTR_SET. 
*/ +#endif }; #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b4cffe686126..b491c1c296fe 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -185,10 +185,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, return 0; } -static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, - const __be32 *mpls_lse) +/* 'KEY' must not have any bits set outside of the 'MASK' */ +#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) +#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK)) + +static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, + const __be32 *mpls_lse, const __be32 *mask) { __be32 *stack; + __be32 lse; int err; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); @@ -196,14 +201,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, return err; stack = (__be32 *)skb_mpls_header(skb); + lse = MASKED(*stack, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), *mpls_lse }; + __be32 diff[] = { ~(*stack), lse }; + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = *mpls_lse; - key->mpls.top_lse = *mpls_lse; + *stack = lse; + flow_key->mpls.top_lse = lse; return 0; } @@ -230,23 +237,39 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); } -static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ethernet *eth_key) +/* 'src' is already properly masked. */ +static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_) +{ + u16 *dst = (u16 *)dst_; + const u16 *src = (const u16 *)src_; + const u16 *mask = (const u16 *)mask_; + + SET_MASKED(dst[0], src[0], mask[0]); + SET_MASKED(dst[1], src[1], mask[1]); + SET_MASKED(dst[2], src[2], mask[2]); +} + +static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ethernet *key, + const struct ovs_key_ethernet *mask) { int err; + err = skb_ensure_writable(skb, ETH_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); - ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); + ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src, + mask->eth_src); + ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, + mask->eth_dst); ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(key->eth.src, eth_key->eth_src); - ether_addr_copy(key->eth.dst, eth_key->eth_dst); + ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); + ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); return 0; } @@ -304,6 +327,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, } } +static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4], + const __be32 mask[4], __be32 masked[4]) +{ + masked[0] = MASKED(old[0], addr[0], mask[0]); + masked[1] = MASKED(old[1], addr[1], mask[1]); + masked[2] = MASKED(old[2], addr[2], mask[2]); + masked[3] = MASKED(old[3], addr[3], mask[3]); +} + static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, __be32 addr[4], const __be32 new_addr[4], bool recalculate_csum) @@ -315,29 +347,29 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) +static void set_ipv6_fl(struct ipv6hdr *nh, 
u32 fl, u32 mask) { - nh->priority = tc >> 4; - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); + /* Bits 21-24 are always unmasked, so this retains their values. */ + SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); + SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); + SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) +static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, + u8 mask) { - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; - nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; - nh->flow_lbl[2] = fl & 0x000000FF; -} + new_ttl = MASKED(nh->ttl, new_ttl, mask); -static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) -{ csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); nh->ttl = new_ttl; } -static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv4 *ipv4_key) +static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv4 *key, + const struct ovs_key_ipv4 *mask) { struct iphdr *nh; + __be32 new_addr; int err; err = skb_ensure_writable(skb, skb_network_offset(skb) + @@ -347,36 +379,49 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, nh = ip_hdr(skb); - if (ipv4_key->ipv4_src != nh->saddr) { - set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); - key->ipv4.addr.src = ipv4_key->ipv4_src; - } + /* Setting an IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually changed. + */ + if (mask->ipv4_src) { + new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); - if (ipv4_key->ipv4_dst != nh->daddr) { - set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); - key->ipv4.addr.dst = ipv4_key->ipv4_dst; + if (unlikely(new_addr != nh->saddr)) { + set_ip_addr(skb, nh, &nh->saddr, new_addr); + flow_key->ipv4.addr.src = new_addr; + } } + if (mask->ipv4_dst) { + new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); - if (ipv4_key->ipv4_tos != nh->tos) { - ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); - key->ip.tos = nh->tos; + if (unlikely(new_addr != nh->daddr)) { + set_ip_addr(skb, nh, &nh->daddr, new_addr); + flow_key->ipv4.addr.dst = new_addr; + } } - - if (ipv4_key->ipv4_ttl != nh->ttl) { - set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); - key->ip.ttl = ipv4_key->ipv4_ttl; + if (mask->ipv4_tos) { + ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos); + flow_key->ip.tos = nh->tos; + } + if (mask->ipv4_ttl) { + set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl); + flow_key->ip.ttl = nh->ttl; } return 0; } -static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv6 *ipv6_key) +static bool is_ipv6_mask_nonzero(const __be32 addr[4]) +{ + return !!(addr[0] | addr[1] | addr[2] | addr[3]); +} + +static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv6 *key, + const struct ovs_key_ipv6 *mask) { struct ipv6hdr *nh; int err; - __be32 *saddr; - __be32 *daddr; err = skb_ensure_writable(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr)); @@ -384,71 +429,77 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, return err; nh = ipv6_hdr(skb); - saddr = (__be32 *)&nh->saddr; - daddr = (__be32 *)&nh->daddr; - - if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) { - set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, - ipv6_key->ipv6_src, 
true); - memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src, - sizeof(ipv6_key->ipv6_src)); - } - if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { + /* Setting an IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually changed. + */ + if (is_ipv6_mask_nonzero(mask->ipv6_src)) { + __be32 *saddr = (__be32 *)&nh->saddr; + __be32 masked[4]; + + mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); + + if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { + set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + true); + memcpy(&flow_key->ipv6.addr.src, masked, + sizeof(flow_key->ipv6.addr.src)); + } + } + if (is_ipv6_mask_nonzero(mask->ipv6_dst)) { unsigned int offset = 0; int flags = IP6_FH_F_SKIP_RH; bool recalc_csum = true; - - if (ipv6_ext_hdr(nh->nexthdr)) - recalc_csum = ipv6_find_hdr(skb, &offset, - NEXTHDR_ROUTING, NULL, - &flags) != NEXTHDR_ROUTING; - - set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, - ipv6_key->ipv6_dst, recalc_csum); - memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst, - sizeof(ipv6_key->ipv6_dst)); + __be32 *daddr = (__be32 *)&nh->daddr; + __be32 masked[4]; + + mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked); + + if (unlikely(memcmp(daddr, masked, sizeof(masked)))) { + if (ipv6_ext_hdr(nh->nexthdr)) + recalc_csum = (ipv6_find_hdr(skb, &offset, + NEXTHDR_ROUTING, + NULL, &flags) + != NEXTHDR_ROUTING); + + set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + recalc_csum); + memcpy(&flow_key->ipv6.addr.dst, masked, + sizeof(flow_key->ipv6.addr.dst)); + } + } + if (mask->ipv6_tclass) { + ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + flow_key->ip.tos = ipv6_get_dsfield(nh); + } + if (mask->ipv6_label) { + set_ipv6_fl(nh, ntohl(key->ipv6_label), + ntohl(mask->ipv6_label)); + flow_key->ipv6.label = + *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); + } + if (mask->ipv6_hlimit) { + SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit); + flow_key->ip.ttl = nh->hop_limit; } - - set_ipv6_tc(nh, ipv6_key->ipv6_tclass); - key->ip.tos = ipv6_get_dsfield(nh); - - set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); - key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); - - nh->hop_limit = ipv6_key->ipv6_hlimit; - key->ip.ttl = ipv6_key->ipv6_hlimit; return 0; } /* Must follow skb_ensure_writable() since that can move the skb data. 
*/ static void set_tp_port(struct sk_buff *skb, __be16 *port, - __be16 new_port, __sum16 *check) + __be16 new_port, __sum16 *check) { inet_proto_csum_replace2(check, skb, *port, new_port, 0); *port = new_port; - skb_clear_hash(skb); -} - -static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) -{ - struct udphdr *uh = udp_hdr(skb); - - if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { - set_tp_port(skb, port, new_port, &uh->check); - - if (!uh->check) - uh->check = CSUM_MANGLED_0; - } else { - *port = new_port; - skb_clear_hash(skb); - } } -static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_udp *udp_port_key) +static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_udp *key, + const struct ovs_key_udp *mask) { struct udphdr *uh; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -457,23 +508,40 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, return err; uh = udp_hdr(skb); - if (udp_port_key->udp_src != uh->source) { - set_udp_port(skb, &uh->source, udp_port_key->udp_src); - key->tp.src = udp_port_key->udp_src; - } + /* Either of the masks is non-zero, so do not bother checking them. */ + src = MASKED(uh->source, key->udp_src, mask->udp_src); + dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst); - if (udp_port_key->udp_dst != uh->dest) { - set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); - key->tp.dst = udp_port_key->udp_dst; + if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { + if (likely(src != uh->source)) { + set_tp_port(skb, &uh->source, src, &uh->check); + flow_key->tp.src = src; + } + if (likely(dst != uh->dest)) { + set_tp_port(skb, &uh->dest, dst, &uh->check); + flow_key->tp.dst = dst; + } + + if (unlikely(!uh->check)) + uh->check = CSUM_MANGLED_0; + } else { + uh->source = src; + uh->dest = dst; + flow_key->tp.src = src; + flow_key->tp.dst = dst; } + skb_clear_hash(skb); + return 0; } -static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_tcp *tcp_port_key) +static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_tcp *key, + const struct ovs_key_tcp *mask) { struct tcphdr *th; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -482,50 +550,49 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, return err; th = tcp_hdr(skb); - if (tcp_port_key->tcp_src != th->source) { - set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); - key->tp.src = tcp_port_key->tcp_src; + src = MASKED(th->source, key->tcp_src, mask->tcp_src); + if (likely(src != th->source)) { + set_tp_port(skb, &th->source, src, &th->check); + flow_key->tp.src = src; } - - if (tcp_port_key->tcp_dst != th->dest) { - set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); - key->tp.dst = tcp_port_key->tcp_dst; + dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst); + if (likely(dst != th->dest)) { + set_tp_port(skb, &th->dest, dst, &th->check); + flow_key->tp.dst = dst; } + skb_clear_hash(skb); return 0; } -static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_sctp *sctp_port_key) +static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_sctp *key, + const struct ovs_key_sctp *mask) { + unsigned int sctphoff = skb_transport_offset(skb); struct sctphdr *sh; + __le32 old_correct_csum, new_csum, old_csum; int err; - unsigned int sctphoff = 
skb_transport_offset(skb); err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr)); if (unlikely(err)) return err; sh = sctp_hdr(skb); - if (sctp_port_key->sctp_src != sh->source || - sctp_port_key->sctp_dst != sh->dest) { - __le32 old_correct_csum, new_csum, old_csum; + old_csum = sh->checksum; + old_correct_csum = sctp_compute_cksum(skb, sctphoff); - old_csum = sh->checksum; - old_correct_csum = sctp_compute_cksum(skb, sctphoff); + sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src); + sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); - sh->source = sctp_port_key->sctp_src; - sh->dest = sctp_port_key->sctp_dst; + new_csum = sctp_compute_cksum(skb, sctphoff); - new_csum = sctp_compute_cksum(skb, sctphoff); + /* Carry any checksum errors through. */ + sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - /* Carry any checksum errors through. */ - sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - - skb_clear_hash(skb); - key->tp.src = sctp_port_key->sctp_src; - key->tp.dst = sctp_port_key->sctp_dst; - } + skb_clear_hash(skb); + flow_key->tp.src = sh->source; + flow_key->tp.dst = sh->dest; return 0; } @@ -653,52 +720,77 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, key->ovs_flow_hash = hash; } -static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *nested_attr) +static int execute_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) +{ + /* Only tunnel set execution is supported without a mask. */ + if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) { + OVS_CB(skb)->egress_tun_info = nla_data(a); + return 0; + } + + return -EINVAL; +} + +/* Mask is at the midpoint of the data. */ +#define get_mask(a, type) ((const type)nla_data(a) + 1) + +static int execute_masked_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) { int err = 0; - switch (nla_type(nested_attr)) { + switch (nla_type(a)) { case OVS_KEY_ATTR_PRIORITY: - skb->priority = nla_get_u32(nested_attr); - key->phy.priority = skb->priority; + SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.priority = skb->priority; break; case OVS_KEY_ATTR_SKB_MARK: - skb->mark = nla_get_u32(nested_attr); - key->phy.skb_mark = skb->mark; + SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.skb_mark = skb->mark; break; case OVS_KEY_ATTR_TUNNEL_INFO: - OVS_CB(skb)->egress_tun_info = nla_data(nested_attr); + /* Masked data not supported for tunnel. 
*/ + err = -EINVAL; break; case OVS_KEY_ATTR_ETHERNET: - err = set_eth_addr(skb, key, nla_data(nested_attr)); + err = set_eth_addr(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ethernet *)); break; case OVS_KEY_ATTR_IPV4: - err = set_ipv4(skb, key, nla_data(nested_attr)); + err = set_ipv4(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv4 *)); break; case OVS_KEY_ATTR_IPV6: - err = set_ipv6(skb, key, nla_data(nested_attr)); + err = set_ipv6(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv6 *)); break; case OVS_KEY_ATTR_TCP: - err = set_tcp(skb, key, nla_data(nested_attr)); + err = set_tcp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_tcp *)); break; case OVS_KEY_ATTR_UDP: - err = set_udp(skb, key, nla_data(nested_attr)); + err = set_udp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_udp *)); break; case OVS_KEY_ATTR_SCTP: - err = set_sctp(skb, key, nla_data(nested_attr)); + err = set_sctp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_sctp *)); break; case OVS_KEY_ATTR_MPLS: - err = set_mpls(skb, key, nla_data(nested_attr)); + err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, + __be32 *)); break; } @@ -818,6 +910,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_set_action(skb, key, nla_data(a)); break; + case OVS_ACTION_ATTR_SET_MASKED: + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = execute_masked_set_action(skb, key, nla_data(a)); + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a); break; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 8b9a612b39d1..993281e6278d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1695,16 +1695,6 @@ static int validate_and_copy_sample(const struct nlattr *attr, return 0; } -static int validate_tp_port(const struct sw_flow_key *flow_key, - __be16 eth_type) -{ - if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && - (flow_key->tp.src || flow_key->tp.dst)) - return 0; - - return -EINVAL; -} - void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask) @@ -1805,23 +1795,45 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } +/* Return false if there are any non-masked bits set. + * Mask follows data immediately, before any netlink padding. 
+ */ +static bool validate_masked(u8 *data, int len) +{ + u8 *mask = data + len; + + while (len--) + if (*data++ & ~*mask++) + return false; + + return true; +} + static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun, __be16 eth_type, bool log) + bool *skip_copy, __be16 eth_type, bool masked, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); + size_t key_len; /* There can be only one key in a action */ if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; + key_len = nla_len(ovs_key); + if (masked) + key_len /= 2; + if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type].len != nla_len(ovs_key) && + (ovs_key_lens[key_type].len != key_len && ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) return -EINVAL; + if (masked && !validate_masked(nla_data(ovs_key), key_len)) + return -EINVAL; + switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv6 *ipv6_key; @@ -1836,7 +1848,10 @@ static int validate_set(const struct nlattr *a, if (eth_p_mpls(eth_type)) return -EINVAL; - *set_tun = true; + if (masked) + return -EINVAL; /* Masked tunnel set not supported. */ + + *skip_copy = true; err = validate_and_copy_set_tun(a, sfa, log); if (err) return err; @@ -1846,48 +1861,66 @@ static int validate_set(const struct nlattr *a, if (eth_type != htons(ETH_P_IP)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv4_key = nla_data(ovs_key); - if (ipv4_key->ipv4_proto != flow_key->ip.proto) - return -EINVAL; - if (ipv4_key->ipv4_frag != flow_key->ip.frag) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv4 *mask = ipv4_key + 1; + /* Non-writeable fields. */ + if (mask->ipv4_proto || mask->ipv4_frag) + return -EINVAL; + } else { + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + } break; case OVS_KEY_ATTR_IPV6: if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv6_key = nla_data(ovs_key); - if (ipv6_key->ipv6_proto != flow_key->ip.proto) - return -EINVAL; - if (ipv6_key->ipv6_frag != flow_key->ip.frag) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv6 *mask = ipv6_key + 1; + + /* Non-writeable fields. */ + if (mask->ipv6_proto || mask->ipv6_frag) + return -EINVAL; + + /* Invalid bits in the flow label mask? 
*/ + if (ntohl(mask->ipv6_label) & 0xFFF00000) + return -EINVAL; + } else { + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + } if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) return -EINVAL; break; case OVS_KEY_ATTR_TCP: - if (flow_key->ip.proto != IPPROTO_TCP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_UDP: - if (flow_key->ip.proto != IPPROTO_UDP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) @@ -1895,15 +1928,45 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_SCTP: - if (flow_key->ip.proto != IPPROTO_SCTP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; default: return -EINVAL; } + /* Convert non-masked non-tunnel set actions to masked set actions. */ + if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { + int start, len = key_len * 2; + struct nlattr *at; + + *skip_copy = true; + + start = add_nested_action_start(sfa, + OVS_ACTION_ATTR_SET_TO_MASKED, + log); + if (start < 0) + return start; + + at = __add_action(sfa, key_type, NULL, len, log); + if (IS_ERR(at)) + return PTR_ERR(at); + + memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ + memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ + /* Clear non-writeable bits from otherwise writeable fields. */ + if (key_type == OVS_KEY_ATTR_IPV6) { + struct ovs_key_ipv6 *mask = nla_data(at) + key_len; + + mask->ipv6_label &= htonl(0x000FFFFF); + } + add_nested_action_end(*sfa, start); + } + return 0; } @@ -1965,6 +2028,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, + [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) }; @@ -2060,7 +2124,14 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &skip_copy, eth_type, log); + &skip_copy, eth_type, false, log); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SET_MASKED: + err = validate_set(a, key, sfa, + &skip_copy, eth_type, true, log); if (err) return err; break; @@ -2090,6 +2161,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +/* 'key' must be the masked key. */ int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) @@ -2177,6 +2249,21 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return 0; } +static int masked_set_action_to_set_action_attr(const struct nlattr *a, + struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + size_t key_len = nla_len(ovs_key) / 2; + + /* Revert the conversion we did from a non-masked set action to + * masked set action. 
+ */ + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key)) + return -EMSGSIZE; + + return 0; +} + int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -2192,6 +2279,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = masked_set_action_to_set_action_attr(a, skb); + if (err) + return err; + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) -- cgit v1.2.3 From 11ac11999bae3c353f86b6e7dd0e43d4a0eada12 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 5 Feb 2015 23:17:20 +0100 Subject: net: rds: Remove repeated function names from debug output The macro rdsdebug is defined as pr_debug("%s(): " fmt, __func__ , ##args) Hence it doesn't make sense to include the name of the calling function explicitly in the format string passed to rdsdebug. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- net/rds/ib_send.c | 4 ++-- net/rds/iw_cm.c | 4 ++-- net/rds/iw_send.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 1dde91e3dc70..bd3825d38abc 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -409,7 +409,7 @@ try_again: posted = IB_GET_POST_CREDITS(oldval); avail = IB_GET_SEND_CREDITS(oldval); - rdsdebug("rds_ib_send_grab_credits(%u): credits=%u posted=%u\n", + rdsdebug("wanted=%u credits=%u posted=%u\n", wanted, avail, posted); /* The last credit must be used to send a credit update. */ @@ -453,7 +453,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits) if (credits == 0) return; - rdsdebug("rds_ib_send_add_credits(%u): current=%u%s\n", + rdsdebug("credits=%u current=%u%s\n", credits, IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : ""); diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a91e1db62ee6..a6c2bea9f8f9 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -590,8 +590,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) /* Actually this may happen quite frequently, when * an outgoing connect raced with an incoming connect. */ - rdsdebug("rds_iw_conn_shutdown: failed to disconnect," - " cm: %p err %d\n", ic->i_cm_id, err); + rdsdebug("failed to disconnect, cm: %p err %d\n", + ic->i_cm_id, err); } if (ic->i_cm_id->qp) { diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 9105ea03aec5..13834780a308 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -361,7 +361,7 @@ try_again: posted = IB_GET_POST_CREDITS(oldval); avail = IB_GET_SEND_CREDITS(oldval); - rdsdebug("rds_iw_send_grab_credits(%u): credits=%u posted=%u\n", + rdsdebug("wanted=%u credits=%u posted=%u\n", wanted, avail, posted); /* The last credit must be used to send a credit update. */ @@ -405,7 +405,7 @@ void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits) if (credits == 0) return; - rdsdebug("rds_iw_send_add_credits(%u): current=%u%s\n", + rdsdebug("credits=%u current=%u%s\n", credits, IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : ""); -- cgit v1.2.3 From d0a47d32724bf0765b8768086ef1a7a6d074a7a0 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 5 Feb 2015 17:41:43 -0500 Subject: rds: Make rds_message_copy_from_user() return 0 on success. 
Commit 083735f4b01b ("rds: switch rds_message_copy_from_user() to iov_iter") breaks rds_message_copy_from_user() semantics on success, and causes it to return nbytes copied, when it should return 0. This commit fixes that bug. Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/message.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rds/message.c b/net/rds/message.c index 5a21e6f5986f..756c73729126 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -266,7 +266,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) { - unsigned long to_copy; + unsigned long to_copy, nbytes; unsigned long sg_off; struct scatterlist *sg; int ret = 0; @@ -293,9 +293,9 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) sg->length - sg_off); rds_stats_add(s_copy_from_user, to_copy); - ret = copy_page_from_iter(sg_page(sg), sg->offset + sg_off, - to_copy, from); - if (ret != to_copy) + nbytes = copy_page_from_iter(sg_page(sg), sg->offset + sg_off, + to_copy, from); + if (nbytes != to_copy) return -EFAULT; sg_off += to_copy; -- cgit v1.2.3 From 91e83133e70ebe1572746d1ad858b4eb28ab9b53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Feb 2015 14:58:14 -0800 Subject: net: use netif_rx_ni() from process context Hotpluging a cpu might be rare, yet we have to use proper handlers when taking over packets found in backlog queues. dev_cpu_callback() runs from process context, thus we should call netif_rx_ni() to properly invoke softirq handler. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7fe82929f509..6c1556aeec29 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7064,11 +7064,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { - netif_rx_internal(skb); + netif_rx_ni(skb); input_queue_head_incr(oldsd); } while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { - netif_rx_internal(skb); + netif_rx_ni(skb); input_queue_head_incr(oldsd); } -- cgit v1.2.3 From 1fd0bddb618aa970b87e7907a5703682aa119a6f Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 5 Feb 2015 22:24:45 -0800 Subject: bridge: add missing bridge port check for offloads This patch fixes a missing bridge port check caught by smatch. setlink/dellink of attributes like vlans can come for a bridge device and there is no need to offload those today. So, this patch adds a bridge port check. (In these cases however, the BRIDGE_SELF flags will always be set and we may not hit a problem with the current code). smatch complaint: The patch 68e331c785b8: "bridge: offload bridge port attributes to switch asic if feature flag set" from Jan 29, 2015, leads to the following Smatch complaint: net/bridge/br_netlink.c:552 br_setlink() error: we previously assumed 'p' could be null (see line 518) net/bridge/br_netlink.c 517 518 if (p && protinfo) { ^ Check for NULL. Reported-By: Dan Carpenter Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- net/bridge/br_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index b93f42c515da..4fbcea0e7ecb 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -543,7 +543,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) afspec, RTM_SETLINK); } - if (!(flags & BRIDGE_FLAGS_SELF)) { + if (p && !(flags & BRIDGE_FLAGS_SELF)) { /* set bridge attributes in hardware if supported */ ret_offload = netdev_switch_port_bridge_setlink(dev, nlh, @@ -583,7 +583,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) */ br_ifinfo_notify(RTM_NEWLINK, p); - if (!(flags & BRIDGE_FLAGS_SELF)) { + if (p && !(flags & BRIDGE_FLAGS_SELF)) { /* del bridge attributes in hardware */ ret_offload = netdev_switch_port_bridge_dellink(dev, nlh, -- cgit v1.2.3 From ca539345f8767cca221b5aa77bf4329c725d0d7e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 6 Feb 2015 11:17:13 -0800 Subject: openvswitch: Initialize unmasked key and uid len Flow alloc needs to initialize unmasked key pointer. Otherwise it can crash kernel trying to free random unmasked-key pointer. general protection fault: 0000 [#1] SMP 3.19.0-rc6-net-next+ #457 Hardware name: Supermicro X7DWU/X7DWU, BIOS 1.1 04/30/2008 RIP: 0010:[] [] kfree+0xac/0x196 Call Trace: [] flow_free+0x21/0x59 [openvswitch] [] ovs_flow_free+0x21/0x23 [openvswitch] [] ovs_packet_cmd_execute+0x2f3/0x35f [openvswitch] [] ? ovs_packet_cmd_execute+0x13e/0x35f [openvswitch] [] ? nla_parse+0x4f/0xec [] genl_family_rcv_msg+0x26d/0x2c9 [] ? __lock_acquire+0x90e/0x9aa [] genl_rcv_msg+0x66/0x89 [] ? genl_family_rcv_msg+0x2c9/0x2c9 [] netlink_rcv_skb+0x3e/0x95 [] ? genl_rcv+0x18/0x37 [] genl_rcv+0x27/0x37 [] netlink_unicast+0x103/0x191 [] netlink_sendmsg+0x2c1/0x310 [] ? might_fault+0x50/0xa0 [] do_sock_sendmsg+0x5f/0x7a [] sock_sendmsg+0xb/0xd [] ___sys_sendmsg+0x1a3/0x218 [] ? get_close_on_exec+0x86/0x86 [] ? fsnotify+0x32c/0x348 [] ? fsnotify+0x7c/0x348 [] ? __fget+0xaa/0xbf [] ? get_close_on_exec+0x86/0x86 [] __sys_sendmsg+0x3d/0x5e [] SyS_sendmsg+0x14/0x16 [] system_call_fastpath+0x12/0x17 Fixes: 74ed7ab9264("openvswitch: Add support for unique flow IDs.") CC: Joe Stringer Reported-by: Or Gerlitz Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 5e57628e6584..4613df8c8290 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -85,6 +85,8 @@ struct sw_flow *ovs_flow_alloc(void) flow->sf_acts = NULL; flow->mask = NULL; + flow->id.unmasked_key = NULL; + flow->id.ufid_len = 0; flow->stats_last_writer = NUMA_NO_NODE; /* Initialize the default stat node. */ -- cgit v1.2.3 From 032ee4236954eb214651cb9bfc1b38ffa8fd7a01 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:38 -0500 Subject: tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks Helpers for mitigating ACK loops by rate-limiting dupacks sent in response to incoming out-of-window packets. 
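The mitigation reduces to a per-connection timestamp of the last dupack sent for an invalid segment, compared against a configurable minimum interval; with the 500 ms default, at most one such dupack goes out per 500 ms per connection. A rough, self-contained userspace model of that check (simplified from the tcp_oow_rate_limited() helper added below, which works in jiffies, bumps an SNMP counter, and exempts data-bearing non-SYN segments; the names and the millisecond clock here are illustrative only):

#include <stdbool.h>
#include <stdio.h>

struct conn {
        long last_oow_ack_ms;           /* 0 means "no dupack sent yet" */
};

/* Return true if the dupack for an invalid segment should be suppressed. */
static bool oow_rate_limited(struct conn *c, long now_ms, long min_interval_ms)
{
        if (c->last_oow_ack_ms &&
            now_ms - c->last_oow_ack_ms < min_interval_ms)
                return true;            /* too soon: skip this dupack */

        c->last_oow_ack_ms = now_ms;    /* remember the one we do send */
        return false;
}

int main(void)
{
        struct conn c = { 0 };
        long t;

        /* With a 500 ms limit, only t=100, t=700 and t=1300 elicit a dupack. */
        for (t = 100; t <= 1300; t += 200)
                printf("t=%4ld ms -> %s\n", t,
                       oow_rate_limited(&c, t, 500) ? "suppress" : "send dupack");
        return 0;
}
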
This patch includes: - rate-limiting logic - sysctl to control how often we allow dupacks to out-of-window packets - SNMP counter for cases where we rate-limited our dupack sending The rate-limiting logic in this patch decides to not send dupacks in response to out-of-window segments if (a) they are SYNs or pure ACKs and (b) the remote endpoint is sending them faster than the configured rate limit. We rate-limit our responses rather than blocking them entirely or resetting the connection, because legitimate connections can rely on dupacks in response to some out-of-window segments. For example, zero window probes are typically sent with a sequence number that is below the current window, and ZWPs thus expect to thus elicit a dupack in response. We allow dupacks in response to TCP segments with data, because these may be spurious retransmissions for which the remote endpoint wants to receive DSACKs. This is safe because segments with data can't realistically be part of ACK loops, which by their nature consist of each side sending pure/data-less ACKs to each other. The dupack interval is controlled by a new sysctl knob, tcp_invalid_ratelimit, given in milliseconds, in case an administrator needs to dial this upward in the face of a high-rate DoS attack. The name and units are chosen to be analogous to the existing analogous knob for ICMP, icmp_ratelimit. The default value for tcp_invalid_ratelimit is 500ms, which allows at most one such dupack per 500ms. This is chosen to be 2x faster than the 1-second minimum RTO interval allowed by RFC 6298 (section 2, rule 2.4). We allow the extra 2x factor because network delay variations can cause packets sent at 1 second intervals to be compressed and arrive much closer. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 22 ++++++++++++++++++++++ include/net/tcp.h | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/snmp.h | 6 ++++++ net/ipv4/proc.c | 6 ++++++ net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_input.c | 1 + 6 files changed, 74 insertions(+) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a5e4c813f17f..1b8c964b0d17 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -290,6 +290,28 @@ tcp_frto - INTEGER By default it's enabled with a non-zero value. 0 disables F-RTO. +tcp_invalid_ratelimit - INTEGER + Limit the maximal rate for sending duplicate acknowledgments + in response to incoming TCP packets that are for an existing + connection but that are invalid due to any of these reasons: + + (a) out-of-window sequence number, + (b) out-of-window acknowledgment number, or + (c) PAWS (Protection Against Wrapped Sequence numbers) check failure + + This can help mitigate simple "ack loop" DoS attacks, wherein + a buggy or malicious middlebox or man-in-the-middle can + rewrite TCP header fields in manner that causes each endpoint + to think that the other is sending invalid TCP segments, thus + causing each side to send an unterminating stream of duplicate + acknowledgments for invalid segments. + + Using 0 disables rate-limiting of dupacks in response to + invalid segments; otherwise this value specifies the minimal + space between sending such dupacks, in milliseconds. + + Default: 500 (milliseconds). 
+ tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. Default: 2hours. diff --git a/include/net/tcp.h b/include/net/tcp.h index 28e9bd3abceb..b81f45c67b2e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -274,6 +274,7 @@ extern int sysctl_tcp_challenge_ack_limit; extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_autocorking; +extern int sysctl_tcp_invalid_ratelimit; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -1236,6 +1237,37 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt, return true; } +/* Return true if we're currently rate-limiting out-of-window ACKs and + * thus shouldn't send a dupack right now. We rate-limit dupacks in + * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS + * attacks that send repeated SYNs or ACKs for the same connection. To + * do this, we do not send a duplicate SYNACK or ACK if the remote + * endpoint is sending out-of-window SYNs or pure ACKs at a high rate. + */ +static inline bool tcp_oow_rate_limited(struct net *net, + const struct sk_buff *skb, + int mib_idx, u32 *last_oow_ack_time) +{ + /* Data packets without SYNs are not likely part of an ACK loop. */ + if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) && + !tcp_hdr(skb)->syn) + goto not_rate_limited; + + if (*last_oow_ack_time) { + s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); + + if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { + NET_INC_STATS_BH(net, mib_idx); + return true; /* rate-limited: don't send yet! */ + } + } + + *last_oow_ack_time = tcp_time_stamp; + +not_rate_limited: + return false; /* not rate-limited: go ahead, send dupack now! 
*/ +} + static inline void tcp_mib_init(struct net *net) { /* See RFC 2012 */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b22224100011..6a6fb747c78d 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -270,6 +270,12 @@ enum LINUX_MIB_TCPHYSTARTTRAINCWND, /* TCPHystartTrainCwnd */ LINUX_MIB_TCPHYSTARTDELAYDETECT, /* TCPHystartDelayDetect */ LINUX_MIB_TCPHYSTARTDELAYCWND, /* TCPHystartDelayCwnd */ + LINUX_MIB_TCPACKSKIPPEDSYNRECV, /* TCPACKSkippedSynRecv */ + LINUX_MIB_TCPACKSKIPPEDPAWS, /* TCPACKSkippedPAWS */ + LINUX_MIB_TCPACKSKIPPEDSEQ, /* TCPACKSkippedSeq */ + LINUX_MIB_TCPACKSKIPPEDFINWAIT2, /* TCPACKSkippedFinWait2 */ + LINUX_MIB_TCPACKSKIPPEDTIMEWAIT, /* TCPACKSkippedTimeWait */ + LINUX_MIB_TCPACKSKIPPEDCHALLENGE, /* TCPACKSkippedChallenge */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8f9cd200ce20..d8953ef0770c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -292,6 +292,12 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPHystartTrainCwnd", LINUX_MIB_TCPHYSTARTTRAINCWND), SNMP_MIB_ITEM("TCPHystartDelayDetect", LINUX_MIB_TCPHYSTARTDELAYDETECT), SNMP_MIB_ITEM("TCPHystartDelayCwnd", LINUX_MIB_TCPHYSTARTDELAYCWND), + SNMP_MIB_ITEM("TCPACKSkippedSynRecv", LINUX_MIB_TCPACKSKIPPEDSYNRECV), + SNMP_MIB_ITEM("TCPACKSkippedPAWS", LINUX_MIB_TCPACKSKIPPEDPAWS), + SNMP_MIB_ITEM("TCPACKSkippedSeq", LINUX_MIB_TCPACKSKIPPEDSEQ), + SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), + SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), + SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0ee384a448f..82601a68cf90 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -728,6 +728,13 @@ static struct ctl_table ipv4_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "tcp_invalid_ratelimit", + .data = &sysctl_tcp_invalid_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, { .procname = "icmp_msgs_per_sec", .data = &sysctl_icmp_msgs_per_sec, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d3dfff78fa19..9401aa43b814 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; +int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ -- cgit v1.2.3 From a9b2c06dbef48ed31cff1764c5ce824829106f4f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:39 -0500 Subject: tcp: mitigate ACK loops for connections as tcp_request_sock In the SYN_RECV state, where the TCP connection is represented by tcp_request_sock, we now rate-limit SYNACKs in response to a client's retransmitted SYNs: we do not send a SYNACK in response to client SYN if it has been less than sysctl_tcp_invalid_ratelimit (default 500ms) since we last sent a SYNACK in response to a client's retransmitted SYN. This allows the vast majority of legitimate client connections to proceed unimpeded, even for the most aggressive platforms, iOS and MacOS, which actually retransmit SYNs 1-second intervals for several times in a row. 
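In numbers: every step of that client retransmit schedule arrives at least a full second after the previous SYNACK, so with the 500 ms default none of those SYNs is ever left unanswered; only same-request SYNs replayed faster than 500 ms (as in a reflection loop) are suppressed. A quick stand-alone check of that claim, using the retransmit gaps quoted just below and the same simplified limiter idea as earlier (illustrative userspace code, not the kernel path):

#include <stdio.h>

int main(void)
{
        /* Client SYN retransmit gaps in seconds (see the RTO progression below). */
        static const int gap_s[] = { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32 };
        long t_ms = 0, last_synack_ms = 0;      /* initial SYNACK sent at t=0 */
        unsigned int i;

        for (i = 0; i < sizeof(gap_s) / sizeof(gap_s[0]); i++) {
                t_ms += gap_s[i] * 1000L;
                if (t_ms - last_synack_ms >= 500) {     /* never false here */
                        last_synack_ms = t_ms;
                        printf("t=%6ld ms: retransmitted SYN answered\n", t_ms);
                } else {
                        printf("t=%6ld ms: SYNACK suppressed\n", t_ms);
                }
        }
        return 0;
}
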
They use SYN RTO timeouts following the progression: 1,1,1,1,1,2,4,8,16,32. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 1 + net/ipv4/tcp_minisocks.c | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 67309ece0772..bcc828d3b9b9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -115,6 +115,7 @@ struct tcp_request_sock { u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ + u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# * after data-in-SYN. diff --git a/include/net/tcp.h b/include/net/tcp.h index b81f45c67b2e..da4196fb78db 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1145,6 +1145,7 @@ static inline void tcp_openreq_init(struct request_sock *req, tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; tcp_rsk(req)->snt_synack = tcp_time_stamp; + tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; ireq->tstamp_ok = rx_opt->tstamp_ok; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index bc9216dc9de1..131aa4950d1c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -605,7 +605,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Reset timer after retransmitting SYNACK, similar to * the idea of fast retransmit in recovery. */ - if (!inet_rtx_syn_ack(sk, req)) + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSYNRECV, + &tcp_rsk(req)->last_oow_ack_time) && + + !inet_rtx_syn_ack(sk, req)) req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX) + jiffies; return NULL; -- cgit v1.2.3 From f2b2c582e82429270d5818fbabe653f4359d7024 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:40 -0500 Subject: tcp: mitigate ACK loops for connections as tcp_sock Ensure that in state ESTABLISHED, where the connection is represented by a tcp_sock, we rate limit dupacks in response to incoming packets (a) with TCP timestamps that fail PAWS checks, or (b) with sequence numbers or ACK numbers that are out of the acceptable window. We do not send a dupack in response to out-of-window packets if it has been less than sysctl_tcp_invalid_ratelimit (default 500ms) since we last sent a dupack in response to an out-of-window packet. There is already a similar (although global) rate-limiting mechanism for "challenge ACKs". When deciding whether to send a challence ACK, we first consult the new per-connection rate limit, and then the global rate limit. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + net/ipv4/tcp_input.c | 29 ++++++++++++++++++++++------- net/ipv4/tcp_minisocks.c | 1 + 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bcc828d3b9b9..66d85a80a1ec 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -153,6 +153,7 @@ struct tcp_sock { u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ + u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 tsoffset; /* timestamp offset */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9401aa43b814..8fdd27b17306 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3322,13 +3322,22 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 } /* RFC 5961 7 [ACK Throttling] */ -static void tcp_send_challenge_ack(struct sock *sk) +static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) { /* unprotected vars, we dont care of overwrites */ static u32 challenge_timestamp; static unsigned int challenge_count; - u32 now = jiffies / HZ; + struct tcp_sock *tp = tcp_sk(sk); + u32 now; + + /* First check our per-socket dupack rate limit. */ + if (tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDCHALLENGE, + &tp->last_oow_ack_time)) + return; + /* Then check the check host-wide RFC 5961 rate limit. */ + now = jiffies / HZ; if (now != challenge_timestamp) { challenge_timestamp = now; challenge_count = 0; @@ -3424,7 +3433,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) { /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) { - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -4993,7 +5002,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDPAWS, + &tp->last_oow_ack_time)) + tcp_send_dupack(sk, skb); goto discard; } /* Reset is accepted even if it did not pass PAWS. 
*/ @@ -5010,7 +5022,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (!th->rst) { if (th->syn) goto syn_challenge; - tcp_send_dupack(sk, skb); + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSEQ, + &tp->last_oow_ack_time)) + tcp_send_dupack(sk, skb); } goto discard; } @@ -5026,7 +5041,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) tcp_reset(sk); else - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } @@ -5040,7 +5055,7 @@ syn_challenge: if (syn_inerr) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 131aa4950d1c..98a840561ec8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -467,6 +467,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_enable_early_retrans(newtp); newtp->tlp_high_seq = 0; newtp->lsndtime = treq->snt_synack; + newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; /* So many TCP implementations out there (incorrectly) count the -- cgit v1.2.3 From 4fb17a6091674f469e8ac85dc770fbf9a9ba7cc8 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:41 -0500 Subject: tcp: mitigate ACK loops for connections as tcp_timewait_sock Ensure that in state FIN_WAIT2 or TIME_WAIT, where the connection is represented by a tcp_timewait_sock, we rate limit dupacks in response to incoming packets (a) with TCP timestamps that fail PAWS checks, or (b) with sequence numbers that are out of the acceptable window. We do not send a dupack in response to out-of-window packets if it has been less than sysctl_tcp_invalid_ratelimit (default 500ms) since we last sent a dupack in response to an out-of-window packet. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++++ net/ipv4/tcp_minisocks.c | 29 ++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 66d85a80a1ec..1a7adb411647 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -342,6 +342,10 @@ struct tcp_timewait_sock { u32 tw_rcv_wnd; u32 tw_ts_offset; u32 tw_ts_recent; + + /* The time we sent the last out-of-window ACK: */ + u32 tw_last_oow_ack_time; + long tw_ts_recent_stamp; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 98a840561ec8..dd11ac7798c6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -58,6 +58,25 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) return seq == e_win && seq == end_seq; } +static enum tcp_tw_status +tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, + const struct sk_buff *skb, int mib_idx) +{ + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + + if (!tcp_oow_rate_limited(twsk_net(tw), skb, mib_idx, + &tcptw->tw_last_oow_ack_time)) { + /* Send ACK. Note, we do not put the bucket, + * it will be released by caller. + */ + return TCP_TW_ACK; + } + + /* We are rate-limiting, so just release the tw sock and drop skb. 
*/ + inet_twsk_put(tw); + return TCP_TW_SUCCESS; +} + /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -116,7 +135,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt, tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd)) - return TCP_TW_ACK; + return tcp_timewait_check_oow_rate_limit( + tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2); if (th->rst) goto kill; @@ -250,10 +270,8 @@ kill: inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, TCP_TIMEWAIT_LEN); - /* Send ACK. Note, we do not put the bucket, - * it will be released by caller. - */ - return TCP_TW_ACK; + return tcp_timewait_check_oow_rate_limit( + tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); } inet_twsk_put(tw); return TCP_TW_SUCCESS; @@ -289,6 +307,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; + tcptw->tw_last_oow_ack_time = 0; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { -- cgit v1.2.3 From 662f5533c40a97f02949ac81b1643538353c7e59 Mon Sep 17 00:00:00 2001 From: Michael Büsch Date: Sun, 8 Feb 2015 10:14:07 +0100 Subject: rt6_probe_deferred: Do not depend on struct ordering rt6_probe allocates a struct __rt6_probe_work and schedules a work handler rt6_probe_deferred. But rt6_probe_deferred kfree's the struct work_struct instead of struct __rt6_probe_work. This works, because struct work_struct is the first element of struct __rt6_probe_work. Change it to kfree struct __rt6_probe_work to not implicitly depend on struct work_struct being the first element. This does not affect the generated code. Signed-off-by: Michael Buesch Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7622951e5fbb..98565ce0ebcd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -499,7 +499,7 @@ static void rt6_probe_deferred(struct work_struct *w) addrconf_addr_solict_mult(&work->target, &mcaddr); ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); dev_put(work->dev); - kfree(w); + kfree(work); } static void rt6_probe(struct rt6_info *rt) -- cgit v1.2.3 From 51a00daf7369b581e5241c5cae5924886deda261 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Sun, 8 Feb 2015 11:10:50 -0500 Subject: tipc: fix bug in socket reception function In commit c637c1035534867b85b78b453c38c495b58e2c5a ("tipc: resolve race problem at unicast message reception") we introduced a time limit for how long the function tipc_sk_eneque() would be allowed to execute its loop. Unfortunately, the test for when this limit is passed was put in the wrong place, resulting in a lost message when the test is true. We fix this by moving the test to before we dequeue the next buffer from the input queue. Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 66666805b53c..4a98d15a1323 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1802,12 +1802,11 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, unsigned long time_limit = jiffies + 2; while (skb_queue_len(inputq)) { + if (unlikely(time_after_eq(jiffies, time_limit))) + return TIPC_OK; skb = tipc_skb_dequeue(inputq, dport); if (unlikely(!skb)) return TIPC_OK; - /* Return if softirq window exhausted */ - if (unlikely(time_after_eq(jiffies, time_limit))) - return TIPC_OK; if (!sock_owned_by_user(sk)) { err = filter_rcv(sk, &skb); if (likely(!skb)) -- cgit v1.2.3 From 3e97fa7059c19f7cc2566dfb30fe9282f6f1e673 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 6 Feb 2015 17:22:22 +0100 Subject: gre/ipip: use be16 variants of netlink functions encap.sport and encap.dport are __be16, use nla_{get,put}_be16 instead of nla_{get,put}_u16. Fixes the sparse warnings: warning: incorrect type in assignment (different base types) expected restricted __be32 [addressable] [usertype] o_key got restricted __be16 [addressable] [usertype] i_flags warning: incorrect type in assignment (different base types) expected restricted __be16 [usertype] sport got unsigned short warning: incorrect type in assignment (different base types) expected restricted __be16 [usertype] dport got unsigned short warning: incorrect type in argument 3 (different base types) expected unsigned short [unsigned] [usertype] value got restricted __be16 [usertype] sport warning: incorrect type in argument 3 (different base types) expected unsigned short [unsigned] [usertype] value got restricted __be16 [usertype] dport Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/ipv4/ip_gre.c | 12 ++++++------ net/ipv4/ipip.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6e7727f27393..6207275fc749 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -659,12 +659,12 @@ static bool ipgre_netlink_encap_parms(struct nlattr *data[], if (data[IFLA_GRE_ENCAP_SPORT]) { ret = true; - ipencap->sport = nla_get_u16(data[IFLA_GRE_ENCAP_SPORT]); + ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); } if (data[IFLA_GRE_ENCAP_DPORT]) { ret = true; - ipencap->dport = nla_get_u16(data[IFLA_GRE_ENCAP_DPORT]); + ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); } return ret; @@ -786,10 +786,10 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, t->encap.type) || - nla_put_u16(skb, IFLA_GRE_ENCAP_SPORT, - t->encap.sport) || - nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT, - t->encap.dport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, + t->encap.sport) || + nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, + t->encap.dport) || nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, t->encap.flags)) goto nla_put_failure; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index b58d6689874c..915d215a7d14 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -366,12 +366,12 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], if (data[IFLA_IPTUN_ENCAP_SPORT]) { ret = true; - ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]); + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); } if (data[IFLA_IPTUN_ENCAP_DPORT]) { ret = true; - ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]); + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); } return ret; @@ -460,10 +460,10 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT, - tunnel->encap.sport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, - tunnel->encap.dport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, + tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, + tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; -- cgit v1.2.3 From 567e4b79731c352a17d73c483959f795d3593e03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Feb 2015 12:59:01 -0800 Subject: net: rfs: add hash collision detection Receive Flow Steering is a nice solution but suffers from hash collisions when a mix of connected and unconnected traffic is received on the host, when flow hash table is populated. Also, clearing flow in inet_release() makes RFS not very good for short lived flows, as many packets can follow close(). (FIN , ACK packets, ...) This patch extends the information stored into global hash table to not only include cpu number, but upper part of the hash value. I use a 32bit value, and dynamically split it in two parts. For host with less than 64 possible cpus, this gives 6 bits for the cpu number, and 26 (32-6) bits for the upper part of the hash. Since hash bucket selection use low order bits of the hash, we have a full hash match, if /proc/sys/net/core/rps_sock_flow_entries is big enough. If the hash found in flow table does not match, we fallback to RPS (if it is enabled for the rxqueue). This means that a packet for an non connected flow can avoid the IPI through a unrelated/victim CPU. 
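The packing itself is simple: with rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1, each table entry keeps the recorded cpu in the low bits and the upper bits of the flow hash in the rest, and the rx path only trusts an entry whose upper hash bits match the incoming packet's hash. A stand-alone sketch of that arithmetic (userspace model mirroring rps_record_sock_flow() and the get_rps_cpu() check below; 64 possible cpus assumed, i.e. a 0x3f cpu mask as in the changelog):

#include <stdint.h>
#include <stdio.h>

#define NR_CPU_IDS      64U
#define RPS_CPU_MASK    (NR_CPU_IDS - 1)        /* 0x3f: 6 cpu bits, 26 hash bits */

/* What recvmsg() records for a flow (cf. rps_record_sock_flow()). */
static uint32_t record_flow(uint32_t hash, uint32_t cpu)
{
        return (hash & ~RPS_CPU_MASK) | (cpu & RPS_CPU_MASK);
}

/* What the rx path checks before steering (cf. get_rps_cpu()). */
static int lookup_flow(uint32_t entry, uint32_t hash, uint32_t *cpu)
{
        if ((entry ^ hash) & ~RPS_CPU_MASK)
                return 0;       /* upper hash bits differ: collision, fall back to RPS */
        *cpu = entry & RPS_CPU_MASK;
        return 1;
}

int main(void)
{
        uint32_t entry = record_flow(0xdeadbe7au, 13);
        uint32_t cpu = 0;
        int hit;

        hit = lookup_flow(entry, 0xdeadbe7au, &cpu);    /* same flow: full match */
        printf("hit=%d cpu=%u\n", hit, (unsigned int)cpu);      /* hit=1 cpu=13 */

        hit = lookup_flow(entry, 0x1234be7au, &cpu);    /* shares only low bucket bits */
        printf("hit=%d\n", hit);                        /* hit=0: do not steer */
        return 0;
}
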
This also means we no longer have to clear the table at socket close time, and this helps short lived flows performance. Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/tun.c | 5 +---- include/linux/netdevice.h | 34 ++++++++++++++++---------------- include/net/sock.h | 24 +---------------------- net/core/dev.c | 48 ++++++++++++++++++++++++++-------------------- net/core/sysctl_net_core.c | 2 +- net/ipv4/af_inet.c | 2 -- 6 files changed, 47 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ad7d3d5f3ee5..857dca47bf80 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -256,7 +256,6 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) { tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", e->rxhash, e->queue_index); - sock_rps_reset_flow_hash(e->rps_rxhash); hlist_del_rcu(&e->hash_link); kfree_rcu(e, rcu); --tun->flow_count; @@ -373,10 +372,8 @@ unlock: */ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) { - if (unlikely(e->rps_rxhash != hash)) { - sock_rps_reset_flow_hash(e->rps_rxhash); + if (unlikely(e->rps_rxhash != hash)) e->rps_rxhash = hash; - } } /* We try to identify a flow through its rxhash first. The reason that diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce784d5018e0..ab3b7cef4638 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -644,39 +644,39 @@ struct rps_dev_flow_table { /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). + * Each entry is a 32bit value. Upper part is the high order bits + * of flow hash, lower part is cpu number. + * rps_cpu_mask is used to partition the space, depending on number of + * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, + * meaning we use 32-6=26 bits for the hash. 
*/ struct rps_sock_flow_table { - unsigned int mask; - u16 ents[0]; + u32 mask; + u32 ents[0]; }; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ - ((_num) * sizeof(u16))) +#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) #define RPS_NO_CPU 0xffff +extern u32 rps_cpu_mask; +extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; + static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { if (table && hash) { - unsigned int cpu, index = hash & table->mask; + unsigned int index = hash & table->mask; + u32 val = hash & ~rps_cpu_mask; /* We only give a hint, preemption can change cpu under us */ - cpu = raw_smp_processor_id(); + val |= raw_smp_processor_id(); - if (table->ents[index] != cpu) - table->ents[index] = cpu; + if (table->ents[index] != val) + table->ents[index] = val; } } -static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) - table->ents[hash & table->mask] = RPS_NO_CPU; -} - -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); diff --git a/include/net/sock.h b/include/net/sock.h index d28b8fededd6..e13824570b0f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -857,18 +857,6 @@ static inline void sock_rps_record_flow_hash(__u32 hash) #endif } -static inline void sock_rps_reset_flow_hash(__u32 hash) -{ -#ifdef CONFIG_RPS - struct rps_sock_flow_table *sock_flow_table; - - rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_reset_sock_flow(sock_flow_table, hash); - rcu_read_unlock(); -#endif -} - static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS @@ -876,28 +864,18 @@ static inline void sock_rps_record_flow(const struct sock *sk) #endif } -static inline void sock_rps_reset_flow(const struct sock *sk) -{ -#ifdef CONFIG_RPS - sock_rps_reset_flow_hash(sk->sk_rxhash); -#endif -} - static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_RPS - if (unlikely(sk->sk_rxhash != skb->hash)) { - sock_rps_reset_flow(sk); + if (unlikely(sk->sk_rxhash != skb->hash)) sk->sk_rxhash = skb->hash; - } #endif } static inline void sock_rps_reset_rxhash(struct sock *sk) { #ifdef CONFIG_RPS - sock_rps_reset_flow(sk); sk->sk_rxhash = 0; #endif } diff --git a/net/core/dev.c b/net/core/dev.c index a3a96ffc67f4..8be38675e1a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3030,6 +3030,8 @@ static inline void ____napi_schedule(struct softnet_data *sd, /* One global table that all flow-based protocols share. 
*/ struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +u32 rps_cpu_mask __read_mostly; +EXPORT_SYMBOL(rps_cpu_mask); struct static_key rps_needed __read_mostly; @@ -3086,16 +3088,17 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow **rflowp) { - struct netdev_rx_queue *rxqueue; - struct rps_map *map; + const struct rps_sock_flow_table *sock_flow_table; + struct netdev_rx_queue *rxqueue = dev->_rx; struct rps_dev_flow_table *flow_table; - struct rps_sock_flow_table *sock_flow_table; + struct rps_map *map; int cpu = -1; - u16 tcpu; + u32 tcpu; u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->real_num_rx_queues)) { WARN_ONCE(dev->real_num_rx_queues > 1, "%s received packet on queue %u, but number " @@ -3103,39 +3106,40 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, dev->name, index, dev->real_num_rx_queues); goto done; } - rxqueue = dev->_rx + index; - } else - rxqueue = dev->_rx; + rxqueue += index; + } + /* Avoid computing hash if RFS/RPS is not active for this rxqueue */ + + flow_table = rcu_dereference(rxqueue->rps_flow_table); map = rcu_dereference(rxqueue->rps_map); - if (map) { - if (map->len == 1 && - !rcu_access_pointer(rxqueue->rps_flow_table)) { - tcpu = map->cpus[0]; - if (cpu_online(tcpu)) - cpu = tcpu; - goto done; - } - } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { + if (!flow_table && !map) goto done; - } skb_reset_network_header(skb); hash = skb_get_hash(skb); if (!hash) goto done; - flow_table = rcu_dereference(rxqueue->rps_flow_table); sock_flow_table = rcu_dereference(rps_sock_flow_table); if (flow_table && sock_flow_table) { - u16 next_cpu; struct rps_dev_flow *rflow; + u32 next_cpu; + u32 ident; + + /* First check into global flow table if there is a match */ + ident = sock_flow_table->ents[hash & sock_flow_table->mask]; + if ((ident ^ hash) & ~rps_cpu_mask) + goto try_rps; + next_cpu = ident & rps_cpu_mask; + + /* OK, now we know there is a match, + * we can look at the local (per receive queue) flow table + */ rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; - next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; - /* * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow @@ -3162,6 +3166,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } } +try_rps: + if (map) { tcpu = map->cpus[reciprocal_scale(hash, map->len)]; if (cpu_online(tcpu)) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index fde21d19e61b..7a31be5e361f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -65,7 +65,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, mutex_unlock(&sock_flow_mutex); return -ENOMEM; } - + rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; sock_table->mask = size - 1; } else sock_table = orig_sock_table; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a44773c8346c..d2e49baaff63 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -395,8 +395,6 @@ int inet_release(struct socket *sock) if (sk) { long timeout; - sock_rps_reset_flow(sk); - /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); -- cgit v1.2.3 From 93c1af6ca94c1e763efba76a127b5c135e3d23a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Feb 2015 
20:39:13 -0800 Subject: net:rfs: adjust table size checking Make sure root user does not try something stupid. Also make sure mask field in struct rps_sock_flow_table does not share a cache line with the potentially often dirtied flow table. Signed-off-by: Eric Dumazet Fixes: 567e4b79731c ("net: rfs: add hash collision detection") Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- net/core/sysctl_net_core.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ab3b7cef4638..d115256ed5a2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -653,7 +653,8 @@ struct rps_dev_flow_table { */ struct rps_sock_flow_table { u32 mask; - u32 ents[0]; + + u32 ents[0] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7a31be5e361f..eaa51ddf2368 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -52,7 +52,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, if (write) { if (size) { - if (size > 1<<30) { + if (size > 1<<29) { /* Enforce limit to prevent overflow */ mutex_unlock(&sock_flow_mutex); return -EINVAL; -- cgit v1.2.3 From bfb3e5dd8dfd84dfd13649393abab63e43267b00 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:03 +0100 Subject: tipc: move and rename the legacy nl api to "nl compat" The new netlink API is no longer "v2" but rather the standard API and the legacy API is now "nl compat". We split them into separate start/stop and put them in different files in order to further distinguish them. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/Makefile | 4 +- net/tipc/bcast.c | 2 +- net/tipc/bearer.c | 4 +- net/tipc/core.c | 7 ++++ net/tipc/core.h | 1 - net/tipc/link.c | 2 +- net/tipc/name_table.c | 2 +- net/tipc/net.c | 2 +- net/tipc/netlink.c | 67 ++--------------------------- net/tipc/netlink.h | 4 +- net/tipc/netlink_compat.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++ net/tipc/node.c | 2 +- net/tipc/socket.c | 4 +- 13 files changed, 130 insertions(+), 76 deletions(-) create mode 100644 net/tipc/netlink_compat.c (limited to 'net') diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 333e4592772c..69b82bbc60d3 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -7,8 +7,8 @@ obj-$(CONFIG_TIPC) := tipc.o tipc-y += addr.o bcast.o bearer.o config.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ - netlink.o node.o socket.o log.o eth_media.o \ - server.o + netlink.o netlink_compat.o node.o socket.o log.o eth_media.o \ + server.o socket.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 81b1fef1f5e0..e96fd6a6d5c2 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -810,7 +810,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) tipc_bclink_lock(net); - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 33dc3486d16c..35d400e8c2e5 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -658,7 +658,7 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct nlattr *attrs; struct nlattr *prop; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_BEARER_GET); if (!hdr) return -EMSGSIZE; @@ -924,7 +924,7 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct nlattr *attrs; struct nlattr *prop; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_MEDIA_GET); if (!hdr) return -EMSGSIZE; diff --git a/net/tipc/core.c b/net/tipc/core.c index 674bd2698528..2d06d1f8b6e6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -111,6 +111,10 @@ static int __init tipc_init(void) if (err) goto out_netlink; + err = tipc_netlink_compat_start(); + if (err) + goto out_netlink_compat; + err = tipc_socket_init(); if (err) goto out_socket; @@ -136,6 +140,8 @@ out_pernet: out_sysctl: tipc_socket_stop(); out_socket: + tipc_netlink_compat_stop(); +out_netlink_compat: tipc_netlink_stop(); out_netlink: pr_err("Unable to start in single node mode\n"); @@ -146,6 +152,7 @@ static void __exit tipc_exit(void) { tipc_bearer_cleanup(); tipc_netlink_stop(); + tipc_netlink_compat_stop(); tipc_socket_stop(); tipc_unregister_sysctl(); unregister_pernet_subsys(&tipc_net_ops); diff --git a/net/tipc/core.h b/net/tipc/core.h index 817b2e9d4227..451c346fd3cf 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -115,5 +115,4 @@ void tipc_unregister_sysctl(void); #define tipc_register_sysctl() 0 #define tipc_unregister_sysctl() #endif - #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index 942491234099..466f28fcf215 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2498,7 +2498,7 @@ static int __tipc_nl_add_link(struct net *net, struct 
tipc_nl_msg *msg, struct nlattr *prop; struct tipc_net *tn = net_generic(net, tipc_net_id); - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 18a3d44238bc..2251912264a2 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -1055,7 +1055,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, *last_publ = p->key; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, - &tipc_genl_v2_family, NLM_F_MULTI, + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NAME_TABLE_GET); if (!hdr) return -EMSGSIZE; diff --git a/net/tipc/net.c b/net/tipc/net.c index 263267e0e7fe..8b0fb0966628 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -156,7 +156,7 @@ static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) void *hdr; struct nlattr *attrs; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NET_GET); if (!hdr) return -EMSGSIZE; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index fe0f5134ce15..7f6475efc984 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -35,7 +35,6 @@ */ #include "core.h" -#include "config.h" #include "socket.h" #include "name_table.h" #include "bearer.h" @@ -44,39 +43,6 @@ #include "net.h" #include -static int handle_cmd(struct sk_buff *skb, struct genl_info *info) -{ - struct net *net = genl_info_net(info); - struct sk_buff *rep_buf; - struct nlmsghdr *rep_nlh; - struct nlmsghdr *req_nlh = info->nlhdr; - struct tipc_genlmsghdr *req_userhdr = info->userhdr; - int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); - u16 cmd; - - if ((req_userhdr->cmd & 0xC000) && - (!netlink_net_capable(skb, CAP_NET_ADMIN))) - cmd = TIPC_CMD_NOT_NET_ADMIN; - else - cmd = req_userhdr->cmd; - - rep_buf = tipc_cfg_do_cmd(net, req_userhdr->dest, cmd, - nlmsg_data(req_nlh) + GENL_HDRLEN + - TIPC_GENL_HDRLEN, - nlmsg_attrlen(req_nlh, GENL_HDRLEN + - TIPC_GENL_HDRLEN), hdr_space); - - if (rep_buf) { - skb_push(rep_buf, hdr_space); - rep_nlh = nlmsg_hdr(rep_buf); - memcpy(rep_nlh, req_nlh, hdr_space); - rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(net, rep_buf, NETLINK_CB(skb).portid); - } - - return 0; -} - static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, @@ -89,28 +55,10 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } }; -/* Legacy ASCII API */ -static struct genl_family tipc_genl_family = { - .id = GENL_ID_GENERATE, - .name = TIPC_GENL_NAME, - .version = TIPC_GENL_VERSION, - .hdrsize = TIPC_GENL_HDRLEN, - .maxattr = 0, - .netnsok = true, -}; - -/* Legacy ASCII API */ -static struct genl_ops tipc_genl_ops[] = { - { - .cmd = TIPC_GENL_CMD, - .doit = handle_cmd, - }, -}; - /* Users of the legacy API (tipc-config) can't handle that we add operations, * so we have a separate genl handling for the new API. 
*/ -struct genl_family tipc_genl_v2_family = { +struct genl_family tipc_genl_family = { .id = GENL_ID_GENERATE, .name = TIPC_GENL_V2_NAME, .version = TIPC_GENL_V2_VERSION, @@ -202,9 +150,9 @@ static const struct genl_ops tipc_genl_v2_ops[] = { int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) { - u32 maxattr = tipc_genl_v2_family.maxattr; + u32 maxattr = tipc_genl_family.maxattr; - *attr = tipc_genl_v2_family.attrbuf; + *attr = tipc_genl_family.attrbuf; if (!*attr) return -EOPNOTSUPP; @@ -215,13 +163,7 @@ int tipc_netlink_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_family, tipc_genl_ops); - if (res) { - pr_err("Failed to register legacy interface\n"); - return res; - } - - res = genl_register_family_with_ops(&tipc_genl_v2_family, + res = genl_register_family_with_ops(&tipc_genl_family, tipc_genl_v2_ops); if (res) { pr_err("Failed to register netlink interface\n"); @@ -233,5 +175,4 @@ int tipc_netlink_start(void) void tipc_netlink_stop(void) { genl_unregister_family(&tipc_genl_family); - genl_unregister_family(&tipc_genl_v2_family); } diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index ae2f2d923a15..08a1db67b927 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -36,7 +36,7 @@ #ifndef _TIPC_NETLINK_H #define _TIPC_NETLINK_H -extern struct genl_family tipc_genl_v2_family; +extern struct genl_family tipc_genl_family; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); struct tipc_nl_msg { @@ -46,6 +46,8 @@ struct tipc_nl_msg { }; int tipc_netlink_start(void); +int tipc_netlink_compat_start(void); void tipc_netlink_stop(void); +void tipc_netlink_compat_stop(void); #endif diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c new file mode 100644 index 000000000000..f752854c8b10 --- /dev/null +++ b/net/tipc/netlink_compat.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "config.h" +#include +#include + +static int handle_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct sk_buff *rep_buf; + struct nlmsghdr *rep_nlh; + struct nlmsghdr *req_nlh = info->nlhdr; + struct tipc_genlmsghdr *req_userhdr = info->userhdr; + int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + u16 cmd; + + if ((req_userhdr->cmd & 0xC000) && + (!netlink_net_capable(skb, CAP_NET_ADMIN))) + cmd = TIPC_CMD_NOT_NET_ADMIN; + else + cmd = req_userhdr->cmd; + + rep_buf = tipc_cfg_do_cmd(net, req_userhdr->dest, cmd, + nlmsg_data(req_nlh) + GENL_HDRLEN + + TIPC_GENL_HDRLEN, + nlmsg_attrlen(req_nlh, GENL_HDRLEN + + TIPC_GENL_HDRLEN), hdr_space); + + if (rep_buf) { + skb_push(rep_buf, hdr_space); + rep_nlh = nlmsg_hdr(rep_buf); + memcpy(rep_nlh, req_nlh, hdr_space); + rep_nlh->nlmsg_len = rep_buf->len; + genlmsg_unicast(net, rep_buf, NETLINK_CB(skb).portid); + } + + return 0; +} + +static struct genl_family tipc_genl_compat_family = { + .id = GENL_ID_GENERATE, + .name = TIPC_GENL_NAME, + .version = TIPC_GENL_VERSION, + .hdrsize = TIPC_GENL_HDRLEN, + .maxattr = 0, + .netnsok = true, +}; + +static struct genl_ops tipc_genl_compat_ops[] = { + { + .cmd = TIPC_GENL_CMD, + .doit = handle_cmd, + }, +}; + +int tipc_netlink_compat_start(void) +{ + int res; + + res = genl_register_family_with_ops(&tipc_genl_compat_family, + tipc_genl_compat_ops); + if (res) { + pr_err("Failed to register legacy compat interface\n"); + return res; + } + + return 0; +} + +void tipc_netlink_compat_stop(void) +{ + genl_unregister_family(&tipc_genl_compat_family); +} diff --git a/net/tipc/node.c b/net/tipc/node.c index 52308498f208..995618d6da9d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -623,7 +623,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) void *hdr; struct nlattr *attrs; - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NODE_GET); if (!hdr) return -EMSGSIZE; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4a98d15a1323..d76c171f7b7e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2783,7 +2783,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, struct tipc_net *tn = net_generic(net, tipc_net_id); hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); if (!hdr) goto msg_cancel; @@ -2864,7 +2864,7 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, struct nlattr *attrs; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); if (!hdr) goto msg_cancel; -- cgit v1.2.3 From d0796d1ef63deb38147729664691ba3090930b26 Mon Sep 17 00:00:00 
2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:04 +0100 Subject: tipc: convert legacy nl bearer dump to nl compat Introduce a framework for dumping netlink data from the new netlink API and formatting it to the old legacy API format. This is done by looping the dump data and calling a format handler for each entity, in this case a bearer. We dump until either all data is dumped or we reach the limited buffer size of the legacy API. Remember, the legacy API doesn't scale. In this commit we convert TIPC_CMD_GET_BEARER_NAMES to use the compat layer. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc_config.h | 5 + net/tipc/bearer.c | 29 ----- net/tipc/bearer.h | 1 - net/tipc/config.c | 3 - net/tipc/netlink_compat.c | 274 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 278 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 876d0a14863c..e1f4f05f4c5c 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -272,6 +272,11 @@ static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type) (ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type); } +static inline int TLV_GET_LEN(struct tlv_desc *tlv) +{ + return ntohs(tlv->tlv_len); +} + static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) { struct tlv_desc *tlv_ptr; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 35d400e8c2e5..7a9e29641e61 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -205,35 +205,6 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) return NULL; } -/** - * tipc_bearer_get_names - record names of bearers in buffer - */ -struct sk_buff *tipc_bearer_get_names(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *buf; - struct tipc_bearer *b; - int i, j; - - buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); - if (!buf) - return NULL; - - for (i = 0; media_info_array[i] != NULL; i++) { - for (j = 0; j < MAX_BEARERS; j++) { - b = rtnl_dereference(tn->bearer_list[j]); - if (!b) - continue; - if (b->media == media_info_array[i]) { - tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, - b->name, - strlen(b->name) + 1); - } - } - } - return buf; -} - void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index c035e3e24764..956858276d93 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -205,7 +205,6 @@ void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); -struct sk_buff *tipc_bearer_get_names(struct net *net); void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); diff --git a/net/tipc/config.c b/net/tipc/config.c index 6873360cda53..52e84b0ac48a 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -252,9 +252,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_nametbl_get(net, req_tlv_area, req_tlv_space); break; - case TIPC_CMD_GET_BEARER_NAMES: - rep_tlv_buf = tipc_bearer_get_names(net); - break; case TIPC_CMD_GET_MEDIA_NAMES: 
rep_tlv_buf = tipc_media_get_names(); break; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f752854c8b10..bd75ea290e3a 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -33,9 +33,265 @@ #include "core.h" #include "config.h" +#include "bearer.h" #include #include +/* The legacy API had an artificial message length limit called + * ULTRA_STRING_MAX_LEN. + */ +#define ULTRA_STRING_MAX_LEN 32768 + +#define TIPC_SKB_MAX TLV_SPACE(ULTRA_STRING_MAX_LEN) + +#define REPLY_TRUNCATED "\n" + +struct tipc_nl_compat_msg { + u16 cmd; + int rep_size; + struct sk_buff *rep; + struct tlv_desc *req; + struct sock *dst_sk; +}; + +struct tipc_nl_compat_cmd_dump { + int (*dumpit)(struct sk_buff *, struct netlink_callback *); + int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); +}; + +static int tipc_skb_tailroom(struct sk_buff *skb) +{ + int tailroom; + int limit; + + tailroom = skb_tailroom(skb); + limit = TIPC_SKB_MAX - skb->len; + + if (tailroom < limit) + return tailroom; + + return limit; +} + +static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); + + if (tipc_skb_tailroom(skb) < TLV_SPACE(len)) + return -EMSGSIZE; + + skb_put(skb, TLV_SPACE(len)); + tlv->tlv_type = htons(type); + tlv->tlv_len = htons(TLV_LENGTH(len)); + if (len && data) + memcpy(TLV_DATA(tlv), data, len); + + return 0; +} + +static struct sk_buff *tipc_tlv_alloc(int size) +{ + int hdr_len; + struct sk_buff *buf; + + size = TLV_SPACE(size); + hdr_len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + + buf = alloc_skb(hdr_len + size, GFP_KERNEL); + if (!buf) + return NULL; + + skb_reserve(buf, hdr_len); + + return buf; +} + +static struct sk_buff *tipc_get_err_tlv(char *str) +{ + int str_len = strlen(str) + 1; + struct sk_buff *buf; + + buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + if (buf) + tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); + + return buf; +} + +static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg, + struct sk_buff *arg) +{ + int len = 0; + int err; + struct sk_buff *buf; + struct nlmsghdr *nlmsg; + struct netlink_callback cb; + + memset(&cb, 0, sizeof(cb)); + cb.nlh = (struct nlmsghdr *)arg->data; + cb.skb = arg; + + buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf->sk = msg->dst_sk; + + do { + int rem; + + len = (*cmd->dumpit)(buf, &cb); + + nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { + struct nlattr **attrs; + + err = tipc_nlmsg_parse(nlmsg, &attrs); + if (err) + goto err_out; + + err = (*cmd->format)(msg, attrs); + if (err) + goto err_out; + + if (tipc_skb_tailroom(msg->rep) <= 1) { + err = -EMSGSIZE; + goto err_out; + } + } + + skb_reset_tail_pointer(buf); + buf->len = 0; + + } while (len); + + err = 0; + +err_out: + kfree_skb(buf); + + if (err == -EMSGSIZE) { + /* The legacy API only considered messages filling + * "ULTRA_STRING_MAX_LEN" to be truncated. 
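+ * Only a reply that has grown to within one byte of TIPC_SKB_MAX gets its + * tail overwritten with the REPLY_TRUNCATED marker; a shorter reply that + * hit -EMSGSIZE is returned as-is, and the error is converted to success + * in both cases.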
+ */ + if ((TIPC_SKB_MAX - msg->rep->len) <= 1) { + char *tail = skb_tail_pointer(msg->rep); + + if (*tail != '\0') + sprintf(tail - sizeof(REPLY_TRUNCATED) - 1, + REPLY_TRUNCATED); + } + + return 0; + } + + return err; +} + +static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *arg; + + msg->rep = tipc_tlv_alloc(msg->rep_size); + if (!msg->rep) + return -ENOMEM; + + arg = nlmsg_new(0, GFP_KERNEL); + if (!arg) { + kfree_skb(msg->rep); + return -ENOMEM; + } + + err = __tipc_nl_compat_dumpit(cmd, msg, arg); + if (err) + kfree_skb(msg->rep); + + kfree_skb(arg); + + return err; +} + +static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; + + nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], + NULL); + + return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, + nla_data(bearer[TIPC_NLA_BEARER_NAME]), + nla_len(bearer[TIPC_NLA_BEARER_NAME])); +} + +static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) +{ + struct tipc_nl_compat_cmd_dump dump; + + memset(&dump, 0, sizeof(dump)); + + switch (msg->cmd) { + case TIPC_CMD_GET_BEARER_NAMES: + msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME); + dump.dumpit = tipc_nl_bearer_dump; + dump.format = tipc_nl_compat_bearer_dump; + return tipc_nl_compat_dumpit(&dump, msg); + } + + return -EOPNOTSUPP; +} + +static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int len; + struct tipc_nl_compat_msg msg; + struct nlmsghdr *req_nlh; + struct nlmsghdr *rep_nlh; + struct tipc_genlmsghdr *req_userhdr = info->userhdr; + struct net *net = genl_info_net(info); + + memset(&msg, 0, sizeof(msg)); + + req_nlh = (struct nlmsghdr *)skb->data; + msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; + msg.cmd = req_userhdr->cmd; + msg.dst_sk = info->dst_sk; + + if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); + err = -EACCES; + goto send; + } + + len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (TLV_GET_LEN(msg.req) && !TLV_OK(msg.req, len)) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + err = -EOPNOTSUPP; + goto send; + } + + err = tipc_nl_compat_handle(&msg); + if (err == -EOPNOTSUPP) + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + else if (err == -EINVAL) + msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); +send: + if (!msg.rep) + return err; + + len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + skb_push(msg.rep, len); + rep_nlh = nlmsg_hdr(msg.rep); + memcpy(rep_nlh, info->nlhdr, len); + rep_nlh->nlmsg_len = msg.rep->len; + genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid); + + return err; +} + static int handle_cmd(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); @@ -69,6 +325,22 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) return 0; } +/* Temporary function to keep functionality throughout the patchset + * without having to mess with the global variables and other trickery + * of the old API. 
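+ * Commands that have already been converted are dispatched to + * tipc_nl_compat_recv(); everything else still falls through to the old + * handle_cmd() path.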
+ */ +static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) +{ + struct tipc_genlmsghdr *req = info->userhdr; + + switch (req->cmd) { + case TIPC_CMD_GET_BEARER_NAMES: + return tipc_nl_compat_recv(skb, info); + } + + return handle_cmd(skb, info); +} + static struct genl_family tipc_genl_compat_family = { .id = GENL_ID_GENERATE, .name = TIPC_GENL_NAME, @@ -81,7 +353,7 @@ static struct genl_family tipc_genl_compat_family = { static struct genl_ops tipc_genl_compat_ops[] = { { .cmd = TIPC_GENL_CMD, - .doit = handle_cmd, + .doit = tipc_nl_compat_tmp_wrap, }, }; -- cgit v1.2.3 From 9ab154658a7ff2c5076607e02f18581c6859fc2a Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:05 +0100 Subject: tipc: convert legacy nl bearer enable/disable to nl compat Introduce a framework for transcoding legacy nl action into actions (.doit) calls from the new nl API. This is done by converting the incoming TLV data into netlink data with nested netlink attributes. Unfortunately due to the randomness of the legacy API we can't do this generically so each legacy netlink command requires a specific transcoding recipe. In this case for bearer enable and bearer disable. Convert TIPC_CMD_ENABLE_BEARER and TIPC_CMD_DISABLE_BEARER into doit compat calls. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc_config.h | 5 ++ net/tipc/bearer.c | 26 ++----- net/tipc/bearer.h | 3 - net/tipc/config.c | 33 --------- net/tipc/netlink_compat.c | 144 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 154 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index e1f4f05f4c5c..f9226566c1b8 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -277,6 +277,11 @@ static inline int TLV_GET_LEN(struct tlv_desc *tlv) return ntohs(tlv->tlv_len); } +static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) +{ + return (ntohs(tlv->tlv_type) == type); +} + static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) { struct tlv_desc *tlv_ptr; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 7a9e29641e61..de1c800ef806 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -236,8 +236,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) /** * tipc_enable_bearer - enable bearer with the given name */ -int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, - u32 priority) +static int tipc_enable_bearer(struct net *net, const char *name, + u32 disc_domain, u32 priority) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; @@ -393,22 +393,6 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, kfree_rcu(b_ptr, rcu); } -int tipc_disable_bearer(struct net *net, const char *name) -{ - struct tipc_bearer *b_ptr; - int res; - - b_ptr = tipc_bearer_find(net, name); - if (b_ptr == NULL) { - pr_warn("Attempt to disable unknown bearer <%s>\n", name); - res = -EINVAL; - } else { - bearer_disable(net, b_ptr, false); - res = 0; - } - return res; -} - int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) { struct net_device *dev; @@ -756,7 +740,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; - struct net *net = genl_info_net(info); + struct net *net = 
sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -787,11 +771,11 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); - struct tipc_net *tn = net_generic(net, tipc_net_id); int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 domain; u32 prio; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 956858276d93..06f25d144871 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -173,9 +173,6 @@ struct tipc_bearer_names { */ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); -int tipc_enable_bearer(struct net *net, const char *bearer_name, - u32 disc_domain, u32 priority); -int tipc_disable_bearer(struct net *net, const char *name); /* * Routines made available to TIPC by supported media types diff --git a/net/tipc/config.c b/net/tipc/config.c index 52e84b0ac48a..f8cd5e1b545f 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -134,33 +134,6 @@ static struct sk_buff *tipc_show_stats(void) return buf; } -static struct sk_buff *cfg_enable_bearer(struct net *net) -{ - struct tipc_bearer_config *args; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_CONFIG)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); - if (tipc_enable_bearer(net, args->name, - ntohl(args->disc_domain), - ntohl(args->priority))) - return tipc_cfg_reply_error_string("unable to enable bearer"); - - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_disable_bearer(struct net *net) -{ - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - if (tipc_disable_bearer(net, (char *)TLV_DATA(req_tlv_area))) - return tipc_cfg_reply_error_string("unable to disable bearer"); - - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_own_addr(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -267,12 +240,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_link_cmd_config(net, req_tlv_area, req_tlv_space, cmd); break; - case TIPC_CMD_ENABLE_BEARER: - rep_tlv_buf = cfg_enable_bearer(net); - break; - case TIPC_CMD_DISABLE_BEARER: - rep_tlv_buf = cfg_disable_bearer(net); - break; case TIPC_CMD_SET_NODE_ADDR: rep_tlv_buf = cfg_set_own_addr(net); break; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index bd75ea290e3a..12b0f4424797 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -49,6 +49,7 @@ struct tipc_nl_compat_msg { u16 cmd; int rep_size; + int req_type; struct sk_buff *rep; struct tlv_desc *req; struct sock *dst_sk; @@ -59,6 +60,11 @@ struct tipc_nl_compat_cmd_dump { int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); }; +struct tipc_nl_compat_cmd_doit { + int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg); +}; + static int tipc_skb_tailroom(struct sk_buff *skb) { int tailroom; @@ -213,6 +219,78 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return err; } +static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *doit_buf; + struct 
sk_buff *trans_buf; + struct nlattr **attrbuf; + struct genl_info info; + + trans_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!trans_buf) + return -ENOMEM; + + err = (*cmd->transcode)(trans_buf, msg); + if (err) + goto trans_out; + + attrbuf = kmalloc((tipc_genl_family.maxattr + 1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto trans_out; + } + + err = nla_parse(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL); + if (err) + goto parse_out; + + doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!doit_buf) { + err = -ENOMEM; + goto parse_out; + } + + doit_buf->sk = msg->dst_sk; + + memset(&info, 0, sizeof(info)); + info.attrs = attrbuf; + + err = (*cmd->doit)(doit_buf, &info); + + kfree_skb(doit_buf); +parse_out: + kfree(attrbuf); +trans_out: + kfree_skb(trans_buf); + + return err; +} + +static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + + err = __tipc_nl_compat_doit(cmd, msg); + if (err) + return err; + + /* The legacy API considered an empty message a success message */ + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + + return 0; +} + static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { @@ -226,11 +304,65 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, nla_len(bearer[TIPC_NLA_BEARER_NAME])); } +static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_bearer_config *b; + + b = (struct tipc_bearer_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_BEARER_DOMAIN, ntohl(b->disc_domain))) + return -EMSGSIZE; + + if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { + prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) + return -EMSGSIZE; + nla_nest_end(skb, prop); + } + nla_nest_end(skb, bearer); + + return 0; +} + +static int tipc_nl_compat_bearer_disable(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *bearer; + + name = (char *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, bearer); + + return 0; +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; + struct tipc_nl_compat_cmd_doit doit; memset(&dump, 0, sizeof(dump)); + memset(&doit, 0, sizeof(doit)); switch (msg->cmd) { case TIPC_CMD_GET_BEARER_NAMES: @@ -238,6 +370,16 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) dump.dumpit = tipc_nl_bearer_dump; dump.format = tipc_nl_compat_bearer_dump; return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_ENABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_CONFIG; + doit.doit = tipc_nl_bearer_enable; + doit.transcode = tipc_nl_compat_bearer_enable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_DISABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_NAME; + doit.doit = tipc_nl_bearer_disable; + doit.transcode = 
tipc_nl_compat_bearer_disable; + return tipc_nl_compat_doit(&doit, msg); } return -EOPNOTSUPP; @@ -335,6 +477,8 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) switch (req->cmd) { case TIPC_CMD_GET_BEARER_NAMES: + case TIPC_CMD_ENABLE_BEARER: + case TIPC_CMD_DISABLE_BEARER: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From f2b3b2d4ccbf9666f5f42a21347cd1aaa532b2fa Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:06 +0100 Subject: tipc: convert legacy nl link stat to nl compat Add functionality for safely appending string data to a TLV without keeping write count in the caller. Convert TIPC_CMD_SHOW_LINK_STATS to compat dumpit. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc_config.h | 10 ++ net/tipc/bcast.c | 43 -------- net/tipc/bcast.h | 1 - net/tipc/config.c | 4 - net/tipc/link.c | 141 --------------------------- net/tipc/link.h | 3 - net/tipc/netlink_compat.c | 205 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 215 insertions(+), 192 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index f9226566c1b8..087b0ef82c07 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -277,11 +277,21 @@ static inline int TLV_GET_LEN(struct tlv_desc *tlv) return ntohs(tlv->tlv_len); } +static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len) +{ + tlv->tlv_len = htons(len); +} + static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) { return (ntohs(tlv->tlv_type) == type); } +static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type) +{ + tlv->tlv_type = htons(type); +} + static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) { struct tlv_desc *tlv_ptr; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e96fd6a6d5c2..3e41704832de 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -860,49 +860,6 @@ msg_full: return -EMSGSIZE; } -int tipc_bclink_stats(struct net *net, char *buf, const u32 buf_size) -{ - int ret; - struct tipc_stats *s; - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - - if (!bcl) - return 0; - - tipc_bclink_lock(net); - - s = &bcl->stats; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " Window:%u packets\n", - bcl->name, bcl->queue_limit[0]); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX naks:%u defs:%u dups:%u\n", - s->recv_nacks, s->deferred_recv, s->duplicates); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX naks:%u acks:%u dups:%u\n", - s->sent_nacks, s->sent_acks, s->retransmitted); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue max:%u avg:%u\n", - s->link_congs, s->max_queue_sz, - s->queue_sz_counts ? 
- (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_bclink_unlock(net); - return ret; -} - int tipc_bclink_reset_stats(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a910c0b9f249..43f397fbac55 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -127,7 +127,6 @@ u32 tipc_bclink_get_last_sent(struct net *net); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); void tipc_bclink_update_link_state(struct tipc_node *node, u32 last_sent); -int tipc_bclink_stats(struct net *net, char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(struct net *net); int tipc_bclink_set_queue_limits(struct net *net, u32 limit); void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, diff --git a/net/tipc/config.c b/net/tipc/config.c index f8cd5e1b545f..67b76ee847f3 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -213,10 +213,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_node_get_links(net, req_tlv_area, req_tlv_space); break; - case TIPC_CMD_SHOW_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_show_stats(net, req_tlv_area, - req_tlv_space); - break; case TIPC_CMD_RESET_LINK_STATS: rep_tlv_buf = tipc_link_cmd_reset_stats(net, req_tlv_area, req_tlv_space); diff --git a/net/tipc/link.c b/net/tipc/link.c index 466f28fcf215..2622fb99344a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2146,147 +2146,6 @@ struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, return tipc_cfg_reply_none(); } -/** - * percent - convert count to a percentage of total (rounding up or down) - */ -static u32 percent(u32 count, u32 total) -{ - return (count * 100 + (total / 2)) / total; -} - -/** - * tipc_link_stats - print link statistics - * @net: the applicable net namespace - * @name: link name - * @buf: print buffer area - * @buf_size: size of print buffer area - * - * Returns length of print buffer data string (or 0 if error) - */ -static int tipc_link_stats(struct net *net, const char *name, char *buf, - const u32 buf_size) -{ - struct tipc_link *l; - struct tipc_stats *s; - struct tipc_node *node; - char *status; - u32 profile_total = 0; - unsigned int bearer_id; - int ret; - - if (!strcmp(name, tipc_bclink_name)) - return tipc_bclink_stats(net, buf, buf_size); - - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return 0; - - tipc_node_lock(node); - - l = node->links[bearer_id]; - if (!l) { - tipc_node_unlock(node); - return 0; - } - - s = &l->stats; - - if (tipc_link_is_active(l)) - status = "ACTIVE"; - else if (tipc_link_is_up(l)) - status = "STANDBY"; - else - status = "DEFUNCT"; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " %s MTU:%u Priority:%u Tolerance:%u ms" - " Window:%u packets\n", - l->name, status, l->max_pkt, l->priority, - l->tolerance, l->queue_limit[0]); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - l->next_in_no - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u fragments:%u/%u bundles:%u/%u\n", - l->next_out_no - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - - profile_total = s->msg_length_counts; - if (!profile_total) - profile_total = 1; - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX profile sample:%u packets average:%u octets\n" - " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% " 
- "-16384:%u%% -32768:%u%% -66000:%u%%\n", - s->msg_length_counts, - s->msg_lengths_total / profile_total, - percent(s->msg_length_profile[0], profile_total), - percent(s->msg_length_profile[1], profile_total), - percent(s->msg_length_profile[2], profile_total), - percent(s->msg_length_profile[3], profile_total), - percent(s->msg_length_profile[4], profile_total), - percent(s->msg_length_profile[5], profile_total), - percent(s->msg_length_profile[6], profile_total)); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX states:%u probes:%u naks:%u defs:%u" - " dups:%u\n", s->recv_states, s->recv_probes, - s->recv_nacks, s->deferred_recv, s->duplicates); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX states:%u probes:%u naks:%u acks:%u" - " dups:%u\n", s->sent_states, s->sent_probes, - s->sent_nacks, s->sent_acks, s->retransmitted); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue" - " max:%u avg:%u\n", s->link_congs, - s->max_queue_sz, s->queue_sz_counts ? - (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_node_unlock(node); - return ret; -} - -struct sk_buff *tipc_link_cmd_show_stats(struct net *net, - const void *req_tlv_area, - int req_tlv_space) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - int str_len; - int pb_len; - char *pb; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - str_len = tipc_link_stats(net, (char *)TLV_DATA(req_tlv_area), - pb, pb_len); - if (!str_len) { - kfree_skb(buf); - return tipc_cfg_reply_error_string("link not found"); - } - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - static void link_print(struct tipc_link *l_ptr, const char *str) { struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); diff --git a/net/tipc/link.h b/net/tipc/link.h index 34d3f55c4cea..8c8340cf991f 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -217,9 +217,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area, int req_tlv_space, u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(struct net *net, - const void *req_tlv_area, - int req_tlv_space); struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, const void *req_tlv_area, int req_tlv_space); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 12b0f4424797..899bd94da467 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -34,6 +34,7 @@ #include "core.h" #include "config.h" #include "bearer.h" +#include "link.h" #include #include @@ -48,6 +49,7 @@ struct tipc_nl_compat_msg { u16 cmd; + int rep_type; int rep_size; int req_type; struct sk_buff *rep; @@ -95,6 +97,40 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) return 0; } +static void tipc_tlv_init(struct sk_buff *skb, u16 type) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb->data; + + TLV_SET_LEN(tlv, 0); + TLV_SET_TYPE(tlv, type); + skb_put(skb, sizeof(struct tlv_desc)); +} + +static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...) 
+{ + int n; + u16 len; + u32 rem; + char *buf; + struct tlv_desc *tlv; + va_list args; + + rem = tipc_skb_tailroom(skb); + + tlv = (struct tlv_desc *)skb->data; + len = TLV_GET_LEN(tlv); + buf = TLV_DATA(tlv) + len; + + va_start(args, fmt); + n = vscnprintf(buf, rem, fmt, args); + va_end(args); + + TLV_SET_LEN(tlv, n + len); + skb_put(skb, n); + + return n; +} + static struct sk_buff *tipc_tlv_alloc(int size) { int hdr_len; @@ -200,10 +236,16 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, int err; struct sk_buff *arg; + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + msg->rep = tipc_tlv_alloc(msg->rep_size); if (!msg->rep) return -ENOMEM; + if (msg->rep_type) + tipc_tlv_init(msg->rep, msg->rep_type); + arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { kfree_skb(msg->rep); @@ -356,6 +398,161 @@ static int tipc_nl_compat_bearer_disable(struct sk_buff *skb, return 0; } +static inline u32 perc(u32 count, u32 total) +{ + return (count * 100 + (total / 2)) / total; +} + +static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, + struct nlattr *prop[], struct nlattr *stats[]) +{ + tipc_tlv_sprintf(msg->rep, " Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, " RX naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, " TX naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); +} + +static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char *name; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; + struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + + nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + + nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], + NULL); + + nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS], + NULL); + + name = (char *)TLV_DATA(msg->req); + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) + return 0; + + tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n", + nla_data(link[TIPC_NLA_LINK_NAME])); + + if (link[TIPC_NLA_LINK_BROADCAST]) { + __fill_bc_link_stat(msg, prop, stats); + return 0; + } + + if (link[TIPC_NLA_LINK_ACTIVE]) + tipc_tlv_sprintf(msg->rep, " ACTIVE"); + else if (link[TIPC_NLA_LINK_UP]) + tipc_tlv_sprintf(msg->rep, " STANDBY"); + else + tipc_tlv_sprintf(msg->rep, " 
DEFUNCT"); + + tipc_tlv_sprintf(msg->rep, " MTU:%u Priority:%u", + nla_get_u32(link[TIPC_NLA_LINK_MTU]), + nla_get_u32(prop[TIPC_NLA_PROP_PRIO])); + + tipc_tlv_sprintf(msg->rep, " Tolerance:%u ms Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_TOL]), + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_RX]) - + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_TX]) - + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX profile sample:%u packets average:%u octets\n", + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) / + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])); + + tipc_tlv_sprintf(msg->rep, + " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% ", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, "-16384:%u%% -32768:%u%% -66000:%u%%\n", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, + " RX states:%u probes:%u naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, + " TX states:%u probes:%u naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); + + return 0; +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -380,6 +577,13 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) doit.doit = tipc_nl_bearer_disable; doit.transcode = tipc_nl_compat_bearer_disable; return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + 
dump.dumpit = tipc_nl_link_dump; + dump.format = tipc_nl_compat_link_stat_dump; + return tipc_nl_compat_dumpit(&dump, msg); } return -EOPNOTSUPP; @@ -479,6 +683,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_GET_BEARER_NAMES: case TIPC_CMD_ENABLE_BEARER: case TIPC_CMD_DISABLE_BEARER: + case TIPC_CMD_SHOW_LINK_STATS: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From 357ebdbfca0baa9a8d8d85307393e9ec3406affc Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:07 +0100 Subject: tipc: convert legacy nl link dump to nl compat Convert TIPC_CMD_GET_LINKS to compat dumpit and remove global link counter solely used by the legacy API. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/config.c | 4 --- net/tipc/netlink_compat.c | 23 +++++++++++++++ net/tipc/node.c | 73 ----------------------------------------------- net/tipc/node.h | 2 -- 4 files changed, 23 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 67b76ee847f3..1b17f5846e86 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -209,10 +209,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_node_get_nodes(net, req_tlv_area, req_tlv_space); break; - case TIPC_CMD_GET_LINKS: - rep_tlv_buf = tipc_node_get_links(net, req_tlv_area, - req_tlv_space); - break; case TIPC_CMD_RESET_LINK_STATS: rep_tlv_buf = tipc_link_cmd_reset_stats(net, req_tlv_area, req_tlv_space); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 899bd94da467..bff9403899ed 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -553,6 +553,22 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return 0; } +static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct tipc_link_info link_info; + + nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + + link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); + link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); + strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, + &link_info, sizeof(link_info)); +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -584,6 +600,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) dump.dumpit = tipc_nl_link_dump; dump.format = tipc_nl_compat_link_stat_dump; return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_LINKS: + msg->req_type = TIPC_TLV_NET_ADDR; + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_link_dump; + dump.format = tipc_nl_compat_link_dump; + return tipc_nl_compat_dumpit(&dump, msg); } return -EOPNOTSUPP; @@ -684,6 +706,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_ENABLE_BEARER: case TIPC_CMD_DISABLE_BEARER: case TIPC_CMD_SHOW_LINK_STATS: + case TIPC_CMD_GET_LINKS: return tipc_nl_compat_recv(skb, info); } diff --git a/net/tipc/node.c b/net/tipc/node.c index 995618d6da9d..46b87f77d342 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -319,27 +319,18 @@ int tipc_node_is_up(struct tipc_node *n_ptr) void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - struct tipc_net *tn = net_generic(n_ptr->net, 
tipc_net_id); - n_ptr->links[l_ptr->bearer_id] = l_ptr; - spin_lock_bh(&tn->node_list_lock); - tn->num_links++; - spin_unlock_bh(&tn->node_list_lock); n_ptr->link_cnt++; } void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); int i; for (i = 0; i < MAX_BEARERS; i++) { if (l_ptr != n_ptr->links[i]) continue; n_ptr->links[i] = NULL; - spin_lock_bh(&tn->node_list_lock); - tn->num_links--; - spin_unlock_bh(&tn->node_list_lock); n_ptr->link_cnt--; } } @@ -467,70 +458,6 @@ struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area, return buf; } -struct sk_buff *tipc_node_get_links(struct net *net, const void *req_tlv_area, - int req_tlv_space) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 domain; - struct sk_buff *buf; - struct tipc_node *n_ptr; - struct tipc_link_info link_info; - u32 payload_size; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (!tipc_addr_domain_valid(domain)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network address)"); - - if (!tn->own_addr) - return tipc_cfg_reply_none(); - - spin_lock_bh(&tn->node_list_lock); - /* Get space for all unicast links + broadcast link */ - payload_size = TLV_SPACE((sizeof(link_info)) * (tn->num_links + 1)); - if (payload_size > 32768u) { - spin_unlock_bh(&tn->node_list_lock); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (too many links)"); - } - spin_unlock_bh(&tn->node_list_lock); - - buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) - return NULL; - - /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_cluster_mask(tn->own_addr)); - link_info.up = htonl(1); - strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); - tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); - - /* Add TLVs for any other links in scope */ - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - u32 i; - - if (!tipc_in_scope(domain, n_ptr->addr)) - continue; - tipc_node_lock(n_ptr); - for (i = 0; i < MAX_BEARERS; i++) { - if (!n_ptr->links[i]) - continue; - link_info.dest = htonl(n_ptr->addr); - link_info.up = htonl(tipc_link_is_up(n_ptr->links[i])); - strcpy(link_info.str, n_ptr->links[i]->name); - tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, - &link_info, sizeof(link_info)); - } - tipc_node_unlock(n_ptr); - } - rcu_read_unlock(); - return buf; -} - /** * tipc_node_get_linkname - get the name of a link * diff --git a/net/tipc/node.h b/net/tipc/node.h index 20ec13f9bede..59dafee61aa0 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -142,8 +142,6 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); -struct sk_buff *tipc_node_get_links(struct net *net, const void *req_tlv_area, - int req_tlv_space); struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area, int req_tlv_space); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, -- cgit v1.2.3 From 37e2d4843f9e2f5aad6bf3be5dad174f2838f375 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:08 +0100 Subject: tipc: convert legacy nl link prop set to nl compat Convert setting of link proprieties to 
compat doit calls. Commands converted in this patch: TIPC_CMD_SET_LINK_TOL TIPC_CMD_SET_LINK_PRI TIPC_CMD_SET_LINK_WINDOW Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/config.c | 6 -- net/tipc/link.c | 146 +--------------------------------------------- net/tipc/link.h | 2 - net/tipc/netlink_compat.c | 47 +++++++++++++++ 4 files changed, 48 insertions(+), 153 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 1b17f5846e86..75a130e493f5 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -226,12 +226,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); break; - case TIPC_CMD_SET_LINK_TOL: - case TIPC_CMD_SET_LINK_PRI: - case TIPC_CMD_SET_LINK_WINDOW: - rep_tlv_buf = tipc_link_cmd_config(net, req_tlv_area, - req_tlv_space, cmd); - break; case TIPC_CMD_SET_NODE_ADDR: rep_tlv_buf = cfg_set_own_addr(net); break; diff --git a/net/tipc/link.c b/net/tipc/link.c index 2622fb99344a..ec136b5cc339 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1958,150 +1958,6 @@ static struct tipc_node *tipc_link_find_owner(struct net *net, return found_node; } -/** - * link_value_is_valid -- validate proposed link tolerance/priority/window - * - * @cmd: value type (TIPC_CMD_SET_LINK_*) - * @new_value: the new value - * - * Returns 1 if value is within range, 0 if not. - */ -static int link_value_is_valid(u16 cmd, u32 new_value) -{ - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - return (new_value >= TIPC_MIN_LINK_TOL) && - (new_value <= TIPC_MAX_LINK_TOL); - case TIPC_CMD_SET_LINK_PRI: - return (new_value <= TIPC_MAX_LINK_PRI); - case TIPC_CMD_SET_LINK_WINDOW: - return (new_value >= TIPC_MIN_LINK_WIN) && - (new_value <= TIPC_MAX_LINK_WIN); - } - return 0; -} - -/** - * link_cmd_set_value - change priority/tolerance/window for link/bearer/media - * @net: the applicable net namespace - * @name: ptr to link, bearer, or media name - * @new_value: new value of link, bearer, or media setting - * @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) - * - * Caller must hold RTNL lock to ensure link/bearer/media is not deleted. - * - * Returns 0 if value updated and negative value on error. 
- */ -static int link_cmd_set_value(struct net *net, const char *name, u32 new_value, - u16 cmd) -{ - struct tipc_node *node; - struct tipc_link *l_ptr; - struct tipc_bearer *b_ptr; - struct tipc_media *m_ptr; - int bearer_id; - int res = 0; - - node = tipc_link_find_owner(net, name, &bearer_id); - if (node) { - tipc_node_lock(node); - l_ptr = node->links[bearer_id]; - - if (l_ptr) { - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - link_set_supervision_props(l_ptr, new_value); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, - new_value, 0, 0); - break; - case TIPC_CMD_SET_LINK_PRI: - l_ptr->priority = new_value; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, - 0, new_value, 0); - break; - case TIPC_CMD_SET_LINK_WINDOW: - tipc_link_set_queue_limits(l_ptr, new_value); - break; - default: - res = -EINVAL; - break; - } - } - tipc_node_unlock(node); - return res; - } - - b_ptr = tipc_bearer_find(net, name); - if (b_ptr) { - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - b_ptr->tolerance = new_value; - break; - case TIPC_CMD_SET_LINK_PRI: - b_ptr->priority = new_value; - break; - case TIPC_CMD_SET_LINK_WINDOW: - b_ptr->window = new_value; - break; - default: - res = -EINVAL; - break; - } - return res; - } - - m_ptr = tipc_media_find(name); - if (!m_ptr) - return -ENODEV; - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - m_ptr->tolerance = new_value; - break; - case TIPC_CMD_SET_LINK_PRI: - m_ptr->priority = new_value; - break; - case TIPC_CMD_SET_LINK_WINDOW: - m_ptr->window = new_value; - break; - default: - res = -EINVAL; - break; - } - return res; -} - -struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area, - int req_tlv_space, u16 cmd) -{ - struct tipc_link_config *args; - u32 new_value; - int res; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - args = (struct tipc_link_config *)TLV_DATA(req_tlv_area); - new_value = ntohl(args->value); - - if (!link_value_is_valid(cmd, new_value)) - return tipc_cfg_reply_error_string( - "cannot change, value invalid"); - - if (!strcmp(args->name, tipc_bclink_name)) { - if ((cmd == TIPC_CMD_SET_LINK_WINDOW) && - (tipc_bclink_set_queue_limits(net, new_value) == 0)) - return tipc_cfg_reply_none(); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change setting on broadcast link)"); - } - - res = link_cmd_set_value(net, args->name, new_value, cmd); - if (res) - return tipc_cfg_reply_error_string("cannot change link setting"); - - return tipc_cfg_reply_none(); -} - /** * link_reset_statistics - reset link statistics * @l_ptr: pointer to link @@ -2216,7 +2072,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = genl_info_net(info); + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; diff --git a/net/tipc/link.h b/net/tipc/link.h index 8c8340cf991f..6eb9d606063b 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -215,8 +215,6 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area, - int req_tlv_space, u16 cmd); struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, const void *req_tlv_area, int req_tlv_space); diff --git 
a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index bff9403899ed..056532b41e7e 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -569,6 +569,43 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, &link_info, sizeof(link_info)); } +static int tipc_nl_compat_link_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *link; + struct nlattr *prop; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + link = nla_nest_start(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_LINK_PROP); + if (!prop) + return -EMSGSIZE; + + if (msg->cmd == TIPC_CMD_SET_LINK_PRI) { + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value))) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_LINK_TOL) { + if (nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value))) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_LINK_WINDOW) { + if (nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value))) + return -EMSGSIZE; + } + + nla_nest_end(skb, prop); + nla_nest_end(skb, link); + + return 0; +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -606,6 +643,13 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) dump.dumpit = tipc_nl_link_dump; dump.format = tipc_nl_compat_link_dump; return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_LINK_TOL: + case TIPC_CMD_SET_LINK_PRI: + case TIPC_CMD_SET_LINK_WINDOW: + msg->req_type = TIPC_TLV_LINK_CONFIG; + doit.doit = tipc_nl_link_set; + doit.transcode = tipc_nl_compat_link_set; + return tipc_nl_compat_doit(&doit, msg); } return -EOPNOTSUPP; @@ -707,6 +751,9 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_DISABLE_BEARER: case TIPC_CMD_SHOW_LINK_STATS: case TIPC_CMD_GET_LINKS: + case TIPC_CMD_SET_LINK_TOL: + case TIPC_CMD_SET_LINK_PRI: + case TIPC_CMD_SET_LINK_WINDOW: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From 1817877b3cd7b4dc73e4a1514d5f48eaa3989ec9 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:09 +0100 Subject: tipc: convert legacy nl link stat reset to nl compat Convert TIPC_CMD_RESET_LINK_STATS to compat doit. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/config.c | 4 ---- net/tipc/link.c | 35 +---------------------------------- net/tipc/link.h | 3 --- net/tipc/netlink_compat.c | 26 ++++++++++++++++++++++++++ 4 files changed, 27 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 75a130e493f5..11c16d191fa2 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -209,10 +209,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_node_get_nodes(net, req_tlv_area, req_tlv_space); break; - case TIPC_CMD_RESET_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_reset_stats(net, req_tlv_area, - req_tlv_space); - break; case TIPC_CMD_SHOW_NAME_TABLE: rep_tlv_buf = tipc_nametbl_get(net, req_tlv_area, req_tlv_space); diff --git a/net/tipc/link.c b/net/tipc/link.c index ec136b5cc339..dfe6f4d0b402 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1969,39 +1969,6 @@ static void link_reset_statistics(struct tipc_link *l_ptr) l_ptr->stats.recv_info = l_ptr->next_in_no; } -struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, - const void *req_tlv_area, - int req_tlv_space) -{ - char *link_name; - struct tipc_link *l_ptr; - struct tipc_node *node; - unsigned int bearer_id; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - link_name = (char *)TLV_DATA(req_tlv_area); - if (!strcmp(link_name, tipc_bclink_name)) { - if (tipc_bclink_reset_stats(net)) - return tipc_cfg_reply_error_string("link not found"); - return tipc_cfg_reply_none(); - } - node = tipc_link_find_owner(net, link_name, &bearer_id); - if (!node) - return tipc_cfg_reply_error_string("link not found"); - - tipc_node_lock(node); - l_ptr = node->links[bearer_id]; - if (!l_ptr) { - tipc_node_unlock(node); - return tipc_cfg_reply_error_string("link not found"); - } - link_reset_statistics(l_ptr); - tipc_node_unlock(node); - return tipc_cfg_reply_none(); -} - static void link_print(struct tipc_link *l_ptr, const char *str) { struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); @@ -2424,7 +2391,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = genl_info_net(info); + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; diff --git a/net/tipc/link.h b/net/tipc/link.h index 6eb9d606063b..7aeb52092bf3 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -215,9 +215,6 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, - const void *req_tlv_area, - int req_tlv_space); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(struct net *net, unsigned int bearer_id); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 056532b41e7e..02461233b6d8 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -606,6 +606,26 @@ static int tipc_nl_compat_link_set(struct sk_buff *skb, return 0; } +static int tipc_nl_compat_link_reset_stats(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *link; + + name = (char *)TLV_DATA(msg->req); + + link = nla_nest_start(skb, TIPC_NLA_LINK); + if 
(!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, link); + + return 0; +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -650,6 +670,11 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) doit.doit = tipc_nl_link_set; doit.transcode = tipc_nl_compat_link_set; return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_RESET_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + doit.doit = tipc_nl_link_reset_stats; + doit.transcode = tipc_nl_compat_link_reset_stats; + return tipc_nl_compat_doit(&doit, msg); } return -EOPNOTSUPP; @@ -754,6 +779,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_SET_LINK_TOL: case TIPC_CMD_SET_LINK_PRI: case TIPC_CMD_SET_LINK_WINDOW: + case TIPC_CMD_RESET_LINK_STATS: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From 44a8ae94fd5525aa06a8c71cb52efbc418fb8b7c Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:10 +0100 Subject: tipc: convert legacy nl name table dump to nl compat Add functionality for printing a dump header and convert TIPC_CMD_SHOW_NAME_TABLE to compat dumpit. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/config.c | 4 - net/tipc/name_table.c | 184 ---------------------------------------------- net/tipc/name_table.h | 2 - net/tipc/netlink_compat.c | 101 +++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 190 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 11c16d191fa2..7b053fcc4b87 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -209,10 +209,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_node_get_nodes(net, req_tlv_area, req_tlv_space); break; - case TIPC_CMD_SHOW_NAME_TABLE: - rep_tlv_buf = tipc_nametbl_get(net, req_tlv_area, - req_tlv_space); - break; case TIPC_CMD_GET_MEDIA_NAMES: rep_tlv_buf = tipc_media_get_names(); break; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 2251912264a2..c8eaa2afe875 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -773,190 +773,6 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) spin_unlock_bh(&tn->nametbl_lock); } -/** - * subseq_list - print specified sub-sequence contents into the given buffer - */ -static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, - u32 index) -{ - char portIdStr[27]; - const char *scope_str[] = {"", " zone", " cluster", " node"}; - struct publication *publ; - struct name_info *info; - int ret; - - ret = tipc_snprintf(buf, len, "%-10u %-10u ", sseq->lower, sseq->upper); - - if (depth == 2) { - ret += tipc_snprintf(buf - ret, len + ret, "\n"); - return ret; - } - - info = sseq->info; - - list_for_each_entry(publ, &info->zone_list, zone_list) { - sprintf(portIdStr, "<%u.%u.%u:%u>", - tipc_zone(publ->node), tipc_cluster(publ->node), - tipc_node(publ->node), publ->ref); - ret += tipc_snprintf(buf + ret, len - ret, "%-26s ", portIdStr); - if (depth > 3) { - ret += tipc_snprintf(buf + ret, len - ret, "%-10u %s", - publ->key, scope_str[publ->scope]); - } - if (!list_is_last(&publ->zone_list, &info->zone_list)) - ret += tipc_snprintf(buf + ret, len - ret, - "\n%33s", " "); - } - - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -/** - * nameseq_list - print specified name sequence 
contents into the given buffer - */ -static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth, - u32 type, u32 lowbound, u32 upbound, u32 index) -{ - struct sub_seq *sseq; - char typearea[11]; - int ret = 0; - - if (seq->first_free == 0) - return 0; - - sprintf(typearea, "%-10u", seq->type); - - if (depth == 1) { - ret += tipc_snprintf(buf, len, "%s\n", typearea); - return ret; - } - - for (sseq = seq->sseqs; sseq != &seq->sseqs[seq->first_free]; sseq++) { - if ((lowbound <= sseq->upper) && (upbound >= sseq->lower)) { - ret += tipc_snprintf(buf + ret, len - ret, "%s ", - typearea); - spin_lock_bh(&seq->lock); - ret += subseq_list(sseq, buf + ret, len - ret, - depth, index); - spin_unlock_bh(&seq->lock); - sprintf(typearea, "%10s", " "); - } - } - return ret; -} - -/** - * nametbl_header - print name table header into the given buffer - */ -static int nametbl_header(char *buf, int len, u32 depth) -{ - const char *header[] = { - "Type ", - "Lower Upper ", - "Port Identity ", - "Publication Scope" - }; - - int i; - int ret = 0; - - if (depth > 4) - depth = 4; - for (i = 0; i < depth; i++) - ret += tipc_snprintf(buf + ret, len - ret, header[i]); - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -/** - * nametbl_list - print specified name table contents into the given buffer - */ -static int nametbl_list(struct net *net, char *buf, int len, u32 depth_info, - u32 type, u32 lowbound, u32 upbound) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct hlist_head *seq_head; - struct name_seq *seq; - int all_types; - int ret = 0; - u32 depth; - u32 i; - - all_types = (depth_info & TIPC_NTQ_ALLTYPES); - depth = (depth_info & ~TIPC_NTQ_ALLTYPES); - - if (depth == 0) - return 0; - - if (all_types) { - /* display all entries in name table to specified depth */ - ret += nametbl_header(buf, len, depth); - lowbound = 0; - upbound = ~0; - for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &tn->nametbl->seq_hlist[i]; - hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - ret += nameseq_list(seq, buf + ret, len - ret, - depth, seq->type, - lowbound, upbound, i); - } - } - } else { - /* display only the sequence that matches the specified type */ - if (upbound < lowbound) { - ret += tipc_snprintf(buf + ret, len - ret, - "invalid name sequence specified\n"); - return ret; - } - ret += nametbl_header(buf + ret, len - ret, depth); - i = hash(type); - seq_head = &tn->nametbl->seq_hlist[i]; - hlist_for_each_entry_rcu(seq, seq_head, ns_list) { - if (seq->type == type) { - ret += nameseq_list(seq, buf + ret, len - ret, - depth, type, - lowbound, upbound, i); - break; - } - } - } - return ret; -} - -struct sk_buff *tipc_nametbl_get(struct net *net, const void *req_tlv_area, - int req_tlv_space) -{ - struct sk_buff *buf; - struct tipc_name_table_query *argv; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - int str_len; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NAME_TBL_QUERY)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area); - rcu_read_lock(); - str_len = nametbl_list(net, pb, pb_len, ntohl(argv->depth), - ntohl(argv->type), - ntohl(argv->lowbound), ntohl(argv->upbound)); - rcu_read_unlock(); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, 
TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - int tipc_nametbl_init(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 0304ddc6b101..1524a73830f7 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -97,8 +97,6 @@ struct name_table { int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); -struct sk_buff *tipc_nametbl_get(struct net *net, const void *req_tlv_area, - int req_tlv_space); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, u32 limit, struct tipc_plist *dports); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 02461233b6d8..40c24ea31231 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -35,6 +35,7 @@ #include "config.h" #include "bearer.h" #include "link.h" +#include "name_table.h" #include #include @@ -58,6 +59,7 @@ struct tipc_nl_compat_msg { }; struct tipc_nl_compat_cmd_dump { + int (*header)(struct tipc_nl_compat_msg *); int (*dumpit)(struct sk_buff *, struct netlink_callback *); int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); }; @@ -246,6 +248,9 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, if (msg->rep_type) tipc_tlv_init(msg->rep, msg->rep_type); + if (cmd->header) + (*cmd->header)(msg); + arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { kfree_skb(msg->rep); @@ -626,6 +631,93 @@ static int tipc_nl_compat_link_reset_stats(struct sk_buff *skb, return 0; } +static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) +{ + int i; + u32 depth; + struct tipc_name_table_query *ntq; + static const char * const header[] = { + "Type ", + "Lower Upper ", + "Port Identity ", + "Publication Scope" + }; + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + + if (depth > 4) + depth = 4; + for (i = 0; i < depth; i++) + tipc_tlv_sprintf(msg->rep, header[i]); + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char port_str[27]; + struct tipc_name_table_query *ntq; + struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1]; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + u32 node, depth, type, lowbound, upbound; + static const char * const scope_str[] = {"", " zone", " cluster", + " node"}; + + nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL); + + nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL], + NULL); + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + type = ntohl(ntq->type); + lowbound = ntohl(ntq->lowbound); + upbound = ntohl(ntq->upbound); + + if (!(depth & TIPC_NTQ_ALLTYPES) && + (type != nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]))) + return 0; + if (lowbound && (lowbound > nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]))) + return 0; + if (upbound && (upbound < nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]))) + return 0; + + tipc_tlv_sprintf(msg->rep, "%-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_TYPE])); + + if (depth == 1) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]), + nla_get_u32(publ[TIPC_NLA_PUBL_UPPER])); + + if (depth == 2) + goto out; + + node = nla_get_u32(publ[TIPC_NLA_PUBL_NODE]); + sprintf(port_str, "<%u.%u.%u:%u>", tipc_zone(node), tipc_cluster(node), + 
tipc_node(node), nla_get_u32(publ[TIPC_NLA_PUBL_REF])); + tipc_tlv_sprintf(msg->rep, "%-26s ", port_str); + + if (depth == 3) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %s", + nla_get_u32(publ[TIPC_NLA_PUBL_REF]), + scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); +out: + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -675,6 +767,14 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) doit.doit = tipc_nl_link_reset_stats; doit.transcode = tipc_nl_compat_link_reset_stats; return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_NAME_TABLE: + msg->req_type = TIPC_TLV_NAME_TBL_QUERY; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.header = tipc_nl_compat_name_table_dump_header; + dump.dumpit = tipc_nl_name_table_dump; + dump.format = tipc_nl_compat_name_table_dump; + return tipc_nl_compat_dumpit(&dump, msg); } return -EOPNOTSUPP; @@ -780,6 +880,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_SET_LINK_PRI: case TIPC_CMD_SET_LINK_WINDOW: case TIPC_CMD_RESET_LINK_STATS: + case TIPC_CMD_SHOW_NAME_TABLE: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From 487d2a3a1326d339ce273ffbcd03247f2b7b052e Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:11 +0100 Subject: tipc: convert legacy nl socket dump to nl compat Convert socket (port) listing to compat dumpit call. If a socket (port) has publications a second dumpit call is issued to collect them and format them into the legacy buffer before continuing to process the sockets (ports). Command converted in this patch: TIPC_CMD_SHOW_PORTS Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/config.c | 3 -- net/tipc/netlink_compat.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++ net/tipc/socket.c | 85 ----------------------------------- net/tipc/socket.h | 1 - 4 files changed, 111 insertions(+), 89 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 7b053fcc4b87..6e4c215879c8 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -212,9 +212,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, case TIPC_CMD_GET_MEDIA_NAMES: rep_tlv_buf = tipc_media_get_names(); break; - case TIPC_CMD_SHOW_PORTS: - rep_tlv_buf = tipc_sk_socks_show(net); - break; case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); break; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 40c24ea31231..48e15a4a36d8 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -36,6 +36,7 @@ #include "bearer.h" #include "link.h" #include "name_table.h" +#include "socket.h" #include #include @@ -718,6 +719,109 @@ out: return 0; } +static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + u32 type, lower, upper; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + + nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], NULL); + + type = nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]); + lower = nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]); + upper = nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]); + + if (lower == upper) + tipc_tlv_sprintf(msg->rep, " {%u,%u}", type, lower); + else + tipc_tlv_sprintf(msg->rep, " {%u,%u,%u}", type, lower, upper); + + return 0; +} + +static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) +{ + int err; + void *hdr; + struct nlattr *nest; + struct sk_buff *args; + struct tipc_nl_compat_cmd_dump dump; + + args = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!args) + return -ENOMEM; + + hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_PUBL_GET); + + nest = nla_nest_start(args, TIPC_NLA_SOCK); + if (!nest) { + kfree_skb(args); + return -EMSGSIZE; + } + + if (nla_put_u32(args, TIPC_NLA_SOCK_REF, sock)) { + kfree_skb(args); + return -EMSGSIZE; + } + + nla_nest_end(args, nest); + genlmsg_end(args, hdr); + + dump.dumpit = tipc_nl_publ_dump; + dump.format = __tipc_nl_compat_publ_dump; + + err = __tipc_nl_compat_dumpit(&dump, msg, args); + + kfree_skb(args); + + return err; +} + +static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + int err; + u32 sock_ref; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], NULL); + + sock_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tipc_tlv_sprintf(msg->rep, "%u:", sock_ref); + + if (sock[TIPC_NLA_SOCK_CON]) { + u32 node; + struct nlattr *con[TIPC_NLA_CON_MAX + 1]; + + nla_parse_nested(con, TIPC_NLA_CON_MAX, sock[TIPC_NLA_SOCK_CON], + NULL); + + node = nla_get_u32(con[TIPC_NLA_CON_NODE]); + tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", + tipc_zone(node), + tipc_cluster(node), + tipc_node(node), + nla_get_u32(con[TIPC_NLA_CON_SOCK])); + + if (con[TIPC_NLA_CON_FLAG]) + tipc_tlv_sprintf(msg->rep, " via {%u,%u}\n", + nla_get_u32(con[TIPC_NLA_CON_TYPE]), + nla_get_u32(con[TIPC_NLA_CON_INST])); + else + tipc_tlv_sprintf(msg->rep, "\n"); + } else if (sock[TIPC_NLA_SOCK_HAS_PUBL]) { + tipc_tlv_sprintf(msg->rep, " bound to"); + + err = tipc_nl_compat_publ_dump(msg, sock_ref); + if (err) + return err; + } + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + 
static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -775,6 +879,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) dump.dumpit = tipc_nl_name_table_dump; dump.format = tipc_nl_compat_name_table_dump; return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_PORTS: + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_sk_dump; + dump.format = tipc_nl_compat_sk_dump; + return tipc_nl_compat_dumpit(&dump, msg); } return -EOPNOTSUPP; @@ -881,6 +991,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_SET_LINK_WINDOW: case TIPC_CMD_RESET_LINK_STATS: case TIPC_CMD_SHOW_NAME_TABLE: + case TIPC_CMD_SHOW_PORTS: return tipc_nl_compat_recv(skb, info); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d76c171f7b7e..e77d738bb771 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2281,91 +2281,6 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, return rc; } -static int tipc_sk_show(struct tipc_sock *tsk, char *buf, - int len, int full_id) -{ - struct net *net = sock_net(&tsk->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct publication *publ; - int ret; - - if (full_id) - ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:", - tipc_zone(tn->own_addr), - tipc_cluster(tn->own_addr), - tipc_node(tn->own_addr), tsk->portid); - else - ret = tipc_snprintf(buf, len, "%-10u:", tsk->portid); - - if (tsk->connected) { - u32 dport = tsk_peer_port(tsk); - u32 destnode = tsk_peer_node(tsk); - - ret += tipc_snprintf(buf + ret, len - ret, - " connected to <%u.%u.%u:%u>", - tipc_zone(destnode), - tipc_cluster(destnode), - tipc_node(destnode), dport); - if (tsk->conn_type != 0) - ret += tipc_snprintf(buf + ret, len - ret, - " via {%u,%u}", tsk->conn_type, - tsk->conn_instance); - } else if (tsk->published) { - ret += tipc_snprintf(buf + ret, len - ret, " bound to"); - list_for_each_entry(publ, &tsk->publications, pport_list) { - if (publ->lower == publ->upper) - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u}", publ->type, - publ->lower); - else - ret += tipc_snprintf(buf + ret, len - ret, - " {%u,%u,%u}", publ->type, - publ->lower, publ->upper); - } - } - ret += tipc_snprintf(buf + ret, len - ret, "\n"); - return ret; -} - -struct sk_buff *tipc_sk_socks_show(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - const struct bucket_table *tbl; - struct rhash_head *pos; - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - struct tipc_sock *tsk; - int str_len = 0; - int i; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - rcu_read_lock(); - tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { - spin_lock_bh(&tsk->sk.sk_lock.slock); - str_len += tipc_sk_show(tsk, pb + str_len, - pb_len - str_len, 0); - spin_unlock_bh(&tsk->sk.sk_lock.slock); - } - } - rcu_read_unlock(); - - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - /* tipc_sk_reinit: set non-zero address in all existing sockets * when we go from standalone to network mode. 
*/ diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 8be0da7df8fc..238f1b7bd9bd 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -49,7 +49,6 @@ void tipc_sock_release_local(struct socket *sock); int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, int flags); int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); -struct sk_buff *tipc_sk_socks_show(struct net *net); void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq); void tipc_sk_reinit(struct net *net); -- cgit v1.2.3 From 5bfc335a637ad4aecededb67b0075aae72a6dbb3 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:12 +0100 Subject: tipc: convert legacy nl media dump to nl compat Convert TIPC_CMD_GET_MEDIA_NAMES to compat dumpit. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 20 -------------------- net/tipc/bearer.h | 1 - net/tipc/config.c | 3 --- net/tipc/netlink_compat.c | 19 +++++++++++++++++++ 4 files changed, 19 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index de1c800ef806..c7e3b5d3baa9 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -123,26 +123,6 @@ void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) } } -/** - * tipc_media_get_names - record names of registered media in buffer - */ -struct sk_buff *tipc_media_get_names(void) -{ - struct sk_buff *buf; - int i; - - buf = tipc_cfg_reply_alloc(MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME)); - if (!buf) - return NULL; - - for (i = 0; media_info_array[i] != NULL; i++) { - tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, - media_info_array[i]->name, - strlen(media_info_array[i]->name) + 1); - } - return buf; -} - /** * bearer_name_validate - validate & (optionally) deconstruct bearer name * @name: ptr to bearer name string diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 06f25d144871..6b17795ff8bc 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -196,7 +196,6 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); -struct sk_buff *tipc_media_get_names(void); int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, diff --git a/net/tipc/config.c b/net/tipc/config.c index 6e4c215879c8..df410823e889 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -209,9 +209,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_node_get_nodes(net, req_tlv_area, req_tlv_space); break; - case TIPC_CMD_GET_MEDIA_NAMES: - rep_tlv_buf = tipc_media_get_names(); - break; case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); break; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 48e15a4a36d8..d23075efaa76 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -822,6 +822,19 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, return 0; } +static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1]; + + nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], + NULL); + + 
return tipc_add_tlv(msg->rep, TIPC_TLV_MEDIA_NAME, + nla_data(media[TIPC_NLA_MEDIA_NAME]), + nla_len(media[TIPC_NLA_MEDIA_NAME])); +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -885,6 +898,11 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) dump.dumpit = tipc_nl_sk_dump; dump.format = tipc_nl_compat_sk_dump; return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_MEDIA_NAMES: + msg->rep_size = MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME); + dump.dumpit = tipc_nl_media_dump; + dump.format = tipc_nl_compat_media_dump; + return tipc_nl_compat_dumpit(&dump, msg); } return -EOPNOTSUPP; @@ -992,6 +1010,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_RESET_LINK_STATS: case TIPC_CMD_SHOW_NAME_TABLE: case TIPC_CMD_SHOW_PORTS: + case TIPC_CMD_GET_MEDIA_NAMES: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From 4b28cb581dd0df8d8ff19151f39683f641e576ba Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:13 +0100 Subject: tipc: convert legacy nl node dump to nl compat Convert TIPC_CMD_GET_NODES to compat dumpit and remove global node counter solely used by the legacy API. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/config.c | 4 ---- net/tipc/netlink_compat.c | 22 +++++++++++++++++++ net/tipc/node.c | 54 ----------------------------------------------- net/tipc/node.h | 2 -- 4 files changed, 22 insertions(+), 60 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index df410823e889..74babd75c07d 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -205,10 +205,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, case TIPC_CMD_NOOP: rep_tlv_buf = tipc_cfg_reply_none(); break; - case TIPC_CMD_GET_NODES: - rep_tlv_buf = tipc_node_get_nodes(net, req_tlv_area, - req_tlv_space); - break; case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); break; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index d23075efaa76..0dc448e77e13 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -37,6 +37,7 @@ #include "link.h" #include "name_table.h" #include "socket.h" +#include "node.h" #include #include @@ -835,6 +836,21 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, nla_len(media[TIPC_NLA_MEDIA_NAME])); } +static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct tipc_node_info node_info; + struct nlattr *node[TIPC_NLA_NODE_MAX + 1]; + + nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], NULL); + + node_info.addr = htonl(nla_get_u32(node[TIPC_NLA_NODE_ADDR])); + node_info.up = htonl(nla_get_flag(node[TIPC_NLA_NODE_UP])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_NODE_INFO, &node_info, + sizeof(node_info)); +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -903,6 +919,11 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) dump.dumpit = tipc_nl_media_dump; dump.format = tipc_nl_compat_media_dump; return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_NODES: + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_node_dump; + dump.format = tipc_nl_compat_node_dump; + return tipc_nl_compat_dumpit(&dump, msg); } return -EOPNOTSUPP; @@ -1011,6 +1032,7 @@ static int 
tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_SHOW_NAME_TABLE: case TIPC_CMD_SHOW_PORTS: case TIPC_CMD_GET_MEDIA_NAMES: + case TIPC_CMD_GET_NODES: return tipc_nl_compat_recv(skb, info); } diff --git a/net/tipc/node.c b/net/tipc/node.c index 46b87f77d342..ddaa2bbaa35d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -120,7 +120,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; - tn->num_nodes++; exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; @@ -131,8 +130,6 @@ static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) list_del_rcu(&n_ptr->list); hlist_del_rcu(&n_ptr->hash); kfree_rcu(n_ptr, rcu); - - tn->num_nodes--; } void tipc_node_stop(struct net *net) @@ -407,57 +404,6 @@ static void node_lost_contact(struct tipc_node *n_ptr) } } -struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area, - int req_tlv_space) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 domain; - struct sk_buff *buf; - struct tipc_node *n_ptr; - struct tipc_node_info node_info; - u32 payload_size; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (!tipc_addr_domain_valid(domain)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network address)"); - - spin_lock_bh(&tn->node_list_lock); - if (!tn->num_nodes) { - spin_unlock_bh(&tn->node_list_lock); - return tipc_cfg_reply_none(); - } - - /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * tn->num_nodes; - if (payload_size > 32768u) { - spin_unlock_bh(&tn->node_list_lock); - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (too many nodes)"); - } - spin_unlock_bh(&tn->node_list_lock); - - buf = tipc_cfg_reply_alloc(payload_size); - if (!buf) - return NULL; - - /* Add TLVs for all nodes in scope */ - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - if (!tipc_in_scope(domain, n_ptr->addr)) - continue; - node_info.addr = htonl(n_ptr->addr); - node_info.up = htonl(tipc_node_is_up(n_ptr)); - tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO, - &node_info, sizeof(node_info)); - } - rcu_read_unlock(); - return buf; -} - /** * tipc_node_get_linkname - get the name of a link * diff --git a/net/tipc/node.h b/net/tipc/node.h index 59dafee61aa0..3d18c66b7f78 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -142,8 +142,6 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); -struct sk_buff *tipc_node_get_nodes(struct net *net, const void *req_tlv_area, - int req_tlv_space); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); void tipc_node_unlock(struct tipc_node *node); -- cgit v1.2.3 From d7cc75d3cb6beac88a2bd041a7700154b73a989d Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:14 +0100 Subject: tipc: convert legacy nl node addr set to nl compat Convert TIPC_CMD_SET_NODE_ADDR to compat doit. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/config.c | 26 -------------------------- net/tipc/net.c | 2 +- net/tipc/netlink_compat.c | 26 ++++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 74babd75c07d..2a2d05be8a27 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -134,29 +134,6 @@ static struct sk_buff *tipc_show_stats(void) return buf; } -static struct sk_buff *cfg_set_own_addr(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 addr; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - addr = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (addr == tn->own_addr) - return tipc_cfg_reply_none(); - if (!tipc_addr_node_valid(addr)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (node address)"); - if (tn->own_addr) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change node address once assigned)"); - if (!tipc_net_start(net, addr)) - return tipc_cfg_reply_none(); - - return tipc_cfg_reply_error_string("cannot change to network mode"); -} - static struct sk_buff *cfg_set_netid(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -208,9 +185,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); break; - case TIPC_CMD_SET_NODE_ADDR: - rep_tlv_buf = cfg_set_own_addr(net); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(net); break; diff --git a/net/tipc/net.c b/net/tipc/net.c index 8b0fb0966628..e4028f927c8a 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -208,7 +208,7 @@ out: int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); + struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; int err; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 0dc448e77e13..aa0e3256ae93 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -851,6 +851,26 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, sizeof(node_info)); } +static int tipc_nl_compat_net_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + u32 val; + struct nlattr *net; + + val = ntohl(*(__be32 *)TLV_DATA(msg->req)); + + net = nla_nest_start(skb, TIPC_NLA_NET); + if (!net) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_NET_ADDR, val)) + return -EMSGSIZE; + + nla_nest_end(skb, net); + + return 0; +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -924,6 +944,11 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) dump.dumpit = tipc_nl_node_dump; dump.format = tipc_nl_compat_node_dump; return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_NODE_ADDR: + msg->req_type = TIPC_TLV_NET_ADDR; + doit.doit = tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); } return -EOPNOTSUPP; @@ -1033,6 +1058,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_SHOW_PORTS: case TIPC_CMD_GET_MEDIA_NAMES: case TIPC_CMD_GET_NODES: + case TIPC_CMD_SET_NODE_ADDR: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From 964f9501c1910c0835eee3cf870ba5aa44db1241 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 
09:50:15 +0100 Subject: tipc: convert legacy nl net id set to nl compat Convert TIPC_CMD_SET_NETID to compat doit. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/config.c | 23 ----------------------- net/tipc/netlink_compat.c | 17 ++++++++++++++--- 2 files changed, 14 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 2a2d05be8a27..080cc92eedca 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -134,26 +134,6 @@ static struct sk_buff *tipc_show_stats(void) return buf; } -static struct sk_buff *cfg_set_netid(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tn->net_id) - return tipc_cfg_reply_none(); - if (value < 1 || value > 9999) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (network id must be 1-9999)"); - if (tn->own_addr) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change network id once TIPC has joined a network)"); - tn->net_id = value; - return tipc_cfg_reply_none(); -} - struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, const void *request_area, int request_space, int reply_headroom) @@ -185,9 +165,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); break; - case TIPC_CMD_SET_NETID: - rep_tlv_buf = cfg_set_netid(net); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tn->net_id); break; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index aa0e3256ae93..c355476a6e7c 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -38,6 +38,7 @@ #include "name_table.h" #include "socket.h" #include "node.h" +#include "net.h" #include #include @@ -863,9 +864,13 @@ static int tipc_nl_compat_net_set(struct sk_buff *skb, if (!net) return -EMSGSIZE; - if (nla_put_u32(skb, TIPC_NLA_NET_ADDR, val)) - return -EMSGSIZE; - + if (msg->cmd == TIPC_CMD_SET_NODE_ADDR) { + if (nla_put_u32(skb, TIPC_NLA_NET_ADDR, val)) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_NETID) { + if (nla_put_u32(skb, TIPC_NLA_NET_ID, val)) + return -EMSGSIZE; + } nla_nest_end(skb, net); return 0; @@ -949,6 +954,11 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) doit.doit = tipc_nl_net_set; doit.transcode = tipc_nl_compat_net_set; return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SET_NETID: + msg->req_type = TIPC_TLV_UNSIGNED; + doit.doit = tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); } return -EOPNOTSUPP; @@ -1059,6 +1069,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_GET_MEDIA_NAMES: case TIPC_CMD_GET_NODES: case TIPC_CMD_SET_NODE_ADDR: + case TIPC_CMD_SET_NETID: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From 3c26181c5b1b0b49e147bac8a8d012234f813a5e Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:16 +0100 Subject: tipc: convert legacy nl net id get to nl compat Convert TIPC_CMD_GET_NETID to compat dumpit. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/config.c | 23 ----------------------- net/tipc/netlink_compat.c | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 080cc92eedca..019f46c0690c 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -72,25 +72,6 @@ int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, return 1; } -static struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value) -{ - struct sk_buff *buf; - __be32 value_net; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(value))); - if (buf) { - value_net = htonl(value); - tipc_cfg_append_tlv(buf, tlv_type, &value_net, - sizeof(value_net)); - } - return buf; -} - -static struct sk_buff *tipc_cfg_reply_unsigned(u32 value) -{ - return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value); -} - struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string) { struct sk_buff *buf; @@ -139,7 +120,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, int reply_headroom) { struct sk_buff *rep_tlv_buf; - struct tipc_net *tn = net_generic(net, tipc_net_id); rtnl_lock(); @@ -165,9 +145,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, case TIPC_CMD_SHOW_STATS: rep_tlv_buf = tipc_show_stats(); break; - case TIPC_CMD_GET_NETID: - rep_tlv_buf = tipc_cfg_reply_unsigned(tn->net_id); - break; case TIPC_CMD_NOT_NET_ADMIN: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index c355476a6e7c..415909054ec0 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -876,6 +876,18 @@ static int tipc_nl_compat_net_set(struct sk_buff *skb, return 0; } +static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + __be32 id; + struct nlattr *net[TIPC_NLA_NET_MAX + 1]; + + nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], NULL); + id = htonl(nla_get_u32(net[TIPC_NLA_NET_ID])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, &id, sizeof(id)); +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -959,6 +971,11 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) doit.doit = tipc_nl_net_set; doit.transcode = tipc_nl_compat_net_set; return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_GET_NETID: + msg->rep_size = sizeof(u32); + dump.dumpit = tipc_nl_net_dump; + dump.format = tipc_nl_compat_net_dump; + return tipc_nl_compat_dumpit(&dump, msg); } return -EOPNOTSUPP; @@ -1070,6 +1087,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_GET_NODES: case TIPC_CMD_SET_NODE_ADDR: case TIPC_CMD_SET_NETID: + case TIPC_CMD_GET_NETID: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From 5a81a6377b6083fccffdfb7a21ec080b8d58475b Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:17 +0100 Subject: tipc: convert legacy nl stats show to nl compat Convert TIPC_CMD_SHOW_STATS to compat layer. This command does not have any counterpart in the new API, meaning it now solely exists as a function in the compat layer. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/config.c | 35 ----------------------------------- net/tipc/netlink_compat.c | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 019f46c0690c..c2ad2ff32a15 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -83,38 +83,6 @@ struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string) return buf; } -static struct sk_buff *tipc_show_stats(void) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - char *pb; - int pb_len; - int str_len; - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(u32 *)TLV_DATA(req_tlv_area)); - if (value != 0) - return tipc_cfg_reply_error_string("unsupported argument"); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (buf == NULL) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - - str_len = tipc_snprintf(pb, pb_len, "TIPC version " TIPC_MOD_VER "\n"); - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, const void *request_area, int request_space, int reply_headroom) @@ -142,9 +110,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, case TIPC_CMD_NOOP: rep_tlv_buf = tipc_cfg_reply_none(); break; - case TIPC_CMD_SHOW_STATS: - rep_tlv_buf = tipc_show_stats(); - break; case TIPC_CMD_NOT_NET_ADMIN: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 415909054ec0..cb9086d259df 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -888,6 +888,18 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, &id, sizeof(id)); } +static int tipc_cmd_show_stats_compat(struct tipc_nl_compat_msg *msg) +{ + msg->rep = tipc_tlv_alloc(ULTRA_STRING_MAX_LEN); + if (!msg->rep) + return -ENOMEM; + + tipc_tlv_init(msg->rep, TIPC_TLV_ULTRA_STRING); + tipc_tlv_sprintf(msg->rep, "TIPC version " TIPC_MOD_VER "\n"); + + return 0; +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -976,6 +988,8 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) dump.dumpit = tipc_nl_net_dump; dump.format = tipc_nl_compat_net_dump; return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_STATS: + return tipc_cmd_show_stats_compat(msg); } return -EOPNOTSUPP; @@ -1088,6 +1102,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_SET_NODE_ADDR: case TIPC_CMD_SET_NETID: case TIPC_CMD_GET_NETID: + case TIPC_CMD_SHOW_STATS: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3 From 22ae7cff509f3bb22caaa0003f67eeb93d338fed Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:18 +0100 Subject: tipc: nl compat add noop and remove legacy nl framework Add TIPC_CMD_NOOP to compat layer and remove the old framework. All legacy nl commands are now converted to the compat layer in netlink_compat.c. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/Makefile | 2 +- net/tipc/bearer.c | 1 - net/tipc/config.c | 157 ---------------------------------------------- net/tipc/config.h | 67 -------------------- net/tipc/core.c | 3 +- net/tipc/link.c | 1 - net/tipc/log.c | 1 - net/tipc/name_table.c | 4 +- net/tipc/net.c | 2 - net/tipc/netlink_compat.c | 73 ++------------------- net/tipc/node.c | 2 +- net/tipc/socket.c | 1 - 12 files changed, 13 insertions(+), 301 deletions(-) delete mode 100644 net/tipc/config.c delete mode 100644 net/tipc/config.h (limited to 'net') diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 69b82bbc60d3..6864b9de2404 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_TIPC) := tipc.o -tipc-y += addr.o bcast.o bearer.o config.o \ +tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o log.o eth_media.o \ diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index c7e3b5d3baa9..858c5a6c32df 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -36,7 +36,6 @@ #include #include "core.h" -#include "config.h" #include "bearer.h" #include "link.h" #include "discover.h" diff --git a/net/tipc/config.c b/net/tipc/config.c deleted file mode 100644 index c2ad2ff32a15..000000000000 --- a/net/tipc/config.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * net/tipc/config.c: TIPC configuration management code - * - * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2013, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "core.h" -#include "socket.h" -#include "name_table.h" -#include "config.h" -#include "server.h" - -#define REPLY_TRUNCATED "\n" - -static const void *req_tlv_area; /* request message TLV area */ -static int req_tlv_space; /* request message TLV area size */ -static int rep_headroom; /* reply message headroom to use */ - -struct sk_buff *tipc_cfg_reply_alloc(int payload_size) -{ - struct sk_buff *buf; - - buf = alloc_skb(rep_headroom + payload_size, GFP_ATOMIC); - if (buf) - skb_reserve(buf, rep_headroom); - return buf; -} - -int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, - void *tlv_data, int tlv_data_size) -{ - struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf); - int new_tlv_space = TLV_SPACE(tlv_data_size); - - if (skb_tailroom(buf) < new_tlv_space) - return 0; - skb_put(buf, new_tlv_space); - tlv->tlv_type = htons(tlv_type); - tlv->tlv_len = htons(TLV_LENGTH(tlv_data_size)); - if (tlv_data_size && tlv_data) - memcpy(TLV_DATA(tlv), tlv_data, tlv_data_size); - return 1; -} - -struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string) -{ - struct sk_buff *buf; - int string_len = strlen(string) + 1; - - buf = tipc_cfg_reply_alloc(TLV_SPACE(string_len)); - if (buf) - tipc_cfg_append_tlv(buf, tlv_type, string, string_len); - return buf; -} - -struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, - const void *request_area, int request_space, - int reply_headroom) -{ - struct sk_buff *rep_tlv_buf; - - rtnl_lock(); - - /* Save request and reply details in a well-known location */ - req_tlv_area = request_area; - req_tlv_space = request_space; - rep_headroom = reply_headroom; - - /* Check command authorization */ - if (likely(in_own_node(net, orig_node))) { - /* command is permitted */ - } else { - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot be done remotely)"); - goto exit; - } - - /* Call appropriate processing routine */ - switch (cmd) { - case TIPC_CMD_NOOP: - rep_tlv_buf = tipc_cfg_reply_none(); - break; - case TIPC_CMD_NOT_NET_ADMIN: - rep_tlv_buf = - tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); - break; - case TIPC_CMD_SET_MAX_ZONES: - case TIPC_CMD_GET_MAX_ZONES: - case TIPC_CMD_SET_MAX_SLAVES: - case TIPC_CMD_GET_MAX_SLAVES: - case TIPC_CMD_SET_MAX_CLUSTERS: - case TIPC_CMD_GET_MAX_CLUSTERS: - case TIPC_CMD_SET_MAX_NODES: - case TIPC_CMD_GET_MAX_NODES: - case TIPC_CMD_SET_MAX_SUBSCR: - case TIPC_CMD_GET_MAX_SUBSCR: - case TIPC_CMD_SET_MAX_PUBL: - case TIPC_CMD_GET_MAX_PUBL: - case TIPC_CMD_SET_LOG_SIZE: - case TIPC_CMD_SET_REMOTE_MNG: - case TIPC_CMD_GET_REMOTE_MNG: - case TIPC_CMD_DUMP_LOG: - case TIPC_CMD_SET_MAX_PORTS: - case TIPC_CMD_GET_MAX_PORTS: - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (obsolete command)"); - break; - default: - rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (unknown command)"); - break; - } - - WARN_ON(rep_tlv_buf->len > TLV_SPACE(ULTRA_STRING_MAX_LEN)); - - /* Append an error message if we cannot return all requested data */ - if (rep_tlv_buf->len == TLV_SPACE(ULTRA_STRING_MAX_LEN)) { - if (*(rep_tlv_buf->data + ULTRA_STRING_MAX_LEN) != '\0') - sprintf(rep_tlv_buf->data + rep_tlv_buf->len - - sizeof(REPLY_TRUNCATED) - 1, REPLY_TRUNCATED); - } - - /* Return reply buffer */ -exit: - rtnl_unlock(); - return rep_tlv_buf; -} diff --git a/net/tipc/config.h b/net/tipc/config.h deleted file mode 100644 index 9e9b575fc429..000000000000 --- a/net/tipc/config.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * 
net/tipc/config.h: Include file for TIPC configuration service code - * - * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _TIPC_CONFIG_H -#define _TIPC_CONFIG_H - -#include "link.h" - -#define ULTRA_STRING_MAX_LEN 32768 - -struct sk_buff *tipc_cfg_reply_alloc(int payload_size); -int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, - void *tlv_data, int tlv_data_size); -struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string); - -static inline struct sk_buff *tipc_cfg_reply_none(void) -{ - return tipc_cfg_reply_alloc(0); -} - -static inline struct sk_buff *tipc_cfg_reply_error_string(char *string) -{ - return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string); -} - -static inline struct sk_buff *tipc_cfg_reply_ultra_string(char *string) -{ - return tipc_cfg_reply_string_type(TIPC_TLV_ULTRA_STRING, string); -} - -struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, - const void *req_tlv_area, int req_tlv_space, - int headroom); -#endif diff --git a/net/tipc/core.c b/net/tipc/core.c index 2d06d1f8b6e6..935205e6bcfe 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -39,7 +39,8 @@ #include "core.h" #include "name_table.h" #include "subscr.h" -#include "config.h" +#include "bearer.h" +#include "net.h" #include "socket.h" #include diff --git a/net/tipc/link.c b/net/tipc/link.c index dfe6f4d0b402..a4cf364316de 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -40,7 +40,6 @@ #include "socket.h" #include "name_distr.h" #include "discover.h" -#include "config.h" #include "netlink.h" #include diff --git a/net/tipc/log.c b/net/tipc/log.c index abef644f27d8..b186af06e361 100644 --- a/net/tipc/log.c +++ b/net/tipc/log.c @@ -35,7 +35,6 @@ */ #include "core.h" -#include "config.h" /** * tipc_snprintf - append formatted output to print buffer diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index c8eaa2afe875..105ba7adf06f 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -36,11 +36,13 @@ #include #include "core.h" -#include "config.h" +#include "netlink.h" #include "name_table.h" #include "name_distr.h" #include "subscr.h" #include "bcast.h" +#include "addr.h" +#include #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ diff --git a/net/tipc/net.c b/net/tipc/net.c index e4028f927c8a..a54f3cbe2246 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -40,8 +40,6 @@ #include "subscr.h" #include "socket.h" #include "node.h" -#include "config.h" -#include "bcast.h" static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index cb9086d259df..ce9121e8e990 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -32,7 +32,6 @@ */ #include "core.h" -#include "config.h" #include "bearer.h" #include "link.h" #include "name_table.h" @@ -909,6 +908,11 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) memset(&doit, 0, sizeof(doit)); switch (msg->cmd) { + case TIPC_CMD_NOOP: + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + return 0; case TIPC_CMD_GET_BEARER_NAMES: msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME); dump.dumpit = tipc_nl_bearer_dump; @@ -1044,71 +1048,6 @@ send: return err; } -static int handle_cmd(struct sk_buff *skb, struct genl_info *info) -{ - struct net *net = genl_info_net(info); - struct sk_buff *rep_buf; - struct nlmsghdr *rep_nlh; - struct nlmsghdr *req_nlh = info->nlhdr; - struct tipc_genlmsghdr *req_userhdr = info->userhdr; - int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); - u16 cmd; - - if 
((req_userhdr->cmd & 0xC000) && - (!netlink_net_capable(skb, CAP_NET_ADMIN))) - cmd = TIPC_CMD_NOT_NET_ADMIN; - else - cmd = req_userhdr->cmd; - - rep_buf = tipc_cfg_do_cmd(net, req_userhdr->dest, cmd, - nlmsg_data(req_nlh) + GENL_HDRLEN + - TIPC_GENL_HDRLEN, - nlmsg_attrlen(req_nlh, GENL_HDRLEN + - TIPC_GENL_HDRLEN), hdr_space); - - if (rep_buf) { - skb_push(rep_buf, hdr_space); - rep_nlh = nlmsg_hdr(rep_buf); - memcpy(rep_nlh, req_nlh, hdr_space); - rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(net, rep_buf, NETLINK_CB(skb).portid); - } - - return 0; -} - -/* Temporary function to keep functionality throughout the patchset - * without having to mess with the global variables and other trickery - * of the old API. - */ -static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) -{ - struct tipc_genlmsghdr *req = info->userhdr; - - switch (req->cmd) { - case TIPC_CMD_GET_BEARER_NAMES: - case TIPC_CMD_ENABLE_BEARER: - case TIPC_CMD_DISABLE_BEARER: - case TIPC_CMD_SHOW_LINK_STATS: - case TIPC_CMD_GET_LINKS: - case TIPC_CMD_SET_LINK_TOL: - case TIPC_CMD_SET_LINK_PRI: - case TIPC_CMD_SET_LINK_WINDOW: - case TIPC_CMD_RESET_LINK_STATS: - case TIPC_CMD_SHOW_NAME_TABLE: - case TIPC_CMD_SHOW_PORTS: - case TIPC_CMD_GET_MEDIA_NAMES: - case TIPC_CMD_GET_NODES: - case TIPC_CMD_SET_NODE_ADDR: - case TIPC_CMD_SET_NETID: - case TIPC_CMD_GET_NETID: - case TIPC_CMD_SHOW_STATS: - return tipc_nl_compat_recv(skb, info); - } - - return handle_cmd(skb, info); -} - static struct genl_family tipc_genl_compat_family = { .id = GENL_ID_GENERATE, .name = TIPC_GENL_NAME, @@ -1121,7 +1060,7 @@ static struct genl_family tipc_genl_compat_family = { static struct genl_ops tipc_genl_compat_ops[] = { { .cmd = TIPC_GENL_CMD, - .doit = tipc_nl_compat_tmp_wrap, + .doit = tipc_nl_compat_recv, }, }; diff --git a/net/tipc/node.c b/net/tipc/node.c index ddaa2bbaa35d..86152de8248d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -35,7 +35,7 @@ */ #include "core.h" -#include "config.h" +#include "link.h" #include "node.h" #include "name_distr.h" #include "socket.h" diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e77d738bb771..f73e975af80b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -40,7 +40,6 @@ #include "name_table.h" #include "node.h" #include "link.h" -#include "config.h" #include "name_distr.h" #include "socket.h" -- cgit v1.2.3 From 941787b82982b3f33ac398c8c00035ddd0f8c514 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:19 +0100 Subject: tipc: remove tipc_snprintf tipc_snprintf() was heavily utilized by the old netlink API which no longer exists (now netlink compat). In this patch we swap tipc_snprintf() to the identical scnprintf() in the only remaining occurrence. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/Makefile | 2 +- net/tipc/bearer.c | 6 +++--- net/tipc/core.h | 2 -- net/tipc/log.c | 54 ------------------------------------------------------ 4 files changed, 4 insertions(+), 60 deletions(-) delete mode 100644 net/tipc/log.c (limited to 'net') diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 6864b9de2404..599b1a540d2b 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_TIPC) := tipc.o tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ - netlink.o netlink_compat.o node.o socket.o log.o eth_media.o \ + netlink.o netlink_compat.o node.o socket.o eth_media.o \ server.o socket.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 858c5a6c32df..48852c2dcc03 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -111,13 +111,13 @@ void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) m_ptr = media_find_id(a->media_id); if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) - ret = tipc_snprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); + ret = scnprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); else { u32 i; - ret = tipc_snprintf(buf, len, "UNKNOWN(%u)", a->media_id); + ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id); for (i = 0; i < sizeof(a->value); i++) - ret += tipc_snprintf(buf - ret, len + ret, + ret += scnprintf(buf - ret, len + ret, "-%02x", a->value[i]); } } diff --git a/net/tipc/core.h b/net/tipc/core.h index 451c346fd3cf..3dc68c7a966d 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -70,8 +70,6 @@ #define TIPC_MOD_VER "2.0.0" -int tipc_snprintf(char *buf, int len, const char *fmt, ...); - extern int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; diff --git a/net/tipc/log.c b/net/tipc/log.c deleted file mode 100644 index b186af06e361..000000000000 --- a/net/tipc/log.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * net/tipc/log.c: TIPC print buffer routines for debugging - * - * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "core.h" - -/** - * tipc_snprintf - append formatted output to print buffer - * @buf: pointer to print buffer - * @len: buffer length - * @fmt: formatted info to be printed - */ -int tipc_snprintf(char *buf, int len, const char *fmt, ...) -{ - int i; - va_list args; - - va_start(args, fmt); - i = vscnprintf(buf, len, fmt, args); - va_end(args); - return i; -} -- cgit v1.2.3 From b750f5b4273316b4bb4d0a4a474c1eeaf0833648 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 9 Feb 2015 02:29:55 +0100 Subject: net: dsa: Remove redundant phy_attach() dsa_slave_phy_setup() finds the phy for the port via device tree and using of_phy_connect(), or it uses the fall back of taking a phy from the switch internal mdio bus and calling phy_connect_direct(). Either way, if a phy is found, phy_attach_direct() is called to attach the phy to the slave device. In dsa_slave_create(), a second call to phy_attach() is made. This results in the warning "PHY already attached". Remove this second, redundant attaching of the phy. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 589aafd01fc5..d104ae15836f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -676,18 +676,5 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); - if (p->phy != NULL) { - if (ds->drv->get_phy_flags) - p->phy->dev_flags |= ds->drv->get_phy_flags(ds, port); - - phy_attach(slave_dev, dev_name(&p->phy->dev), - PHY_INTERFACE_MODE_GMII); - - p->phy->autoneg = AUTONEG_ENABLE; - p->phy->speed = 0; - p->phy->duplex = 0; - p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg; - } - return slave_dev; } -- cgit v1.2.3 From 25d3b493a52d4ece811ba07881558fc7f6778fb8 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Mon, 9 Feb 2015 20:16:17 +0900 Subject: bridge: Fix inability to add non-vlan fdb entry Bridge's default_pvid adds a vid by default, by which we cannot add a non-vlan fdb entry by default, because br_fdb_add() adds fdb entries for all vlans instead of a non-vlan one when any vlan is configured. # ip link add br0 type bridge # ip link set eth0 master br0 # bridge fdb add 12:34:56:78:90:ab dev eth0 master temp # bridge fdb show brport eth0 | grep 12:34:56:78:90:ab 12:34:56:78:90:ab dev eth0 vlan 1 static We expect a non-vlan fdb entry as well as vlan 1: 12:34:56:78:90:ab dev eth0 static To fix this, we need to insert a non-vlan fdb entry if vlan is not specified, even when any vlan is configured. Fixes: 5be5a2df40f0 ("bridge: Add filtering support for default_pvid") Signed-off-by: Toshiaki Makita Signed-off-by: David S. 
Miller --- net/bridge/br_fdb.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index cc36e59db7d7..c041f99f5a78 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -840,10 +840,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* VID was specified, so use it. */ err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { - err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + if (err || !pv) goto out; - } /* We have vlans configured on this port and user didn't * specify a VLAN. To be nice, add/update entry for every @@ -911,16 +910,15 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], err = __br_fdb_delete(p, addr, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { - err = __br_fdb_delete(p, addr, 0); + err = -ENOENT; + err &= __br_fdb_delete(p, addr, 0); + if (!pv) goto out; - } /* We have vlans configured on this port and user didn't * specify a VLAN. To be nice, add/update entry for every * vlan on this port. */ - err = -ENOENT; for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err &= __br_fdb_delete(p, addr, vid); } -- cgit v1.2.3 From 51f30770e50eb787200f30a79105e2615b379334 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 9 Feb 2015 09:38:20 -0500 Subject: ipv6: Fix fragment id assignment on LE arches. Recent commit: 0508c07f5e0c94f38afd5434e8b2a55b84553077 Author: Vlad Yasevich Date: Tue Feb 3 16:36:15 2015 -0500 ipv6: Select fragment id during UFO segmentation if not set. Introduced a bug on LE in how the ipv6 fragment id is assigned. This was caught by a nightly sparse check: Resolve the following sparse error: net/ipv6/output_core.c:57:38: sparse: incorrect type in assignment (different base types) net/ipv6/output_core.c:57:38: expected restricted __be32 [usertype] ip6_frag_id net/ipv6/output_core.c:57:38: got unsigned int [unsigned] [assigned] [usertype] id Fixes: 0508c07f5e0c9 (ipv6: Select fragment id during UFO segmentation if not set.) Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/ipv6/output_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 54520a0bd5e3..a86cf60f0ca6 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -54,7 +54,7 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) id = __ipv6_select_ident(ip6_proxy_idents_hashrnd, &addrs[1], &addrs[0]); - skb_shinfo(skb)->ip6_frag_id = id; + skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -- cgit v1.2.3 From 8381eacf5c3b35cf7755f4bc521c4d56d24c1cd9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 9 Feb 2015 09:38:21 -0500 Subject: ipv6: Make __ipv6_select_ident static Make __ipv6_select_ident() static as it isn't used outside the file. Fixes: 0508c07f5e0c9 (ipv6: Select fragment id during UFO segmentation if not set.) Signed-off-by: Vladislav Yasevich Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 2 -- net/ipv6/output_core.c | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6e416f6d3e3c..fde3b593c3f2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,8 +671,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, - struct in6_addr *src); void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); void ipv6_proxy_select_ident(struct sk_buff *skb); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index a86cf60f0ca6..74581f706c4d 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,7 +9,8 @@ #include #include -u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, struct in6_addr *src) +static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, + struct in6_addr *src) { u32 hash, id; -- cgit v1.2.3 From fd3137cd33ae5590c45c81e9a46fe53b6ab5f66e Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 9 Feb 2015 16:56:37 +0100 Subject: openvswitch: Only set TUNNEL_VXLAN_OPT if VXLAN-GBP metadata is set This avoids setting TUNNEL_VXLAN_OPT for VXLAN frames which don't have any GBP metadata set. It is not invalid to set it but unnecessary. Signed-off-by: Thomas Graf Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/vport-vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index ff07d4062d60..3277a7520e31 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -76,7 +76,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0); vxlan_port = vxlan_vport(vport); - if (vxlan_port->exts & VXLAN_F_GBP) + if (vxlan_port->exts & VXLAN_F_GBP && md->gbp) flags |= TUNNEL_VXLAN_OPT; /* Save outer tunnel values */ -- cgit v1.2.3 From 531c94a9681b8c253fd0490a4ca8bbe01a38c78b Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 9 Feb 2015 12:35:23 -0800 Subject: tcp: don't include Fast Open option in SYN-ACK on pure SYN-data If a server has enabled Fast Open and it receives a pure SYN-data packet (without a Fast Open option), it won't accept the data but it incorrectly returns a SYN-ACK with a Fast Open cookie and also increments the SNMP stat LINUX_MIB_TCPFASTOPENPASSIVEFAIL. This patch makes the server include a Fast Open cookie in SYN-ACK only if the SYN has some Fast Open option (i.e., when client requests or presents a cookie). Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp_fastopen.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 815c85e3b1e0..53db2c309572 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -255,6 +255,9 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + if (foc->len == 0) /* Client requests a cookie */ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); + if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { @@ -265,7 +268,8 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; - if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) && + if (foc->len >= 0 && /* Client presents or requests a cookie */ + tcp_fastopen_cookie_gen(req, skb, &valid_foc) && foc->len == TCP_FASTOPEN_COOKIE_SIZE && foc->len == valid_foc.len && !memcmp(foc->val, valid_foc.val, foc->len)) { @@ -284,11 +288,10 @@ fastopen: LINUX_MIB_TCPFASTOPENPASSIVE); return true; } - } + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + } else if (foc->len > 0) /* Client presents an invalid cookie */ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - NET_INC_STATS_BH(sock_net(sk), foc->len ? - LINUX_MIB_TCPFASTOPENPASSIVEFAIL : - LINUX_MIB_TCPFASTOPENCOOKIEREQD); *foc = valid_foc; return false; } -- cgit v1.2.3 From b0f9ca53cbb103e9240a29a974e0b6085e58f9f7 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Tue, 10 Feb 2015 09:53:16 +0800 Subject: ipv4: Namespecify TCP PMTU mechanism Packetization Layer Path MTU Discovery works separately beside Path MTU Discovery at the IP level, and different net namespaces have different requirements on which one to choose, e.g., a virtualized container instance would require TCP PMTU to probe a usable effective MTU for an underlying tunnel, while the host would employ classical ICMP-based PMTU to function. Hence, make the TCP PMTU mechanism per net namespace to decouple the two functionalities. Furthermore, the probe base MSS should also be configured separately for each namespace. Signed-off-by: Fan Du Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 2 -- net/ipv4/sysctl_net_ipv4.c | 28 ++++++++++++++-------------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 8 +++----- net/ipv4/tcp_timer.c | 7 +++++-- 6 files changed, 25 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e0bdcb147326..dbe225478adb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -82,6 +82,8 @@ struct netns_ipv4 { int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; + int sysctl_tcp_mtu_probing; + int sysctl_tcp_base_mss; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index da4196fb78db..8d6b983d5099 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -262,8 +262,6 @@ extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -extern int sysctl_tcp_mtu_probing; -extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 82601a68cf90..d151539da8e6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -603,20 +603,6 @@ static struct ctl_table ipv4_table[] = { .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, }, - { - .procname = "tcp_mtu_probing", - .data = &sysctl_tcp_mtu_probing, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_base_mss", - .data = &sysctl_tcp_base_mss, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_workaround_signed_windows", .data = &sysctl_tcp_workaround_signed_windows, @@ -883,6 +869,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_mtu_probing", + .data = &init_net.ipv4.sysctl_tcp_mtu_probing, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_base_mss", + .data = &init_net.ipv4.sysctl_tcp_base_mss, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 67bc95fb5d9e..5a2dfed4783b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2459,6 +2459,7 @@ static int __net_init tcp_sk_init(struct net *net) *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } net->ipv4.sysctl_tcp_ecn = 2; + net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; return 0; fail: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4fcc9a768849..a2a796c5536b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072; */ int sysctl_tcp_tso_win_divisor __read_mostly = 3; -int sysctl_tcp_mtu_probing __read_mostly = 0; -int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; - /* By default, RFC2861 behavior. 
*/ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; @@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; } EXPORT_SYMBOL(tcp_mtup_init); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 1829c7fbc77e..0732b787904e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -101,17 +101,20 @@ static int tcp_orphan_retries(struct sock *sk, int alive) static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) { + struct net *net = sock_net(sk); + /* Black hole detection */ - if (sysctl_tcp_mtu_probing) { + if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { + struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); int mss; mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(sysctl_tcp_base_mss, mss); + mss = min(net->ipv4.sysctl_tcp_base_mss, mss); mss = max(mss, 68 - tp->tcp_header_len); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); -- cgit v1.2.3 From 04f81f0154e4bf002be6f4d85668ce1257efa4d9 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 11 Feb 2015 14:46:37 -0500 Subject: cipso: don't use IPCB() to locate the CIPSO IP option Using the IPCB() macro to get the IPv4 options is convenient, but unfortunately NetLabel often needs to examine the CIPSO option outside of the scope of the IP layer in the stack. While historically IPCB() worked above the IP layer, due to the inclusion of the inet_skb_param struct at the head of the {tcp,udp}_skb_cb structs, recent commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses") reordered the tcp_skb_cb struct and invalidated this IPCB() trick. This patch fixes the problem by creating a new function, cipso_v4_optptr(), which locates the CIPSO option inside the IP header without calling IPCB(). Unfortunately, this isn't as fast as a simple lookup so some additional tweaks were made to limit the use of this new function. 
Cc: # 3.18 Reported-by: Casey Schaufler Signed-off-by: Paul Moore Tested-by: Casey Schaufler --- include/net/cipso_ipv4.h | 25 ++++++++++++++-------- net/ipv4/cipso_ipv4.c | 51 ++++++++++++++++++++++++++------------------ net/netlabel/netlabel_kapi.c | 15 ++++++++----- 3 files changed, 56 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index a6fd939f202d..3ebb168b9afc 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -120,13 +120,6 @@ extern int cipso_v4_rbm_optfmt; extern int cipso_v4_rbm_strictvalid; #endif -/* - * Helper Functions - */ - -#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0) -#define CIPSO_V4_OPTPTR(x) (skb_network_header(x) + IPCB(x)->opt.cipso) - /* * DOI List Functions */ @@ -190,7 +183,7 @@ static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, #ifdef CONFIG_NETLABEL void cipso_v4_cache_invalidate(void); -int cipso_v4_cache_add(const struct sk_buff *skb, +int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr); #else static inline void cipso_v4_cache_invalidate(void) @@ -198,7 +191,7 @@ static inline void cipso_v4_cache_invalidate(void) return; } -static inline int cipso_v4_cache_add(const struct sk_buff *skb, +static inline int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { return 0; @@ -211,6 +204,8 @@ static inline int cipso_v4_cache_add(const struct sk_buff *skb, #ifdef CONFIG_NETLABEL void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); +int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr); int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); @@ -226,6 +221,7 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb, int cipso_v4_skbuff_delattr(struct sk_buff *skb); int cipso_v4_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr); +unsigned char *cipso_v4_optptr(const struct sk_buff *skb); int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option); #else static inline void cipso_v4_error(struct sk_buff *skb, @@ -235,6 +231,12 @@ static inline void cipso_v4_error(struct sk_buff *skb, return; } +static inline int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + static inline int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr) @@ -282,6 +284,11 @@ static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, return -ENOSYS; } +static inline unsigned char *cipso_v4_optptr(const struct sk_buff *skb) +{ + return NULL; +} + static inline int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 5160c710f2eb..e361ea6f3fc8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -378,20 +378,18 @@ static int cipso_v4_cache_check(const unsigned char *key, * negative values on failure. 
* */ -int cipso_v4_cache_add(const struct sk_buff *skb, +int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; - unsigned char *cipso_ptr; u32 cipso_ptr_len; if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) return 0; - cipso_ptr = CIPSO_V4_OPTPTR(skb); cipso_ptr_len = cipso_ptr[1]; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); @@ -1578,6 +1576,33 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, return 0; } +/** + * cipso_v4_optptr - Find the CIPSO option in the packet + * @skb: the packet + * + * Description: + * Parse the packet's IP header looking for a CIPSO option. Returns a pointer + * to the start of the CIPSO option on success, NULL if one if not found. + * + */ +unsigned char *cipso_v4_optptr(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + unsigned char *optptr = (unsigned char *)&(ip_hdr(skb)[1]); + int optlen; + int taglen; + + for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { + if (optptr[0] == IPOPT_CIPSO) + return optptr; + taglen = optptr[1]; + optlen -= taglen; + optptr += taglen; + } + + return NULL; +} + /** * cipso_v4_validate - Validate a CIPSO option * @option: the start of the option, on error it is set to point to the error @@ -2119,8 +2144,8 @@ void cipso_v4_req_delattr(struct request_sock *req) * on success and negative values on failure. * */ -static int cipso_v4_getattr(const unsigned char *cipso, - struct netlbl_lsm_secattr *secattr) +int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; u32 doi; @@ -2305,22 +2330,6 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb) return 0; } -/** - * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option - * @skb: the packet - * @secattr: the security attributes - * - * Description: - * Parse the given packet's CIPSO option and return the security attributes. - * Returns zero on success and negative values on failure. 
- * - */ -int cipso_v4_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr) -{ - return cipso_v4_getattr(CIPSO_V4_OPTPTR(skb), secattr); -} - /* * Setup Functions */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index a845cd4cf21e..28cddc85b700 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -1065,10 +1065,12 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) { + unsigned char *ptr; + switch (family) { case AF_INET: - if (CIPSO_V4_OPTEXIST(skb) && - cipso_v4_skbuff_getattr(skb, secattr) == 0) + ptr = cipso_v4_optptr(skb); + if (ptr && cipso_v4_getattr(ptr, secattr) == 0) return 0; break; #if IS_ENABLED(CONFIG_IPV6) @@ -1094,7 +1096,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, */ void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) { - if (CIPSO_V4_OPTEXIST(skb)) + if (cipso_v4_optptr(skb)) cipso_v4_error(skb, error, gateway); } @@ -1126,11 +1128,14 @@ void netlbl_cache_invalidate(void) int netlbl_cache_add(const struct sk_buff *skb, const struct netlbl_lsm_secattr *secattr) { + unsigned char *ptr; + if ((secattr->flags & NETLBL_SECATTR_CACHE) == 0) return -ENOMSG; - if (CIPSO_V4_OPTEXIST(skb)) - return cipso_v4_cache_add(skb, secattr); + ptr = cipso_v4_optptr(skb); + if (ptr) + return cipso_v4_cache_add(ptr, secattr); return -ENOMSG; } -- cgit v1.2.3 From 650c5e565492f9092552bfe4d65935196c7d9567 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:26:03 -0800 Subject: mm: page_counter: pull "-1" handling out of page_counter_memparse() The unified hierarchy interface for memory cgroups will no longer use "-1" to mean maximum possible resource value. In preparation for this, make the string an argument and let the caller supply it. 
Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_counter.h | 3 ++- mm/hugetlb_cgroup.c | 2 +- mm/memcontrol.c | 4 ++-- mm/page_counter.c | 7 ++++--- net/ipv4/tcp_memcontrol.c | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 955421575d16..17fa4f8de3a6 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -41,7 +41,8 @@ int page_counter_try_charge(struct page_counter *counter, struct page_counter **fail); void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); int page_counter_limit(struct page_counter *counter, unsigned long limit); -int page_counter_memparse(const char *buf, unsigned long *nr_pages); +int page_counter_memparse(const char *buf, const char *max, + unsigned long *nr_pages); static inline void page_counter_reset_watermark(struct page_counter *counter) { diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 037e1c00a5b7..6e0057439a46 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -279,7 +279,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, return -EINVAL; buf = strstrip(buf); - ret = page_counter_memparse(buf, &nr_pages); + ret = page_counter_memparse(buf, "-1", &nr_pages); if (ret) return ret; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dc5c4cd0afdf..6453ea5a27aa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3414,7 +3414,7 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, int ret; buf = strstrip(buf); - ret = page_counter_memparse(buf, &nr_pages); + ret = page_counter_memparse(buf, "-1", &nr_pages); if (ret) return ret; @@ -3786,7 +3786,7 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, unsigned long usage; int i, size, ret; - ret = page_counter_memparse(args, &threshold); + ret = page_counter_memparse(args, "-1", &threshold); if (ret) return ret; diff --git a/mm/page_counter.c b/mm/page_counter.c index a009574fbba9..11b4beda14ba 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -166,18 +166,19 @@ int page_counter_limit(struct page_counter *counter, unsigned long limit) /** * page_counter_memparse - memparse() for page counter limits * @buf: string to parse + * @max: string meaning maximum possible value * @nr_pages: returns the result in number of pages * * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be * limited to %PAGE_COUNTER_MAX. 
*/ -int page_counter_memparse(const char *buf, unsigned long *nr_pages) +int page_counter_memparse(const char *buf, const char *max, + unsigned long *nr_pages) { - char unlimited[] = "-1"; char *end; u64 bytes; - if (!strncmp(buf, unlimited, sizeof(unlimited))) { + if (!strcmp(buf, max)) { *nr_pages = PAGE_COUNTER_MAX; return 0; } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 272327134a1b..c2a75c6957a1 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -120,7 +120,7 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, switch (of_cft(of)->private) { case RES_LIMIT: /* see memcontrol.c */ - ret = page_counter_memparse(buf, &nr_pages); + ret = page_counter_memparse(buf, "-1", &nr_pages); if (ret) break; mutex_lock(&tcp_limit_mutex); -- cgit v1.2.3 From 7e339128496284cc21977fba5416166ee81f5172 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 11 Feb 2015 15:27:26 -0800 Subject: mm: gup: use get_user_pages_unlocked This allows those get_user_pages calls to pass FAULT_FLAG_ALLOW_RETRY to the page fault in order to release the mmap_sem during the I/O. Signed-off-by: Andrea Arcangeli Reviewed-by: Kirill A. Shutemov Cc: Andres Lagar-Cavilla Cc: Peter Feiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/pci/ivtv/ivtv-udma.c | 6 ++---- drivers/scsi/st.c | 7 ++----- drivers/video/fbdev/pvr2fb.c | 6 ++---- mm/process_vm_access.c | 7 ++----- net/ceph/pagevec.c | 6 ++---- 5 files changed, 10 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c index bee2329e0b2e..24152accc66c 100644 --- a/drivers/media/pci/ivtv/ivtv-udma.c +++ b/drivers/media/pci/ivtv/ivtv-udma.c @@ -124,10 +124,8 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr, } /* Get user pages for DMA Xfer */ - down_read(¤t->mm->mmap_sem); - err = get_user_pages(current, current->mm, - user_dma.uaddr, user_dma.page_count, 0, 1, dma->map, NULL); - up_read(¤t->mm->mmap_sem); + err = get_user_pages_unlocked(current, current->mm, + user_dma.uaddr, user_dma.page_count, 0, 1, dma->map); if (user_dma.page_count != err) { IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n", diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 128d3b55bdd9..9a1c34205254 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4551,18 +4551,15 @@ static int sgl_map_user_pages(struct st_buffer *STbp, return -ENOMEM; /* Try to fault in all of the necessary pages */ - down_read(¤t->mm->mmap_sem); /* rw==READ means read from drive, write into memory area */ - res = get_user_pages( + res = get_user_pages_unlocked( current, current->mm, uaddr, nr_pages, rw == READ, 0, /* don't force */ - pages, - NULL); - up_read(¤t->mm->mmap_sem); + pages); /* Errors and no page mapped should return here */ if (res < nr_pages) diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index 7c74f58fc101..0e24eb9c219c 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -686,10 +686,8 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf, if (!pages) return -ENOMEM; - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, (unsigned long)buf, - nr_pages, WRITE, 0, pages, NULL); - up_read(¤t->mm->mmap_sem); + ret = get_user_pages_unlocked(current, current->mm, (unsigned long)buf, + nr_pages, WRITE, 0, pages); if (ret < nr_pages) { nr_pages = ret; diff --git a/mm/process_vm_access.c 
b/mm/process_vm_access.c index 5077afcd9e11..b1597690530c 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -99,11 +99,8 @@ static int process_vm_rw_single_vec(unsigned long addr, size_t bytes; /* Get the pages we're interested in */ - down_read(&mm->mmap_sem); - pages = get_user_pages(task, mm, pa, pages, - vm_write, 0, process_pages, NULL); - up_read(&mm->mmap_sem); - + pages = get_user_pages_unlocked(task, mm, pa, pages, + vm_write, 0, process_pages); if (pages <= 0) return -EFAULT; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 555013034f7a..096d91447e06 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -23,17 +23,15 @@ struct page **ceph_get_direct_page_vector(const void __user *data, if (!pages) return ERR_PTR(-ENOMEM); - down_read(¤t->mm->mmap_sem); while (got < num_pages) { - rc = get_user_pages(current, current->mm, + rc = get_user_pages_unlocked(current, current->mm, (unsigned long)data + ((unsigned long)got * PAGE_SIZE), - num_pages - got, write_page, 0, pages + got, NULL); + num_pages - got, write_page, 0, pages + got); if (rc < 0) break; BUG_ON(rc == 0); got += rc; } - up_read(¤t->mm->mmap_sem); if (rc < 0) goto fail; return pages; -- cgit v1.2.3