From cd67cd5eb25ae9a7bafbfd3d52d4c05e1d80af3b Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 6 Feb 2015 09:44:44 +0200
Subject: ipvs: use 64-bit rates in stats

IPVS stats are limited to 2^(32-10) conns/s and packets/s,
2^(32-5) bytes/s. It is time to use 64 bits:

* Change all conn/packet kernel counters to 64-bit and update
them in u64_stats_update_{begin,end} section

* In kernel use struct ip_vs_kstats instead of the user-space
struct ip_vs_stats_user and use new func ip_vs_export_stats_user
to export it to sockopt users to preserve compatibility with
32-bit values

* Rename cpu counters "ustats" to "cnt"

* To netlink users provide additionally 64-bit stats:
IPVS_SVC_ATTR_STATS64 and IPVS_DEST_ATTR_STATS64. Old stats
remain for old binaries.

* We can use ip_vs_copy_stats in ip_vs_stats_percpu_show

Thanks to Chris Caputo for providing initial patch for ip_vs_est.c

Signed-off-by: Chris Caputo <ccaputo@alt.net>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h        | 50 ++++++++++++++++++++++++++++++----------------
 include/uapi/linux/ip_vs.h |  7 ++++++-
 2 files changed, 39 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 615b20b58545..a627fe690c19 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -365,15 +365,15 @@ struct ip_vs_seq {
 
 /* counters per cpu */
 struct ip_vs_counters {
-	__u32		conns;		/* connections scheduled */
-	__u32		inpkts;		/* incoming packets */
-	__u32		outpkts;	/* outgoing packets */
+	__u64		conns;		/* connections scheduled */
+	__u64		inpkts;		/* incoming packets */
+	__u64		outpkts;	/* outgoing packets */
 	__u64		inbytes;	/* incoming bytes */
 	__u64		outbytes;	/* outgoing bytes */
 };
 /* Stats per cpu */
 struct ip_vs_cpu_stats {
-	struct ip_vs_counters   ustats;
+	struct ip_vs_counters   cnt;
 	struct u64_stats_sync   syncp;
 };
 
@@ -383,23 +383,40 @@ struct ip_vs_estimator {
 
 	u64			last_inbytes;
 	u64			last_outbytes;
-	u32			last_conns;
-	u32			last_inpkts;
-	u32			last_outpkts;
-
-	u32			cps;
-	u32			inpps;
-	u32			outpps;
-	u32			inbps;
-	u32			outbps;
+	u64			last_conns;
+	u64			last_inpkts;
+	u64			last_outpkts;
+
+	u64			cps;
+	u64			inpps;
+	u64			outpps;
+	u64			inbps;
+	u64			outbps;
+};
+
+/*
+ * IPVS statistics object, 64-bit kernel version of struct ip_vs_stats_user
+ */
+struct ip_vs_kstats {
+	u64			conns;		/* connections scheduled */
+	u64			inpkts;		/* incoming packets */
+	u64			outpkts;	/* outgoing packets */
+	u64			inbytes;	/* incoming bytes */
+	u64			outbytes;	/* outgoing bytes */
+
+	u64			cps;		/* current connection rate */
+	u64			inpps;		/* current in packet rate */
+	u64			outpps;		/* current out packet rate */
+	u64			inbps;		/* current in byte rate */
+	u64			outbps;		/* current out byte rate */
 };
 
 struct ip_vs_stats {
-	struct ip_vs_stats_user	ustats;		/* statistics */
+	struct ip_vs_kstats	kstats;		/* kernel statistics */
 	struct ip_vs_estimator	est;		/* estimator */
 	struct ip_vs_cpu_stats __percpu	*cpustats;	/* per cpu counters */
 	spinlock_t		lock;		/* spin lock */
-	struct ip_vs_stats_user	ustats0;	/* reset values */
+	struct ip_vs_kstats	kstats0;	/* reset values */
 };
 
 struct dst_entry;
@@ -1388,8 +1405,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
 void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
 void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
 void ip_vs_zero_estimator(struct ip_vs_stats *stats);
-void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
-			  struct ip_vs_stats *stats);
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);
 
 /* Various IPVS packet transmitters (from ip_vs_xmit.c) */
 int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index cabe95d5b461..3199243f2028 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -358,6 +358,8 @@ enum {
 
 	IPVS_SVC_ATTR_PE_NAME,		/* name of ct retriever */
 
+	IPVS_SVC_ATTR_STATS64,		/* nested attribute for service stats */
+
 	__IPVS_SVC_ATTR_MAX,
 };
 
@@ -387,6 +389,8 @@ enum {
 
 	IPVS_DEST_ATTR_ADDR_FAMILY,	/* Address family of address */
 
+	IPVS_DEST_ATTR_STATS64,		/* nested attribute for dest stats */
+
 	__IPVS_DEST_ATTR_MAX,
 };
 
@@ -410,7 +414,8 @@ enum {
 /*
  * Attributes used to describe service or destination entry statistics
  *
- * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS
+ * Used inside nested attributes IPVS_SVC_ATTR_STATS, IPVS_DEST_ATTR_STATS,
+ * IPVS_SVC_ATTR_STATS64 and IPVS_DEST_ATTR_STATS64.
  */
 enum {
 	IPVS_STATS_ATTR_UNSPEC = 0,
-- 
cgit v1.2.3


From 306f7aa1807be7588f115d7cafa475f65e72e3d1 Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Wed, 11 Feb 2015 14:39:15 +0100
Subject: ieee802154: correct ieee802154_is_valid_psdu_len

This patch corrects the ieee802154_is_valid_psdu_len function that this
function also checks on reserved values 6-8 for validation the psdu
length.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/linux/ieee802154.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h
index 6e82d888287c..40b0ab953937 100644
--- a/include/linux/ieee802154.h
+++ b/include/linux/ieee802154.h
@@ -28,7 +28,8 @@
 #include <asm/byteorder.h>
 
 #define IEEE802154_MTU			127
-#define IEEE802154_MIN_PSDU_LEN		5
+#define IEEE802154_ACK_PSDU_LEN		5
+#define IEEE802154_MIN_PSDU_LEN		9
 
 #define IEEE802154_PAN_ID_BROADCAST	0xffff
 #define IEEE802154_ADDR_SHORT_BROADCAST	0xffff
@@ -204,11 +205,18 @@ enum {
 
 /**
  * ieee802154_is_valid_psdu_len - check if psdu len is valid
+ * available lengths:
+ *	0-4	Reserved
+ *	5	MPDU (Acknowledgment)
+ *	6-8	Reserved
+ *	9-127	MPDU
+ *
  * @len: psdu len with (MHR + payload + MFR)
  */
 static inline bool ieee802154_is_valid_psdu_len(const u8 len)
 {
-	return (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU);
+	return (len == IEEE802154_ACK_PSDU_LEN ||
+		(len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU));
 }
 
 /**
-- 
cgit v1.2.3


From ba5bf17e8343c0b5b87a1240aa75c35c76b88e5e Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Wed, 11 Feb 2015 14:39:16 +0100
Subject: ieee802154: cleanup ieee802154_be64_to_le64

This patch cleanups the ieee802154_be64_to_le64 function. This patch
removes an unnecessary temporary variable.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/mac802154.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/mac802154.h b/include/net/mac802154.h
index 850647811749..c5c64455bcfa 100644
--- a/include/net/mac802154.h
+++ b/include/net/mac802154.h
@@ -19,6 +19,7 @@
 #include <net/af_ieee802154.h>
 #include <linux/ieee802154.h>
 #include <linux/skbuff.h>
+#include <linux/unaligned/memmove.h>
 
 #include <net/cfg802154.h>
 
@@ -233,9 +234,7 @@ struct ieee802154_ops {
  */
 static inline void ieee802154_be64_to_le64(void *le64_dst, const void *be64_src)
 {
-	__le64 tmp = (__force __le64)swab64p(be64_src);
-
-	memcpy(le64_dst, &tmp, IEEE802154_EXTENDED_ADDR_LEN);
+	__put_unaligned_memmove64(swab64p(be64_src), le64_dst);
 }
 
 /**
-- 
cgit v1.2.3


From b976796950c7a41fe1b6b51236ddd08dd6480b80 Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Wed, 11 Feb 2015 14:39:17 +0100
Subject: ieee802154: cleanup ieee802154_le64_to_be64

This patch cleanups the ieee802154_le64_to_be64 function. This patch
removes an unnecessary temporary variable.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/mac802154.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/mac802154.h b/include/net/mac802154.h
index c5c64455bcfa..fb4e8a3d6229 100644
--- a/include/net/mac802154.h
+++ b/include/net/mac802154.h
@@ -244,9 +244,7 @@ static inline void ieee802154_be64_to_le64(void *le64_dst, const void *be64_src)
  */
 static inline void ieee802154_le64_to_be64(void *be64_dst, const void *le64_src)
 {
-	__be64 tmp = (__force __be64)swab64p(le64_src);
-
-	memcpy(be64_dst, &tmp, IEEE802154_EXTENDED_ADDR_LEN);
+	__put_unaligned_memmove64(swab64p(le64_src), be64_dst);
 }
 
 /* Basic interface to register ieee802154 hwice */
-- 
cgit v1.2.3


From a44fecbd52a4d9c36f07eb2161c153047d8765d4 Mon Sep 17 00:00:00 2001
From: Tedd Ho-Jeong An <tedd.an@intel.com>
Date: Fri, 13 Feb 2015 09:20:50 -0800
Subject: Bluetooth: Add shutdown callback before closing the device

This callback allows a vendor to send the vendor specific commands
before cloing the hci interface.

Signed-off-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 net/bluetooth/hci_core.c         | 6 ++++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 52863c3e0b13..5f1ca3359c1a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -373,6 +373,7 @@ struct hci_dev {
 	int (*close)(struct hci_dev *hdev);
 	int (*flush)(struct hci_dev *hdev);
 	int (*setup)(struct hci_dev *hdev);
+	int (*shutdown)(struct hci_dev *hdev);
 	int (*send)(struct hci_dev *hdev, struct sk_buff *skb);
 	void (*notify)(struct hci_dev *hdev, unsigned int evt);
 	void (*hw_error)(struct hci_dev *hdev, u8 code);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3322d3f4c85a..4135a4406aed 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1591,6 +1591,12 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 {
 	BT_DBG("%s %p", hdev->name, hdev);
 
+	if (!test_bit(HCI_UNREGISTER, &hdev->dev_flags)) {
+		/* Execute vendor specific shutdown routine */
+		if (hdev->shutdown)
+			hdev->shutdown(hdev);
+	}
+
 	cancel_delayed_work(&hdev->power_off);
 
 	hci_req_cancel(hdev, ENODEV);
-- 
cgit v1.2.3


From fba7ecf09bc458b15f9d578e4213c8c349f9592d Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Wed, 18 Feb 2015 14:53:55 +0200
Subject: Bluetooth: Convert hci_cb_list_lock to a mutex

We'll soon need to be able to sleep inside the loops that iterate the
hci_cb list, so neither a spinlock, rwlock or rcu are usable. This patch
changes the lock to a mutex which permits sleeping while holding the
lock.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 18 +++++++++---------
 net/bluetooth/hci_core.c         | 10 +++++-----
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 5f1ca3359c1a..137a18226975 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -499,7 +499,7 @@ struct hci_conn_params {
 extern struct list_head hci_dev_list;
 extern struct list_head hci_cb_list;
 extern rwlock_t hci_dev_list_lock;
-extern rwlock_t hci_cb_list_lock;
+extern struct mutex hci_cb_list_lock;
 
 /* ----- HCI interface to upper protocols ----- */
 int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
@@ -1160,12 +1160,12 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
 
 	encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00;
 
-	read_lock(&hci_cb_list_lock);
+	mutex_lock(&hci_cb_list_lock);
 	list_for_each_entry(cb, &hci_cb_list, list) {
 		if (cb->security_cfm)
 			cb->security_cfm(conn, status, encrypt);
 	}
-	read_unlock(&hci_cb_list_lock);
+	mutex_unlock(&hci_cb_list_lock);
 }
 
 static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status,
@@ -1181,24 +1181,24 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status,
 
 	hci_proto_encrypt_cfm(conn, status, encrypt);
 
-	read_lock(&hci_cb_list_lock);
+	mutex_lock(&hci_cb_list_lock);
 	list_for_each_entry(cb, &hci_cb_list, list) {
 		if (cb->security_cfm)
 			cb->security_cfm(conn, status, encrypt);
 	}
-	read_unlock(&hci_cb_list_lock);
+	mutex_unlock(&hci_cb_list_lock);
 }
 
 static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status)
 {
 	struct hci_cb *cb;
 
-	read_lock(&hci_cb_list_lock);
+	mutex_lock(&hci_cb_list_lock);
 	list_for_each_entry(cb, &hci_cb_list, list) {
 		if (cb->key_change_cfm)
 			cb->key_change_cfm(conn, status);
 	}
-	read_unlock(&hci_cb_list_lock);
+	mutex_unlock(&hci_cb_list_lock);
 }
 
 static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status,
@@ -1206,12 +1206,12 @@ static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status,
 {
 	struct hci_cb *cb;
 
-	read_lock(&hci_cb_list_lock);
+	mutex_lock(&hci_cb_list_lock);
 	list_for_each_entry(cb, &hci_cb_list, list) {
 		if (cb->role_switch_cfm)
 			cb->role_switch_cfm(conn, status, role);
 	}
-	read_unlock(&hci_cb_list_lock);
+	mutex_unlock(&hci_cb_list_lock);
 }
 
 static inline bool eir_has_data_type(u8 *data, size_t data_len, u8 type)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 91f557b0318a..dbd26bcb9210 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -51,7 +51,7 @@ DEFINE_RWLOCK(hci_dev_list_lock);
 
 /* HCI callback list */
 LIST_HEAD(hci_cb_list);
-DEFINE_RWLOCK(hci_cb_list_lock);
+DEFINE_MUTEX(hci_cb_list_lock);
 
 /* HCI ID Numbering */
 static DEFINE_IDA(hci_index_ida);
@@ -3464,9 +3464,9 @@ int hci_register_cb(struct hci_cb *cb)
 {
 	BT_DBG("%p name %s", cb, cb->name);
 
-	write_lock(&hci_cb_list_lock);
+	mutex_lock(&hci_cb_list_lock);
 	list_add_tail(&cb->list, &hci_cb_list);
-	write_unlock(&hci_cb_list_lock);
+	mutex_unlock(&hci_cb_list_lock);
 
 	return 0;
 }
@@ -3476,9 +3476,9 @@ int hci_unregister_cb(struct hci_cb *cb)
 {
 	BT_DBG("%p name %s", cb, cb->name);
 
-	write_lock(&hci_cb_list_lock);
+	mutex_lock(&hci_cb_list_lock);
 	list_del(&cb->list);
-	write_unlock(&hci_cb_list_lock);
+	mutex_unlock(&hci_cb_list_lock);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 354fe804edb29625eee6dd7b1f3c72b43392704d Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Wed, 18 Feb 2015 14:53:56 +0200
Subject: Bluetooth: Convert L2CAP security callback to use hci_cb

There's no reason to have the custom hci_proto_auth/encrypt_cfm helpers
when the hci_cb list works equally well. This patch adds L2CAP to the
hci_cb list and makes l2cap_security_cfm a private function of
l2cap_core.c.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 40 ++++++----------------------------------
 net/bluetooth/l2cap_core.c       | 14 ++++++++++----
 2 files changed, 16 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 137a18226975..4a2db5e62699 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -506,7 +506,6 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
 void l2cap_connect_cfm(struct hci_conn *hcon, u8 status);
 int l2cap_disconn_ind(struct hci_conn *hcon);
 void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason);
-int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt);
 int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags);
 
 int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags);
@@ -1107,35 +1106,6 @@ static inline void hci_proto_disconn_cfm(struct hci_conn *conn, __u8 reason)
 		conn->disconn_cfm_cb(conn, reason);
 }
 
-static inline void hci_proto_auth_cfm(struct hci_conn *conn, __u8 status)
-{
-	__u8 encrypt;
-
-	if (conn->type != ACL_LINK && conn->type != LE_LINK)
-		return;
-
-	if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
-		return;
-
-	encrypt = test_bit(HCI_CONN_ENCRYPT, &conn->flags) ? 0x01 : 0x00;
-	l2cap_security_cfm(conn, status, encrypt);
-
-	if (conn->security_cfm_cb)
-		conn->security_cfm_cb(conn, status);
-}
-
-static inline void hci_proto_encrypt_cfm(struct hci_conn *conn, __u8 status,
-								__u8 encrypt)
-{
-	if (conn->type != ACL_LINK && conn->type != LE_LINK)
-		return;
-
-	l2cap_security_cfm(conn, status, encrypt);
-
-	if (conn->security_cfm_cb)
-		conn->security_cfm_cb(conn, status);
-}
-
 /* ----- HCI callbacks ----- */
 struct hci_cb {
 	struct list_head list;
@@ -1153,8 +1123,6 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
 	struct hci_cb *cb;
 	__u8 encrypt;
 
-	hci_proto_auth_cfm(conn, status);
-
 	if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
 		return;
 
@@ -1166,6 +1134,9 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
 			cb->security_cfm(conn, status, encrypt);
 	}
 	mutex_unlock(&hci_cb_list_lock);
+
+	if (conn->security_cfm_cb)
+		conn->security_cfm_cb(conn, status);
 }
 
 static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status,
@@ -1179,14 +1150,15 @@ static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status,
 	if (conn->pending_sec_level > conn->sec_level)
 		conn->sec_level = conn->pending_sec_level;
 
-	hci_proto_encrypt_cfm(conn, status, encrypt);
-
 	mutex_lock(&hci_cb_list_lock);
 	list_for_each_entry(cb, &hci_cb_list, list) {
 		if (cb->security_cfm)
 			cb->security_cfm(conn, status, encrypt);
 	}
 	mutex_unlock(&hci_cb_list_lock);
+
+	if (conn->security_cfm_cb)
+		conn->security_cfm_cb(conn, status);
 }
 
 static inline void hci_key_change_cfm(struct hci_conn *conn, __u8 status)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index ec6f78e481dc..424fcc5c4980 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7345,13 +7345,13 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt)
 	}
 }
 
-int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
+static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 {
 	struct l2cap_conn *conn = hcon->l2cap_data;
 	struct l2cap_chan *chan;
 
 	if (!conn)
-		return 0;
+		return;
 
 	BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt);
 
@@ -7434,8 +7434,6 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 	}
 
 	mutex_unlock(&conn->chan_lock);
-
-	return 0;
 }
 
 int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
@@ -7543,6 +7541,11 @@ drop:
 	return 0;
 }
 
+static struct hci_cb l2cap_cb = {
+	.name		= "L2CAP",
+	.security_cfm	= l2cap_security_cfm,
+};
+
 static int l2cap_debugfs_show(struct seq_file *f, void *p)
 {
 	struct l2cap_chan *c;
@@ -7584,6 +7587,8 @@ int __init l2cap_init(void)
 	if (err < 0)
 		return err;
 
+	hci_register_cb(&l2cap_cb);
+
 	if (IS_ERR_OR_NULL(bt_debugfs))
 		return 0;
 
@@ -7601,6 +7606,7 @@ int __init l2cap_init(void)
 void l2cap_exit(void)
 {
 	debugfs_remove(l2cap_debugfs);
+	hci_unregister_cb(&l2cap_cb);
 	l2cap_cleanup_sockets();
 }
 
-- 
cgit v1.2.3


From 539c496d88f7f96d42abde4e9d901c8f8167d615 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Wed, 18 Feb 2015 14:53:57 +0200
Subject: Bluetooth: Convert connect_cfm to be triggered through hci_cb

This patch moves all the connect_cfm callbacks to be based on the hci_cb
list. This means making l2cap_connect_cfm private to l2cap_core.c and
sco_connect_cb private to sco.c respectively. Since the hci_conn type
filtering isn't done any more on the wrapper level the callbacks
themselves need to check that they were passed a relevant type of
connection.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 40 ++++++++++++++++------------------------
 net/bluetooth/hci_conn.c         |  4 ++--
 net/bluetooth/hci_event.c        | 36 ++++++++++++++++++------------------
 net/bluetooth/l2cap_core.c       |  6 +++++-
 net/bluetooth/sco.c              | 15 ++++++++++++++-
 5 files changed, 55 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 4a2db5e62699..0f00f0e9f257 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -503,13 +503,11 @@ extern struct mutex hci_cb_list_lock;
 
 /* ----- HCI interface to upper protocols ----- */
 int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
-void l2cap_connect_cfm(struct hci_conn *hcon, u8 status);
 int l2cap_disconn_ind(struct hci_conn *hcon);
 void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason);
 int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags);
 
 int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags);
-void sco_connect_cfm(struct hci_conn *hcon, __u8 status);
 void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason);
 int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb);
 
@@ -1050,28 +1048,6 @@ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr,
 	}
 }
 
-static inline void hci_proto_connect_cfm(struct hci_conn *conn, __u8 status)
-{
-	switch (conn->type) {
-	case ACL_LINK:
-	case LE_LINK:
-		l2cap_connect_cfm(conn, status);
-		break;
-
-	case SCO_LINK:
-	case ESCO_LINK:
-		sco_connect_cfm(conn, status);
-		break;
-
-	default:
-		BT_ERR("unknown link type %d", conn->type);
-		break;
-	}
-
-	if (conn->connect_cfm_cb)
-		conn->connect_cfm_cb(conn, status);
-}
-
 static inline int hci_proto_disconn_ind(struct hci_conn *conn)
 {
 	if (conn->type != ACL_LINK && conn->type != LE_LINK)
@@ -1112,12 +1088,28 @@ struct hci_cb {
 
 	char *name;
 
+	void (*connect_cfm)	(struct hci_conn *conn, __u8 status);
 	void (*security_cfm)	(struct hci_conn *conn, __u8 status,
 								__u8 encrypt);
 	void (*key_change_cfm)	(struct hci_conn *conn, __u8 status);
 	void (*role_switch_cfm)	(struct hci_conn *conn, __u8 status, __u8 role);
 };
 
+static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
+{
+	struct hci_cb *cb;
+
+	mutex_lock(&hci_cb_list_lock);
+	list_for_each_entry(cb, &hci_cb_list, list) {
+		if (cb->connect_cfm)
+			cb->connect_cfm(conn, status);
+	}
+	mutex_unlock(&hci_cb_list_lock);
+
+	if (conn->connect_cfm_cb)
+		conn->connect_cfm_cb(conn, status);
+}
+
 static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
 {
 	struct hci_cb *cb;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index e3263b61bcf3..e9206734e024 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -309,7 +309,7 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status)
 		else
 			hci_add_sco(sco, conn->handle);
 	} else {
-		hci_proto_connect_cfm(sco, status);
+		hci_connect_cfm(sco, status);
 		hci_conn_del(sco);
 	}
 }
@@ -618,7 +618,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
 	mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type,
 			    status);
 
-	hci_proto_connect_cfm(conn, status);
+	hci_connect_cfm(conn, status);
 
 	hci_conn_del(conn);
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a3fb094822b6..0b599129c64c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1537,7 +1537,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
 		if (conn && conn->state == BT_CONNECT) {
 			if (status != 0x0c || conn->attempt > 2) {
 				conn->state = BT_CLOSED;
-				hci_proto_connect_cfm(conn, status);
+				hci_connect_cfm(conn, status);
 				hci_conn_del(conn);
 			} else
 				conn->state = BT_CONNECT2;
@@ -1581,7 +1581,7 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status)
 		if (sco) {
 			sco->state = BT_CLOSED;
 
-			hci_proto_connect_cfm(sco, status);
+			hci_connect_cfm(sco, status);
 			hci_conn_del(sco);
 		}
 	}
@@ -1608,7 +1608,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status)
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
-			hci_proto_connect_cfm(conn, status);
+			hci_connect_cfm(conn, status);
 			hci_conn_drop(conn);
 		}
 	}
@@ -1635,7 +1635,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
-			hci_proto_connect_cfm(conn, status);
+			hci_connect_cfm(conn, status);
 			hci_conn_drop(conn);
 		}
 	}
@@ -1811,7 +1811,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status)
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
-			hci_proto_connect_cfm(conn, status);
+			hci_connect_cfm(conn, status);
 			hci_conn_drop(conn);
 		}
 	}
@@ -1838,7 +1838,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status)
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
-			hci_proto_connect_cfm(conn, status);
+			hci_connect_cfm(conn, status);
 			hci_conn_drop(conn);
 		}
 	}
@@ -1873,7 +1873,7 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status)
 		if (sco) {
 			sco->state = BT_CLOSED;
 
-			hci_proto_connect_cfm(sco, status);
+			hci_connect_cfm(sco, status);
 			hci_conn_del(sco);
 		}
 	}
@@ -2255,10 +2255,10 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_sco_setup(conn, ev->status);
 
 	if (ev->status) {
-		hci_proto_connect_cfm(conn, ev->status);
+		hci_connect_cfm(conn, ev->status);
 		hci_conn_del(conn);
 	} else if (ev->link_type != ACL_LINK)
-		hci_proto_connect_cfm(conn, ev->status);
+		hci_connect_cfm(conn, ev->status);
 
 unlock:
 	hci_dev_unlock(hdev);
@@ -2366,7 +2366,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 			     &cp);
 	} else {
 		conn->state = BT_CONNECT2;
-		hci_proto_connect_cfm(conn, 0);
+		hci_connect_cfm(conn, 0);
 	}
 }
 
@@ -2501,7 +2501,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 				     &cp);
 		} else {
 			conn->state = BT_CONNECTED;
-			hci_proto_connect_cfm(conn, ev->status);
+			hci_connect_cfm(conn, ev->status);
 			hci_conn_drop(conn);
 		}
 	} else {
@@ -2629,12 +2629,12 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) &&
 		    (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
 		     conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
-			hci_proto_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
+			hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
 			hci_conn_drop(conn);
 			goto unlock;
 		}
 
-		hci_proto_connect_cfm(conn, ev->status);
+		hci_connect_cfm(conn, ev->status);
 		hci_conn_drop(conn);
 	} else
 		hci_encrypt_cfm(conn, ev->status, ev->encrypt);
@@ -2707,7 +2707,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev,
 
 	if (!hci_outgoing_auth_needed(hdev, conn)) {
 		conn->state = BT_CONNECTED;
-		hci_proto_connect_cfm(conn, ev->status);
+		hci_connect_cfm(conn, ev->status);
 		hci_conn_drop(conn);
 	}
 
@@ -3679,7 +3679,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
 
 	if (!hci_outgoing_auth_needed(hdev, conn)) {
 		conn->state = BT_CONNECTED;
-		hci_proto_connect_cfm(conn, ev->status);
+		hci_connect_cfm(conn, ev->status);
 		hci_conn_drop(conn);
 	}
 
@@ -3738,7 +3738,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
 		break;
 	}
 
-	hci_proto_connect_cfm(conn, ev->status);
+	hci_connect_cfm(conn, ev->status);
 	if (ev->status)
 		hci_conn_del(conn);
 
@@ -3849,7 +3849,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
 		if (!ev->status)
 			conn->state = BT_CONNECTED;
 
-		hci_proto_connect_cfm(conn, ev->status);
+		hci_connect_cfm(conn, ev->status);
 		hci_conn_drop(conn);
 	} else {
 		hci_auth_cfm(conn, ev->status);
@@ -4512,7 +4512,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_debugfs_create_conn(conn);
 	hci_conn_add_sysfs(conn);
 
-	hci_proto_connect_cfm(conn, ev->status);
+	hci_connect_cfm(conn, ev->status);
 
 	params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
 					   conn->dst_type);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 424fcc5c4980..6e2c3bdda7d3 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7252,13 +7252,16 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
 	return NULL;
 }
 
-void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
+static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
 {
 	struct hci_dev *hdev = hcon->hdev;
 	struct l2cap_conn *conn;
 	struct l2cap_chan *pchan;
 	u8 dst_type;
 
+	if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
+		return;
+
 	BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
 
 	if (status) {
@@ -7543,6 +7546,7 @@ drop:
 
 static struct hci_cb l2cap_cb = {
 	.name		= "L2CAP",
+	.connect_cfm	= l2cap_connect_cfm,
 	.security_cfm	= l2cap_security_cfm,
 };
 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 76321b546e84..3c2e36f94b65 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1083,9 +1083,13 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
 	return lm;
 }
 
-void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
 {
+	if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
+		return;
+
 	BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
+
 	if (!status) {
 		struct sco_conn *conn;
 
@@ -1122,6 +1126,11 @@ drop:
 	return 0;
 }
 
+static struct hci_cb sco_cb = {
+	.name		= "SCO",
+	.connect_cfm	= sco_connect_cfm,
+};
+
 static int sco_debugfs_show(struct seq_file *f, void *p)
 {
 	struct sock *sk;
@@ -1203,6 +1212,8 @@ int __init sco_init(void)
 
 	BT_INFO("SCO socket layer initialized");
 
+	hci_register_cb(&sco_cb);
+
 	if (IS_ERR_OR_NULL(bt_debugfs))
 		return 0;
 
@@ -1222,6 +1233,8 @@ void __exit sco_exit(void)
 
 	debugfs_remove(sco_debugfs);
 
+	hci_unregister_cb(&sco_cb);
+
 	bt_sock_unregister(BTPROTO_SCO);
 
 	proto_unregister(&sco_proto);
-- 
cgit v1.2.3


From 3a6d576be9fe02b0c3ffa89ef6eac048e14eec84 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Wed, 18 Feb 2015 14:53:58 +0200
Subject: Bluetooth: Convert disconn_cfm to be triggered through hci_cb

This patch moves all the disconn_cfm callbacks to be based on the hci_cb
list. This means making l2cap_disconn_cfm private to l2cap_core.c and
sco_conn_cb private to sco.c respectively. Since the hci_conn type
filtering isn't done any more on the wrapper level the callbacks
themselves need to check that they were passed a relevant type of
connection.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 44 +++++++++++++++-------------------------
 net/bluetooth/hci_conn.c         |  2 +-
 net/bluetooth/hci_event.c        |  2 +-
 net/bluetooth/l2cap_core.c       |  6 +++++-
 net/bluetooth/sco.c              |  6 +++++-
 5 files changed, 28 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 0f00f0e9f257..a7bf77384464 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -504,11 +504,9 @@ extern struct mutex hci_cb_list_lock;
 /* ----- HCI interface to upper protocols ----- */
 int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
 int l2cap_disconn_ind(struct hci_conn *hcon);
-void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason);
 int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags);
 
 int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags);
-void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason);
 int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb);
 
 /* ----- Inquiry cache ----- */
@@ -1056,32 +1054,6 @@ static inline int hci_proto_disconn_ind(struct hci_conn *conn)
 	return l2cap_disconn_ind(conn);
 }
 
-static inline void hci_proto_disconn_cfm(struct hci_conn *conn, __u8 reason)
-{
-	switch (conn->type) {
-	case ACL_LINK:
-	case LE_LINK:
-		l2cap_disconn_cfm(conn, reason);
-		break;
-
-	case SCO_LINK:
-	case ESCO_LINK:
-		sco_disconn_cfm(conn, reason);
-		break;
-
-	/* L2CAP would be handled for BREDR chan */
-	case AMP_LINK:
-		break;
-
-	default:
-		BT_ERR("unknown link type %d", conn->type);
-		break;
-	}
-
-	if (conn->disconn_cfm_cb)
-		conn->disconn_cfm_cb(conn, reason);
-}
-
 /* ----- HCI callbacks ----- */
 struct hci_cb {
 	struct list_head list;
@@ -1089,6 +1061,7 @@ struct hci_cb {
 	char *name;
 
 	void (*connect_cfm)	(struct hci_conn *conn, __u8 status);
+	void (*disconn_cfm)	(struct hci_conn *conn, __u8 status);
 	void (*security_cfm)	(struct hci_conn *conn, __u8 status,
 								__u8 encrypt);
 	void (*key_change_cfm)	(struct hci_conn *conn, __u8 status);
@@ -1110,6 +1083,21 @@ static inline void hci_connect_cfm(struct hci_conn *conn, __u8 status)
 		conn->connect_cfm_cb(conn, status);
 }
 
+static inline void hci_disconn_cfm(struct hci_conn *conn, __u8 reason)
+{
+	struct hci_cb *cb;
+
+	mutex_lock(&hci_cb_list_lock);
+	list_for_each_entry(cb, &hci_cb_list, list) {
+		if (cb->disconn_cfm)
+			cb->disconn_cfm(conn, reason);
+	}
+	mutex_unlock(&hci_cb_list_lock);
+
+	if (conn->disconn_cfm_cb)
+		conn->disconn_cfm_cb(conn, reason);
+}
+
 static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
 {
 	struct hci_cb *cb;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index e9206734e024..91ebb9cb31de 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1151,7 +1151,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev)
 	list_for_each_entry_safe(c, n, &h->list, list) {
 		c->state = BT_CLOSED;
 
-		hci_proto_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM);
+		hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM);
 		hci_conn_del(c);
 	}
 }
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0b599129c64c..e9b17b585ee8 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2444,7 +2444,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 	type = conn->type;
 
-	hci_proto_disconn_cfm(conn, ev->reason);
+	hci_disconn_cfm(conn, ev->reason);
 	hci_conn_del(conn);
 
 	/* Re-enable advertising if necessary, since it might
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 6e2c3bdda7d3..91c682846bcf 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7324,8 +7324,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon)
 	return conn->disc_reason;
 }
 
-void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
+static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
 {
+	if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
+		return;
+
 	BT_DBG("hcon %p reason %d", hcon, reason);
 
 	l2cap_conn_del(hcon, bt_to_errno(reason));
@@ -7547,6 +7550,7 @@ drop:
 static struct hci_cb l2cap_cb = {
 	.name		= "L2CAP",
 	.connect_cfm	= l2cap_connect_cfm,
+	.disconn_cfm	= l2cap_disconn_cfm,
 	.security_cfm	= l2cap_security_cfm,
 };
 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 3c2e36f94b65..b94c3151896e 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1100,8 +1100,11 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
 		sco_conn_del(hcon, bt_to_errno(status));
 }
 
-void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
+static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
 {
+	if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
+		return;
+
 	BT_DBG("hcon %p reason %d", hcon, reason);
 
 	sco_conn_del(hcon, bt_to_errno(reason));
@@ -1129,6 +1132,7 @@ drop:
 static struct hci_cb sco_cb = {
 	.name		= "SCO",
 	.connect_cfm	= sco_connect_cfm,
+	.disconn_cfm	= sco_disconn_cfm,
 };
 
 static int sco_debugfs_show(struct seq_file *f, void *p)
-- 
cgit v1.2.3


From 7129069e84056ba28954550beb208b2645863299 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 20 Feb 2015 13:26:23 +0200
Subject: Bluetooth: Rename hci_send_to_control to hci_send_to_channel

The hci_send_to_control() can be made more general purpose with a small
change of passing the desired HCI channel as a parameter to it. This
allows using it for the monitor channel as well as e.g. 6lowpan in the
future.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 3 ++-
 net/bluetooth/hci_sock.c         | 9 +++++----
 net/bluetooth/mgmt.c             | 3 ++-
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a7bf77384464..a831c8ad10f1 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1265,7 +1265,8 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode);
 
 /* ----- HCI Sockets ----- */
 void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb);
-void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk);
+void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+			 struct sock *skip_sk);
 void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb);
 
 void hci_sock_dev_event(struct hci_dev *hdev, int event);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d65c5be7c82..ba5d45f8aac1 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -183,12 +183,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 	kfree_skb(skb_copy);
 }
 
-/* Send frame to control socket */
-void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk)
+/* Send frame to sockets with specific channel */
+void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+			 struct sock *skip_sk)
 {
 	struct sock *sk;
 
-	BT_DBG("len %d", skb->len);
+	BT_DBG("channel %u len %d", channel, skb->len);
 
 	read_lock(&hci_sk_list.lock);
 
@@ -202,7 +203,7 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk)
 		if (sk->sk_state != BT_BOUND)
 			continue;
 
-		if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL)
+		if (hci_pi(sk)->channel != channel)
 			continue;
 
 		nskb = skb_clone(skb, GFP_ATOMIC);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 3a1b537c9aa6..d5d46e7676f1 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -29,6 +29,7 @@
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/hci_sock.h>
 #include <net/bluetooth/l2cap.h>
 #include <net/bluetooth/mgmt.h>
 
@@ -242,7 +243,7 @@ static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 data_len,
 	/* Time stamp */
 	__net_timestamp(skb);
 
-	hci_send_to_control(skb, skip_sk);
+	hci_send_to_channel(HCI_CHANNEL_CONTROL, skb, skip_sk);
 	kfree_skb(skb);
 
 	return 0;
-- 
cgit v1.2.3


From 959d10f6bbf6ab5b8813c4e37540a2e43ca2ae96 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 17 Feb 2015 03:19:24 -0800
Subject: igmp: add __ip_mc_{join|leave}_group()

There is a need to perform igmp join/leave operations while RTNL is
held.

Make ip_mc_{join|leave}_group() wrappers around
__ip_mc_{join|leave}_group() to avoid the proliferation of work queues.

For example, vxlan_igmp_join() could possibly be removed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h |  2 ++
 net/ipv4/igmp.c      | 52 ++++++++++++++++++++++++++++++++++------------------
 2 files changed, 36 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 2c677afeea47..b5a6470e686c 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -111,7 +111,9 @@ struct ip_mc_list {
 
 extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto);
 extern int igmp_rcv(struct sk_buff *);
+extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
 extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
+extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr);
 extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr);
 extern void ip_mc_drop_socket(struct sock *sk);
 extern int ip_mc_source(int add, int omode, struct sock *sk,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 666cf364df86..4b1172d73e03 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1849,30 +1849,25 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
 	pmc->sfcount[MCAST_EXCLUDE] = 1;
 }
 
-
-/*
- * Join a multicast group
- */
-int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
+int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
 {
-	int err;
 	__be32 addr = imr->imr_multiaddr.s_addr;
-	struct ip_mc_socklist *iml = NULL, *i;
+	struct ip_mc_socklist *iml, *i;
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
 	struct net *net = sock_net(sk);
 	int ifindex;
 	int count = 0;
+	int err;
+
+	ASSERT_RTNL();
 
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	rtnl_lock();
-
 	in_dev = ip_mc_find_dev(net, imr);
 
 	if (!in_dev) {
-		iml = NULL;
 		err = -ENODEV;
 		goto done;
 	}
@@ -1900,9 +1895,22 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 	ip_mc_inc_group(in_dev, addr);
 	err = 0;
 done:
-	rtnl_unlock();
 	return err;
 }
+EXPORT_SYMBOL(__ip_mc_join_group);
+
+/* Join a multicast group
+ */
+int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+{
+	int ret;
+
+	rtnl_lock();
+	ret = __ip_mc_join_group(sk, imr);
+	rtnl_unlock();
+
+	return ret;
+}
 EXPORT_SYMBOL(ip_mc_join_group);
 
 static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
@@ -1925,11 +1933,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 	return err;
 }
 
-/*
- *	Ask a socket to leave a group.
- */
-
-int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
+int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_mc_socklist *iml;
@@ -1940,7 +1944,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	u32 ifindex;
 	int ret = -EADDRNOTAVAIL;
 
-	rtnl_lock();
+	ASSERT_RTNL();
+
 	in_dev = ip_mc_find_dev(net, imr);
 	if (!in_dev) {
 		ret = -ENODEV;
@@ -1964,14 +1969,25 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 		*imlp = iml->next_rcu;
 
 		ip_mc_dec_group(in_dev, group);
-		rtnl_unlock();
+
 		/* decrease mem now to avoid the memleak warning */
 		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
 		kfree_rcu(iml, rcu);
 		return 0;
 	}
 out:
+	return ret;
+}
+EXPORT_SYMBOL(__ip_mc_leave_group);
+
+int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
+{
+	int ret;
+
+	rtnl_lock();
+	ret = __ip_mc_leave_group(sk, imr);
 	rtnl_unlock();
+
 	return ret;
 }
 EXPORT_SYMBOL(ip_mc_leave_group);
-- 
cgit v1.2.3


From 059a2440fd3cf4ec57735db2c0a90401cde84fca Mon Sep 17 00:00:00 2001
From: Bojan Prtvar <prtvar.b@gmail.com>
Date: Sun, 22 Feb 2015 11:46:35 +0100
Subject: net: Remove state argument from skb_find_text()

Although it is clear that textsearch state is intentionally passed to
skb_find_text() as uninitialized argument, it was never used by the
callers. Therefore, we can simplify skb_find_text() by making it
local variable.

Signed-off-by: Bojan Prtvar <prtvar.b@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h              |  3 +--
 net/core/skbuff.c                   |  9 ++++-----
 net/netfilter/nf_conntrack_amanda.c | 10 +++-------
 net/netfilter/xt_string.c           |  3 +--
 net/sched/em_text.c                 |  3 +--
 5 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 30007afe70b3..d898b32dedcc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -870,8 +870,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
 void skb_abort_seq_read(struct skb_seq_state *st);
 
 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
-			   unsigned int to, struct ts_config *config,
-			   struct ts_state *state);
+			   unsigned int to, struct ts_config *config);
 
 /*
  * Packet hash types specify the type of hash in skb_set_hash.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 88c613eab142..374e43bc6b80 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2865,7 +2865,6 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
  * @from: search offset
  * @to: search limit
  * @config: textsearch configuration
- * @state: uninitialized textsearch state variable
  *
  * Finds a pattern in the skb data according to the specified
  * textsearch configuration. Use textsearch_next() to retrieve
@@ -2873,17 +2872,17 @@ static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
  * to the first occurrence or UINT_MAX if no match was found.
  */
 unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
-			   unsigned int to, struct ts_config *config,
-			   struct ts_state *state)
+			   unsigned int to, struct ts_config *config)
 {
+	struct ts_state state;
 	unsigned int ret;
 
 	config->get_next_block = skb_ts_get_next_block;
 	config->finish = skb_ts_finish;
 
-	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
+	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));
 
-	ret = textsearch_find(config, state);
+	ret = textsearch_find(config, &state);
 	return (ret <= to - from ? ret : UINT_MAX);
 }
 EXPORT_SYMBOL(skb_find_text);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index b8b95f4027ca..57a26cc90c9f 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -88,7 +88,6 @@ static int amanda_help(struct sk_buff *skb,
 		       struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo)
 {
-	struct ts_state ts;
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple *tuple;
 	unsigned int dataoff, start, stop, off, i;
@@ -113,23 +112,20 @@ static int amanda_help(struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 
-	memset(&ts, 0, sizeof(ts));
 	start = skb_find_text(skb, dataoff, skb->len,
-			      search[SEARCH_CONNECT].ts, &ts);
+			      search[SEARCH_CONNECT].ts);
 	if (start == UINT_MAX)
 		goto out;
 	start += dataoff + search[SEARCH_CONNECT].len;
 
-	memset(&ts, 0, sizeof(ts));
 	stop = skb_find_text(skb, start, skb->len,
-			     search[SEARCH_NEWLINE].ts, &ts);
+			     search[SEARCH_NEWLINE].ts);
 	if (stop == UINT_MAX)
 		goto out;
 	stop += start;
 
 	for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
-		memset(&ts, 0, sizeof(ts));
-		off = skb_find_text(skb, start, stop, search[i].ts, &ts);
+		off = skb_find_text(skb, start, stop, search[i].ts);
 		if (off == UINT_MAX)
 			continue;
 		off += start + search[i].len;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 5699adb97652..0bc3460319c8 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -26,13 +26,12 @@ static bool
 string_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_string_info *conf = par->matchinfo;
-	struct ts_state state;
 	bool invert;
 
 	invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
 
 	return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
-			     conf->to_offset, conf->config, &state)
+			     conf->to_offset, conf->config)
 			     != UINT_MAX) ^ invert;
 }
 
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index f03c3de16c27..73e2ed576ceb 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -34,7 +34,6 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m,
 {
 	struct text_match *tm = EM_TEXT_PRIV(m);
 	int from, to;
-	struct ts_state state;
 
 	from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data;
 	from += tm->from_offset;
@@ -42,7 +41,7 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m,
 	to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data;
 	to += tm->to_offset;
 
-	return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX;
+	return skb_find_text(skb, from, to, tm->config) != UINT_MAX;
 }
 
 static int em_text_change(struct net *net, void *data, int len,
-- 
cgit v1.2.3


From 14c9551a32eba086c9f20c9d6a8e378481f15333 Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Mon, 23 Feb 2015 17:50:11 -0800
Subject: bonding: Implement port churn-machine (AD standard 43.4.17).

The Churn Detection machines detect the situation where a port is operable,
but the Actor and Partner have not attached the link to an Aggregator and
brought the link into operation within a bound time period. Under normal
operation of the LACP, agreement between Actor and Partner should be reached
very rapidly. Continued failure to reach agreement can be symptomatic of
device failure.

Actor-churn-detection state-machine
Reviewed-by: Nikolay Aleksandrov <nikolay@redhat.com>

===================================

BEGIN=True + PortEnable=False
           |
           v
 +------------------------+   ActorPort.Sync=True  +------------------+
 |   ACTOR_CHURN_MONITOR  | ---------------------> |  NO_ACTOR_CHURN  |
 |========================|                        |==================|
 |    ActorChurn=False    |  ActorPort.Sync=False  | ActorChurn=False |
 | ActorChurn.Timer=Start | <--------------------- |                  |
 +------------------------+                        +------------------+
           |                                                ^
           |                                                |
  ActorChurn.Timer=Expired                                  |
           |                                       ActorPort.Sync=True
           |                                                |
           |                +-----------------+             |
           |                |   ACTOR_CHURN   |             |
           |                |=================|             |
           +--------------> | ActorChurn=True | ------------+
                            |                 |
                            +-----------------+

Similar for the Partner-churn-detection.

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c    | 57 +++++++++++++++++++++++++++++++++++++--
 drivers/net/bonding/bond_procfs.c | 43 +++++++++++++++++++++++++----
 include/net/bond_3ad.h            | 29 ++++++++++++++++++++
 3 files changed, 122 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 9b436696b95e..f61b2870cddf 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -38,6 +38,7 @@
 #define AD_STANDBY                 0x2
 #define AD_MAX_TX_IN_SECOND        3
 #define AD_COLLECTOR_MAX_DELAY     0
+#define AD_MONITOR_CHURNED         0x1000
 
 /* Timer definitions (43.4.4 in the 802.3ad standard) */
 #define AD_FAST_PERIODIC_TIME      1
@@ -1013,16 +1014,19 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
 	/* check if state machine should change state */
 
 	/* first, check if port was reinitialized */
-	if (port->sm_vars & AD_PORT_BEGIN)
+	if (port->sm_vars & AD_PORT_BEGIN) {
 		port->sm_rx_state = AD_RX_INITIALIZE;
+		port->sm_vars |= AD_MONITOR_CHURNED;
 	/* check if port is not enabled */
-	else if (!(port->sm_vars & AD_PORT_BEGIN)
+	} else if (!(port->sm_vars & AD_PORT_BEGIN)
 		 && !port->is_enabled && !(port->sm_vars & AD_PORT_MOVED))
 		port->sm_rx_state = AD_RX_PORT_DISABLED;
 	/* check if new lacpdu arrived */
 	else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) ||
 		 (port->sm_rx_state == AD_RX_DEFAULTED) ||
 		 (port->sm_rx_state == AD_RX_CURRENT))) {
+		if (port->sm_rx_state != AD_RX_CURRENT)
+			port->sm_vars |= AD_MONITOR_CHURNED;
 		port->sm_rx_timer_counter = 0;
 		port->sm_rx_state = AD_RX_CURRENT;
 	} else {
@@ -1100,9 +1104,11 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
 			 */
 			port->partner_oper.port_state &= ~AD_STATE_SYNCHRONIZATION;
 			port->sm_vars &= ~AD_PORT_MATCHED;
+			port->partner_oper.port_state |= AD_STATE_LACP_TIMEOUT;
 			port->partner_oper.port_state |= AD_STATE_LACP_ACTIVITY;
 			port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT));
 			port->actor_oper_port_state |= AD_STATE_EXPIRED;
+			port->sm_vars |= AD_MONITOR_CHURNED;
 			break;
 		case AD_RX_DEFAULTED:
 			__update_default_selected(port);
@@ -1131,6 +1137,45 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
 	}
 }
 
+/**
+ * ad_churn_machine - handle port churn's state machine
+ * @port: the port we're looking at
+ *
+ */
+static void ad_churn_machine(struct port *port)
+{
+	if (port->sm_vars & AD_MONITOR_CHURNED) {
+		port->sm_vars &= ~AD_MONITOR_CHURNED;
+		port->sm_churn_actor_state = AD_CHURN_MONITOR;
+		port->sm_churn_partner_state = AD_CHURN_MONITOR;
+		port->sm_churn_actor_timer_counter =
+			__ad_timer_to_ticks(AD_ACTOR_CHURN_TIMER, 0);
+		 port->sm_churn_partner_timer_counter =
+			 __ad_timer_to_ticks(AD_PARTNER_CHURN_TIMER, 0);
+		return;
+	}
+	if (port->sm_churn_actor_timer_counter &&
+	    !(--port->sm_churn_actor_timer_counter) &&
+	    port->sm_churn_actor_state == AD_CHURN_MONITOR) {
+		if (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION) {
+			port->sm_churn_actor_state = AD_NO_CHURN;
+		} else {
+			port->churn_actor_count++;
+			port->sm_churn_actor_state = AD_CHURN;
+		}
+	}
+	if (port->sm_churn_partner_timer_counter &&
+	    !(--port->sm_churn_partner_timer_counter) &&
+	    port->sm_churn_partner_state == AD_CHURN_MONITOR) {
+		if (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) {
+			port->sm_churn_partner_state = AD_NO_CHURN;
+		} else {
+			port->churn_partner_count++;
+			port->sm_churn_partner_state = AD_CHURN;
+		}
+	}
+}
+
 /**
  * ad_tx_machine - handle a port's tx state machine
  * @port: the port we're looking at
@@ -1745,6 +1790,13 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
 		port->next_port_in_aggregator = NULL;
 		port->transaction_id = 0;
 
+		port->sm_churn_actor_timer_counter = 0;
+		port->sm_churn_actor_state = 0;
+		port->churn_actor_count = 0;
+		port->sm_churn_partner_timer_counter = 0;
+		port->sm_churn_partner_state = 0;
+		port->churn_partner_count = 0;
+
 		memcpy(&port->lacpdu, &lacpdu, sizeof(lacpdu));
 	}
 }
@@ -2164,6 +2216,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 		ad_port_selection_logic(port, &update_slave_arr);
 		ad_mux_machine(port, &update_slave_arr);
 		ad_tx_machine(port);
+		ad_churn_machine(port);
 
 		/* turn off the BEGIN bit, since we already handled it */
 		if (port->sm_vars & AD_PORT_BEGIN)
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 976f5ad2a0f2..62694cfc05b6 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -176,18 +176,51 @@ static void bond_info_show_slave(struct seq_file *seq,
 		   slave->link_failure_count);
 
 	seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr);
+	seq_printf(seq, "Slave queue ID: %d\n", slave->queue_id);
 
 	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-		const struct aggregator *agg
-			= SLAVE_AD_INFO(slave)->port.aggregator;
+		const struct port *port = &SLAVE_AD_INFO(slave)->port;
+		const struct aggregator *agg = port->aggregator;
 
-		if (agg)
+		if (agg) {
 			seq_printf(seq, "Aggregator ID: %d\n",
 				   agg->aggregator_identifier);
-		else
+			seq_printf(seq, "Actor Churn State: %s\n",
+				   bond_3ad_churn_desc(port->sm_churn_actor_state));
+			seq_printf(seq, "Partner Churn State: %s\n",
+				   bond_3ad_churn_desc(port->sm_churn_partner_state));
+			seq_printf(seq, "Actor Churned Count: %d\n",
+				   port->churn_actor_count);
+			seq_printf(seq, "Partner Churned Count: %d\n",
+				   port->churn_partner_count);
+
+			seq_puts(seq, "details actor lacp pdu:\n");
+			seq_printf(seq, "    system priority: %d\n",
+				   port->actor_system_priority);
+			seq_printf(seq, "    port key: %d\n",
+				   port->actor_oper_port_key);
+			seq_printf(seq, "    port priority: %d\n",
+				   port->actor_port_priority);
+			seq_printf(seq, "    port number: %d\n",
+				   port->actor_port_number);
+			seq_printf(seq, "    port state: %d\n",
+				   port->actor_oper_port_state);
+
+			seq_puts(seq, "details partner lacp pdu:\n");
+			seq_printf(seq, "    system priority: %d\n",
+				   port->partner_oper.system_priority);
+			seq_printf(seq, "    oper key: %d\n",
+				   port->partner_oper.key);
+			seq_printf(seq, "    port priority: %d\n",
+				   port->partner_oper.port_priority);
+			seq_printf(seq, "    port number: %d\n",
+				   port->partner_oper.port_number);
+			seq_printf(seq, "    port state: %d\n",
+				   port->partner_oper.port_state);
+		} else {
 			seq_puts(seq, "Aggregator ID: N/A\n");
+		}
 	}
-	seq_printf(seq, "Slave queue ID: %d\n", slave->queue_id);
 }
 
 static int bond_info_seq_show(struct seq_file *seq, void *v)
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index f04cdbb7848e..c2a40a172fcd 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -82,6 +82,13 @@ typedef enum {
 	AD_TRANSMIT		/* tx Machine */
 } tx_states_t;
 
+/* churn machine states(43.4.17 in the 802.3ad standard) */
+typedef enum {
+	 AD_CHURN_MONITOR, /* monitoring for churn */
+	 AD_CHURN,         /* churn detected (error) */
+	 AD_NO_CHURN       /* no churn (no error) */
+} churn_state_t;
+
 /* rx indication types */
 typedef enum {
 	AD_TYPE_LACPDU = 1,	/* type lacpdu */
@@ -229,6 +236,12 @@ typedef struct port {
 	u16 sm_mux_timer_counter;	/* state machine mux timer counter */
 	tx_states_t sm_tx_state;	/* state machine tx state */
 	u16 sm_tx_timer_counter;	/* state machine tx timer counter(allways on - enter to transmit state 3 time per second) */
+	u16 sm_churn_actor_timer_counter;
+	u16 sm_churn_partner_timer_counter;
+	u32 churn_actor_count;
+	u32 churn_partner_count;
+	churn_state_t sm_churn_actor_state;
+	churn_state_t sm_churn_partner_state;
 	struct slave *slave;		/* pointer to the bond slave that this port belongs to */
 	struct aggregator *aggregator;	/* pointer to an aggregator that this port related to */
 	struct port *next_port_in_aggregator;	/* Next port on the linked list of the parent aggregator */
@@ -262,6 +275,22 @@ struct ad_slave_info {
 	u16 id;
 };
 
+static inline const char *bond_3ad_churn_desc(churn_state_t state)
+{
+	static const char *const churn_description[] = {
+		"monitoring",
+		"churned",
+		"none",
+		"unknown"
+	};
+	int max_size = sizeof(churn_description) / sizeof(churn_description[0]);
+
+	if (state >= max_size)
+		state = max_size - 1;
+
+	return churn_description[state];
+}
+
 /* ========== AD Exported functions to the main bonding code ========== */
 void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution);
 void bond_3ad_bind_slave(struct slave *slave);
-- 
cgit v1.2.3


From d752c364571743d696c2a54a449ce77550c35ac5 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <mleitner@redhat.com>
Date: Mon, 23 Feb 2015 15:02:34 -0300
Subject: ipvs: allow rescheduling of new connections when port reuse is
 detected

Currently, when TCP/SCTP port reusing happens, IPVS will find the old
entry and use it for the new one, behaving like a forced persistence.
But if you consider a cluster with a heavy load of small connections,
such reuse will happen often and may lead to a not optimal load
balancing and might prevent a new node from getting a fair load.

This patch introduces a new sysctl, conn_reuse_mode, that allows
controlling how to proceed when port reuse is detected. The default
value will allow rescheduling of new connections only if the old entry
was in TIME_WAIT state for TCP or CLOSED for SCTP.

Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 Documentation/networking/ipvs-sysctl.txt | 21 ++++++++++++++++++++
 include/net/ip_vs.h                      | 11 +++++++++++
 net/netfilter/ipvs/ip_vs_core.c          | 33 ++++++++++++++++++++++++++++----
 net/netfilter/ipvs/ip_vs_ctl.c           |  8 ++++++++
 net/netfilter/ipvs/ip_vs_sync.c          | 21 ++++++++++++++++++--
 5 files changed, 88 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt
index 7a3c04729591..3ba709531adb 100644
--- a/Documentation/networking/ipvs-sysctl.txt
+++ b/Documentation/networking/ipvs-sysctl.txt
@@ -22,6 +22,27 @@ backup_only - BOOLEAN
 	If set, disable the director function while the server is
 	in backup mode to avoid packet loops for DR/TUN methods.
 
+conn_reuse_mode - INTEGER
+	1 - default
+
+	Controls how ipvs will deal with connections that are detected
+	port reuse. It is a bitmap, with the values being:
+
+	0: disable any special handling on port reuse. The new
+	connection will be delivered to the same real server that was
+	servicing the previous connection. This will effectively
+	disable expire_nodest_conn.
+
+	bit 1: enable rescheduling of new connections when it is safe.
+	That is, whenever expire_nodest_conn and for TCP sockets, when
+	the connection is in TIME_WAIT state (which is only possible if
+	you use NAT mode).
+
+	bit 2: it is bit 1 plus, for TCP connections, when connections
+	are in FIN_WAIT state, as this is the last state seen by load
+	balancer in Direct Routing mode. This bit helps on adding new
+	real servers to a very busy cluster.
+
 conntrack - BOOLEAN
 	0 - disabled (default)
 	not 0 - enabled
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a627fe690c19..20fd23398537 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -941,6 +941,7 @@ struct netns_ipvs {
 	int			sysctl_nat_icmp_send;
 	int			sysctl_pmtu_disc;
 	int			sysctl_backup_only;
+	int			sysctl_conn_reuse_mode;
 
 	/* ip_vs_lblc */
 	int			sysctl_lblc_expiration;
@@ -1059,6 +1060,11 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
 	       ipvs->sysctl_backup_only;
 }
 
+static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_conn_reuse_mode;
+}
+
 #else
 
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -1126,6 +1132,11 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
 	return 0;
 }
 
+static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
+{
+	return 1;
+}
+
 #endif
 
 /* IPVS core functions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c9470c86308f..6103ab933c5b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1042,6 +1042,26 @@ static inline bool is_new_conn(const struct sk_buff *skb,
 	}
 }
 
+static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
+					int conn_reuse_mode)
+{
+	/* Controlled (FTP DATA or persistence)? */
+	if (cp->control)
+		return false;
+
+	switch (cp->protocol) {
+	case IPPROTO_TCP:
+		return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
+			((conn_reuse_mode & 2) &&
+			 (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
+			 (cp->flags & IP_VS_CONN_F_NOOUTPUT));
+	case IPPROTO_SCTP:
+		return cp->state == IP_VS_SCTP_S_CLOSED;
+	default:
+		return false;
+	}
+}
+
 /* Handle response packets: rewrite addresses and send away...
  */
 static unsigned int
@@ -1580,6 +1600,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	struct ip_vs_conn *cp;
 	int ret, pkts;
 	struct netns_ipvs *ipvs;
+	int conn_reuse_mode;
 
 	/* Already marked as IPVS request or reply? */
 	if (skb->ipvs_property)
@@ -1648,10 +1669,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	 */
 	cp = pp->conn_in_get(af, skb, &iph, 0);
 
-	if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
-	    unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
-	    is_new_conn(skb, &iph)) {
-		ip_vs_conn_expire_now(cp);
+	conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+	if (conn_reuse_mode && !iph.fragoffs &&
+	    is_new_conn(skb, &iph) && cp &&
+	    ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+	      unlikely(!atomic_read(&cp->dest->weight))) ||
+	     unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
+		if (!atomic_read(&cp->n_control))
+			ip_vs_conn_expire_now(cp);
 		__ip_vs_conn_put(cp);
 		cp = NULL;
 	}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6fd60059faf0..76cc9ffd87fa 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1823,6 +1823,12 @@ static struct ctl_table vs_vars[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "conn_reuse_mode",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_IP_VS_DEBUG
 	{
 		.procname	= "debug_level",
@@ -3790,6 +3796,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 	ipvs->sysctl_pmtu_disc = 1;
 	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
 	tbl[idx++].data = &ipvs->sysctl_backup_only;
+	ipvs->sysctl_conn_reuse_mode = 1;
+	tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
 
 
 	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index c47ffd7a0a70..f96229cdb6e1 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -845,10 +845,27 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 	struct ip_vs_conn *cp;
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	if (!(flags & IP_VS_CONN_F_TEMPLATE))
+	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
 		cp = ip_vs_conn_in_get(param);
-	else
+		if (cp && ((cp->dport != dport) ||
+			   !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
+			if (!(flags & IP_VS_CONN_F_INACTIVE)) {
+				ip_vs_conn_expire_now(cp);
+				__ip_vs_conn_put(cp);
+				cp = NULL;
+			} else {
+				/* This is the expiration message for the
+				 * connection that was already replaced, so we
+				 * just ignore it.
+				 */
+				__ip_vs_conn_put(cp);
+				kfree(param->pe_data);
+				return;
+			}
+		}
+	} else {
 		cp = ip_vs_ct_in_get(param);
+	}
 
 	if (cp) {
 		/* Free pe_data */
-- 
cgit v1.2.3


From b73adef67765b72f2a0d01ef15aff9d784dc85da Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 24 Feb 2015 13:15:33 -0800
Subject: net: dsa: integrate with SWITCHDEV for HW bridging

In order to support bridging offloads in DSA switch drivers, select
NET_SWITCHDEV to get access to the port_stp_update and parent_get_id
NDOs that we are required to implement.

To facilitate the integratation at the DSA driver level, we implement 3
types of operations:

- port_join_bridge
- port_leave_bridge
- port_stp_update

DSA will resolve which switch ports that are currently bridge port
members as some Switch hardware/drivers need to know about that to limit
the register programming to just the relevant registers (especially for
slow MDIO buses).

We also take care of setting the correct STP state when slave network
devices are brought up/down while being bridge members.

Finally, when a port is leaving the bridge, we make sure we set in
BR_STATE_FORWARDING state, otherwise the bridge layer would leave it
disabled as a result of having left the bridge.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h  |  10 ++++
 net/dsa/Kconfig    |   1 +
 net/dsa/dsa.c      |   7 +++
 net/dsa/dsa_priv.h |   4 ++
 net/dsa/slave.c    | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index ed3c34bbb67a..92be34791963 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -275,6 +275,16 @@ struct dsa_switch_driver {
 	int	(*get_regs_len)(struct dsa_switch *ds, int port);
 	void	(*get_regs)(struct dsa_switch *ds, int port,
 			    struct ethtool_regs *regs, void *p);
+
+	/*
+	 * Bridge integration
+	 */
+	int	(*port_join_bridge)(struct dsa_switch *ds, int port,
+				    u32 br_port_mask);
+	int	(*port_leave_bridge)(struct dsa_switch *ds, int port,
+				     u32 br_port_mask);
+	int	(*port_stp_update)(struct dsa_switch *ds, int port,
+				   u8 state);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 5f8ac404535b..b45206e8dd3e 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -8,6 +8,7 @@ config NET_DSA
 	tristate
 	depends on HAVE_NET_DSA
 	select PHYLIB
+	select NET_SWITCHDEV
 
 if NET_DSA
 
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index fc1813140be6..9c208f0dab08 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -826,6 +826,10 @@ static struct packet_type dsa_pack_type __read_mostly = {
 	.func	= dsa_switch_rcv,
 };
 
+static struct notifier_block dsa_netdevice_nb __read_mostly = {
+	.notifier_call	= dsa_slave_netdevice_event,
+};
+
 #ifdef CONFIG_PM_SLEEP
 static int dsa_suspend(struct device *d)
 {
@@ -884,6 +888,8 @@ static int __init dsa_init_module(void)
 {
 	int rc;
 
+	register_netdevice_notifier(&dsa_netdevice_nb);
+
 	rc = platform_driver_register(&dsa_driver);
 	if (rc)
 		return rc;
@@ -896,6 +902,7 @@ module_init(dsa_init_module);
 
 static void __exit dsa_cleanup_module(void)
 {
+	unregister_netdevice_notifier(&dsa_netdevice_nb);
 	dev_remove_pack(&dsa_pack_type);
 	platform_driver_unregister(&dsa_driver);
 }
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 7eb1a6acd46c..d5f1f9b862ea 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -45,6 +45,8 @@ struct dsa_slave_priv {
 	int			old_link;
 	int			old_pause;
 	int			old_duplex;
+
+	struct net_device	*bridge_dev;
 };
 
 /* dsa.c */
@@ -57,6 +59,8 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 		     int port, char *name);
 int dsa_slave_suspend(struct net_device *slave_dev);
 int dsa_slave_resume(struct net_device *slave_dev);
+int dsa_slave_netdevice_event(struct notifier_block *unused,
+			      unsigned long event, void *ptr);
 
 /* tag_dsa.c */
 extern const struct dsa_device_ops dsa_netdev_ops;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 5be4c928c9c9..b5a4d8974b76 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -10,10 +10,13 @@
 
 #include <linux/list.h>
 #include <linux/etherdevice.h>
+#include <linux/netdevice.h>
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
 #include <linux/of_net.h>
 #include <linux/of_mdio.h>
+#include <net/rtnetlink.h>
+#include <linux/if_bridge.h>
 #include "dsa_priv.h"
 
 /* slave mii_bus handling ***************************************************/
@@ -60,11 +63,18 @@ static int dsa_slave_init(struct net_device *dev)
 	return 0;
 }
 
+static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p)
+{
+	return !!p->bridge_dev;
+}
+
 static int dsa_slave_open(struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct net_device *master = p->parent->dst->master_netdev;
 	struct dsa_switch *ds = p->parent;
+	u8 stp_state = dsa_port_is_bridged(p) ?
+			BR_STATE_BLOCKING : BR_STATE_FORWARDING;
 	int err;
 
 	if (!(master->flags & IFF_UP))
@@ -93,6 +103,9 @@ static int dsa_slave_open(struct net_device *dev)
 			goto clear_promisc;
 	}
 
+	if (ds->drv->port_stp_update)
+		ds->drv->port_stp_update(ds, p->port, stp_state);
+
 	if (p->phy)
 		phy_start(p->phy);
 
@@ -133,6 +146,9 @@ static int dsa_slave_close(struct net_device *dev)
 	if (ds->drv->port_disable)
 		ds->drv->port_disable(ds, p->port, p->phy);
 
+	if (ds->drv->port_stp_update)
+		ds->drv->port_stp_update(ds, p->port, BR_STATE_DISABLED);
+
 	return 0;
 }
 
@@ -194,6 +210,95 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return -EOPNOTSUPP;
 }
 
+/* Return a bitmask of all ports being currently bridged within a given bridge
+ * device. Note that on leave, the mask will still return the bitmask of ports
+ * currently bridged, prior to port removal, and this is exactly what we want.
+ */
+static u32 dsa_slave_br_port_mask(struct dsa_switch *ds,
+				  struct net_device *bridge)
+{
+	struct dsa_slave_priv *p;
+	unsigned int port;
+	u32 mask = 0;
+
+	for (port = 0; port < DSA_MAX_PORTS; port++) {
+		if (!((1 << port) & ds->phys_port_mask))
+			continue;
+
+		if (!ds->ports[port])
+			continue;
+
+		p = netdev_priv(ds->ports[port]);
+
+		if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT &&
+		    p->bridge_dev == bridge)
+			mask |= 1 << port;
+	}
+
+	return mask;
+}
+
+static int dsa_slave_stp_update(struct net_device *dev, u8 state)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	int ret = -EOPNOTSUPP;
+
+	if (ds->drv->port_stp_update)
+		ret = ds->drv->port_stp_update(ds, p->port, state);
+
+	return ret;
+}
+
+static int dsa_slave_bridge_port_join(struct net_device *dev,
+				      struct net_device *br)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	int ret = -EOPNOTSUPP;
+
+	p->bridge_dev = br;
+
+	if (ds->drv->port_join_bridge)
+		ret = ds->drv->port_join_bridge(ds, p->port,
+						dsa_slave_br_port_mask(ds, br));
+
+	return ret;
+}
+
+static int dsa_slave_bridge_port_leave(struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	int ret = -EOPNOTSUPP;
+
+
+	if (ds->drv->port_leave_bridge)
+		ret = ds->drv->port_leave_bridge(ds, p->port,
+						 dsa_slave_br_port_mask(ds, p->bridge_dev));
+
+	p->bridge_dev = NULL;
+
+	/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
+	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
+	 */
+	dsa_slave_stp_update(dev, BR_STATE_FORWARDING);
+
+	return ret;
+}
+
+static int dsa_slave_parent_id_get(struct net_device *dev,
+				   struct netdev_phys_item_id *psid)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	psid->id_len = sizeof(ds->index);
+	memcpy(&psid->id, &ds->index, psid->id_len);
+
+	return 0;
+}
+
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -470,6 +575,8 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
 	.ndo_set_mac_address	= dsa_slave_set_mac_address,
 	.ndo_do_ioctl		= dsa_slave_ioctl,
+	.ndo_switch_parent_id_get = dsa_slave_parent_id_get,
+	.ndo_switch_port_stp_update = dsa_slave_stp_update,
 };
 
 static void dsa_slave_adjust_link(struct net_device *dev)
@@ -684,3 +791,45 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 
 	return 0;
 }
+
+static bool dsa_slave_dev_check(struct net_device *dev)
+{
+	return dev->netdev_ops == &dsa_slave_netdev_ops;
+}
+
+static int dsa_slave_master_changed(struct net_device *dev)
+{
+	struct net_device *master = netdev_master_upper_dev_get(dev);
+	int err = 0;
+
+	if (master && master->rtnl_link_ops &&
+	    !strcmp(master->rtnl_link_ops->kind, "bridge"))
+		err = dsa_slave_bridge_port_join(dev, master);
+	else
+		err = dsa_slave_bridge_port_leave(dev);
+
+	return err;
+}
+
+int dsa_slave_netdevice_event(struct notifier_block *unused,
+			      unsigned long event, void *ptr)
+{
+	struct net_device *dev;
+	int err = 0;
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		dev = netdev_notifier_info_to_dev(ptr);
+		if (!dsa_slave_dev_check(dev))
+			goto out;
+
+		err = dsa_slave_master_changed(dev);
+		if (err)
+			netdev_warn(dev, "failed to reflect master change\n");
+
+		break;
+	}
+
+out:
+	return NOTIFY_DONE;
+}
-- 
cgit v1.2.3


From d79d21073626cf022943e5c4c10a97cdf7cb8465 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 24 Feb 2015 23:02:02 -0800
Subject: net: dsa: Introduce dsa_is_port_initialized

To avoid race conditions when using the ds->ports[] array,
we need to check if the accessed port has been initialized.
Introduce and use helper function dsa_is_port_initialized
for that purpose and use it where needed.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 5 +++++
 net/dsa/dsa.c     | 4 ++--
 net/dsa/slave.c   | 5 +----
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 92be34791963..c542c131d551 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -165,6 +165,11 @@ static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
 	return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port);
 }
 
+static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
+{
+	return ds->phys_port_mask & (1 << p) && ds->ports[p];
+}
+
 static inline u8 dsa_upstream_port(struct dsa_switch *ds)
 {
 	struct dsa_switch_tree *dst = ds->dst;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 9c208f0dab08..a1d1f0775bea 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -374,7 +374,7 @@ static int dsa_switch_suspend(struct dsa_switch *ds)
 
 	/* Suspend slave network devices */
 	for (i = 0; i < DSA_MAX_PORTS; i++) {
-		if (!(ds->phys_port_mask & (1 << i)))
+		if (!dsa_is_port_initialized(ds, i))
 			continue;
 
 		ret = dsa_slave_suspend(ds->ports[i]);
@@ -400,7 +400,7 @@ static int dsa_switch_resume(struct dsa_switch *ds)
 
 	/* Resume slave network devices */
 	for (i = 0; i < DSA_MAX_PORTS; i++) {
-		if (!(ds->phys_port_mask & (1 << i)))
+		if (!dsa_is_port_initialized(ds, i))
 			continue;
 
 		ret = dsa_slave_resume(ds->ports[i]);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b5a4d8974b76..a47305c72fcc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -222,10 +222,7 @@ static u32 dsa_slave_br_port_mask(struct dsa_switch *ds,
 	u32 mask = 0;
 
 	for (port = 0; port < DSA_MAX_PORTS; port++) {
-		if (!((1 << port) & ds->phys_port_mask))
-			continue;
-
-		if (!ds->ports[port])
+		if (!dsa_is_port_initialized(ds, port))
 			continue;
 
 		p = netdev_priv(ds->ports[port]);
-- 
cgit v1.2.3


From 4cd3928a8bee83d86fb3865bb243ab2ff1dd0eb6 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 27 Feb 2015 10:11:13 +0200
Subject: Bluetooth: Update New CSRK event to match latest specification

The 'master' parameter of the New CSRK event was recently renamed to
'type', with the old values kept for backwards compatibility as
unauthenticated local/remote keys. This patch updates the code to take
into account the two new (authenticated) values and ensures they get
used based on the security level of the connection that the respective
keys get distributed over.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  2 +-
 include/net/bluetooth/mgmt.h     |  7 ++++++-
 net/bluetooth/mgmt.c             |  2 +-
 net/bluetooth/smp.c              | 10 ++++++++--
 4 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a831c8ad10f1..acec9140c3f9 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -108,7 +108,7 @@ struct bt_uuid {
 struct smp_csrk {
 	bdaddr_t bdaddr;
 	u8 bdaddr_type;
-	u8 master;
+	u8 type;
 	u8 val[16];
 };
 
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index e218a30f2061..fe8eef00e9ca 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -647,9 +647,14 @@ struct mgmt_ev_new_irk {
 	struct mgmt_irk_info irk;
 } __packed;
 
+#define MGMT_CSRK_LOCAL_UNAUTHENTICATED		0x00
+#define MGMT_CSRK_REMOTE_UNAUTHENTICATED	0x01
+#define MGMT_CSRK_LOCAL_AUTHENTICATED		0x02
+#define MGMT_CSRK_REMOTE_AUTHENTICATED		0x03
+
 struct mgmt_csrk_info {
 	struct mgmt_addr_info addr;
-	__u8 master;
+	__u8 type;
 	__u8 val[16];
 } __packed;
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d5d46e7676f1..1e4635a3374d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6664,7 +6664,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
 
 	bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr);
 	ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type);
-	ev.key.master = csrk->master;
+	ev.key.type = csrk->type;
 	memcpy(ev.key.val, csrk->val, sizeof(csrk->val));
 
 	mgmt_event(MGMT_EV_NEW_CSRK, hdev, &ev, sizeof(ev), NULL);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index b2803bd6e0d8..c91c19bfc0a8 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1252,7 +1252,10 @@ static void smp_distribute_keys(struct smp_chan *smp)
 
 		csrk = kzalloc(sizeof(*csrk), GFP_KERNEL);
 		if (csrk) {
-			csrk->master = 0x00;
+			if (hcon->sec_level > BT_SECURITY_MEDIUM)
+				csrk->type = MGMT_CSRK_LOCAL_AUTHENTICATED;
+			else
+				csrk->type = MGMT_CSRK_LOCAL_UNAUTHENTICATED;
 			memcpy(csrk->val, sign.csrk, sizeof(csrk->val));
 		}
 		smp->slave_csrk = csrk;
@@ -2352,7 +2355,10 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb)
 
 	csrk = kzalloc(sizeof(*csrk), GFP_KERNEL);
 	if (csrk) {
-		csrk->master = 0x01;
+		if (conn->hcon->sec_level > BT_SECURITY_MEDIUM)
+			csrk->type = MGMT_CSRK_REMOTE_AUTHENTICATED;
+		else
+			csrk->type = MGMT_CSRK_REMOTE_UNAUTHENTICATED;
 		memcpy(csrk->val, rp->csrk, sizeof(csrk->val));
 	}
 	smp->csrk = csrk;
-- 
cgit v1.2.3


From ccdaeb2b176f7db491a6f8e8b1c51f9393525f7d Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Fri, 27 Feb 2015 09:58:26 +0100
Subject: at86rf230: add support for external xtal trim

This patch adds support for setting the xtal trim register. Some at86rf2xx
transceiver boards needs fine tuning the xtal capacitor.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 .../bindings/net/ieee802154/at86rf230.txt          |  3 ++
 drivers/net/ieee802154/at86rf230.c                 | 54 ++++++++++++++++++++--
 include/linux/spi/at86rf230.h                      |  1 +
 3 files changed, 54 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt
index d3bbdded4cbe..1ae5100fea14 100644
--- a/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt
+++ b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt
@@ -11,6 +11,8 @@ Required properties:
 Optional properties:
   - reset-gpio:		GPIO spec for the rstn pin
   - sleep-gpio:		GPIO spec for the slp_tr pin
+  - xtal-trim:		u8 value for fine tuning the internal capacitance
+			arrays of xtal pins: 0 = +0 pF, 0xf = +4.5 pF
 
 Example:
 
@@ -20,4 +22,5 @@ Example:
 		reg = <0>;
 		interrupts = <19 1>;
 		interrupt-parent = <&gpio3>;
+		xtal-trim = /bits/ 8 <0x06>;
 	};
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 9888b7ff24bc..c1323e5cdd0c 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -1315,7 +1315,7 @@ static struct at86rf2xx_chip_data at86rf212_data = {
 	.get_desense_steps = at86rf212_get_desens_steps
 };
 
-static int at86rf230_hw_init(struct at86rf230_local *lp)
+static int at86rf230_hw_init(struct at86rf230_local *lp, u8 xtal_trim)
 {
 	int rc, irq_type, irq_pol = IRQ_ACTIVE_HIGH;
 	unsigned int dvdd;
@@ -1362,6 +1362,45 @@ static int at86rf230_hw_init(struct at86rf230_local *lp)
 	usleep_range(lp->data->t_sleep_cycle,
 		     lp->data->t_sleep_cycle + 100);
 
+	/* xtal_trim value is calculated by:
+	 * CL = 0.5 * (CX + CTRIM + CPAR)
+	 *
+	 * whereas:
+	 * CL = capacitor of used crystal
+	 * CX = connected capacitors at xtal pins
+	 * CPAR = in all at86rf2xx datasheets this is a constant value 3 pF,
+	 *	  but this is different on each board setup. You need to fine
+	 *	  tuning this value via CTRIM.
+	 * CTRIM = variable capacitor setting. Resolution is 0.3 pF range is
+	 *	   0 pF upto 4.5 pF.
+	 *
+	 * Examples:
+	 * atben transceiver:
+	 *
+	 * CL = 8 pF
+	 * CX = 12 pF
+	 * CPAR = 3 pF (We assume the magic constant from datasheet)
+	 * CTRIM = 0.9 pF
+	 *
+	 * (12+0.9+3)/2 = 7.95 which is nearly at 8 pF
+	 *
+	 * xtal_trim = 0x3
+	 *
+	 * openlabs transceiver:
+	 *
+	 * CL = 16 pF
+	 * CX = 22 pF
+	 * CPAR = 3 pF (We assume the magic constant from datasheet)
+	 * CTRIM = 4.5 pF
+	 *
+	 * (22+4.5+3)/2 = 14.75 which is the nearest value to 16 pF
+	 *
+	 * xtal_trim = 0xf
+	 */
+	rc = at86rf230_write_subreg(lp, SR_XTAL_TRIM, xtal_trim);
+	if (rc)
+		return rc;
+
 	rc = at86rf230_read_subreg(lp, SR_DVDD_OK, &dvdd);
 	if (rc)
 		return rc;
@@ -1378,9 +1417,11 @@ static int at86rf230_hw_init(struct at86rf230_local *lp)
 }
 
 static int
-at86rf230_get_pdata(struct spi_device *spi, int *rstn, int *slp_tr)
+at86rf230_get_pdata(struct spi_device *spi, int *rstn, int *slp_tr,
+		    u8 *xtal_trim)
 {
 	struct at86rf230_platform_data *pdata = spi->dev.platform_data;
+	int ret;
 
 	if (!IS_ENABLED(CONFIG_OF) || !spi->dev.of_node) {
 		if (!pdata)
@@ -1388,11 +1429,15 @@ at86rf230_get_pdata(struct spi_device *spi, int *rstn, int *slp_tr)
 
 		*rstn = pdata->rstn;
 		*slp_tr = pdata->slp_tr;
+		*xtal_trim = pdata->xtal_trim;
 		return 0;
 	}
 
 	*rstn = of_get_named_gpio(spi->dev.of_node, "reset-gpio", 0);
 	*slp_tr = of_get_named_gpio(spi->dev.of_node, "sleep-gpio", 0);
+	ret = of_property_read_u8(spi->dev.of_node, "xtal-trim", xtal_trim);
+	if (ret < 0 && ret != -EINVAL)
+		return ret;
 
 	return 0;
 }
@@ -1505,13 +1550,14 @@ static int at86rf230_probe(struct spi_device *spi)
 	struct at86rf230_local *lp;
 	unsigned int status;
 	int rc, irq_type, rstn, slp_tr;
+	u8 xtal_trim;
 
 	if (!spi->irq) {
 		dev_err(&spi->dev, "no IRQ specified\n");
 		return -EINVAL;
 	}
 
-	rc = at86rf230_get_pdata(spi, &rstn, &slp_tr);
+	rc = at86rf230_get_pdata(spi, &rstn, &slp_tr, &xtal_trim);
 	if (rc < 0) {
 		dev_err(&spi->dev, "failed to parse platform_data: %d\n", rc);
 		return rc;
@@ -1570,7 +1616,7 @@ static int at86rf230_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, lp);
 
-	rc = at86rf230_hw_init(lp);
+	rc = at86rf230_hw_init(lp, xtal_trim);
 	if (rc)
 		goto free_dev;
 
diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h
index cd519a11c2c6..b63fe6f5fdc8 100644
--- a/include/linux/spi/at86rf230.h
+++ b/include/linux/spi/at86rf230.h
@@ -22,6 +22,7 @@ struct at86rf230_platform_data {
 	int rstn;
 	int slp_tr;
 	int dig2;
+	u8 xtal_trim;
 };
 
 #endif
-- 
cgit v1.2.3


From 723b8e460d87e957f251dc5764f4ab86af6ab44e Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 24 Feb 2015 09:17:31 -0800
Subject: udp: In udp_flow_src_port use random hash value if skb_get_hash fails

In the unlikely event that skb_get_hash is unable to deduce a hash
in udp_flow_src_port we use a consistent random value instead.
This is specified in GRE/UDP draft section 3.2.1:
https://tools.ietf.org/html/draft-ietf-tsvwg-gre-in-udp-encap-04

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 21 +++++++++++++++------
 net/ipv4/udp.c    | 10 ++++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/udp.h b/include/net/udp.h
index 07f9b70962f6..32d8d9f07f76 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -194,6 +194,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		     int (*)(const struct sock *, const struct sock *),
 		     unsigned int hash2_nulladdr);
 
+u32 udp_flow_hashrnd(void);
+
 static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
 				       int min, int max, bool use_eth)
 {
@@ -205,12 +207,19 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
 	}
 
 	hash = skb_get_hash(skb);
-	if (unlikely(!hash) && use_eth) {
-		/* Can't find a normal hash, caller has indicated an Ethernet
-		 * packet so use that to compute a hash.
-		 */
-		hash = jhash(skb->data, 2 * ETH_ALEN,
-			     (__force u32) skb->protocol);
+	if (unlikely(!hash)) {
+		if (use_eth) {
+			/* Can't find a normal hash, caller has indicated an
+			 * Ethernet packet so use that to compute a hash.
+			 */
+			hash = jhash(skb->data, 2 * ETH_ALEN,
+				     (__force u32) skb->protocol);
+		} else {
+			/* Can't derive any sort of hash for the packet, set
+			 * to some consistent random value.
+			 */
+			hash = udp_flow_hashrnd();
+		}
 	}
 
 	/* Since this is being sent on the wire obfuscate hash a bit
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 97ef1f8b7be8..0224f930c613 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2525,6 +2525,16 @@ void __init udp_table_init(struct udp_table *table, const char *name)
 	}
 }
 
+u32 udp_flow_hashrnd(void)
+{
+	static u32 hashrnd __read_mostly;
+
+	net_get_random_once(&hashrnd, sizeof(hashrnd));
+
+	return hashrnd;
+}
+EXPORT_SYMBOL(udp_flow_hashrnd);
+
 void __init udp_init(void)
 {
 	unsigned long limit;
-- 
cgit v1.2.3


From 46a4dee074b58c4256dbf6c2dbf199c372f85b04 Mon Sep 17 00:00:00 2001
From: Madhu Challa <challa@noironetworks.com>
Date: Wed, 25 Feb 2015 09:58:34 -0800
Subject: igmp v6: add __ipv6_sock_mc_join and __ipv6_sock_mc_drop

Based on the igmp v4 changes from Eric Dumazet.
959d10f6bbf6("igmp: add __ip_mc_{join|leave}_group()")

These changes are needed to perform igmp v6 join/leave while
RTNL is held.

Make ipv6_sock_mc_join and ipv6_sock_mc_drop wrappers around
__ipv6_sock_mc_join and  __ipv6_sock_mc_drop to avoid
proliferation of work queues.

Signed-off-by: Madhu Challa <challa@noironetworks.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h |  8 ++++++++
 net/ipv6/mcast.c   | 40 +++++++++++++++++++++++++++++++---------
 2 files changed, 39 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 4c9fe224d73b..b7673065c074 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -940,4 +940,12 @@ int ipv6_sysctl_register(void);
 void ipv6_sysctl_unregister(void);
 #endif
 
+int ipv6_sock_mc_join(struct sock *sk, int ifindex,
+		      const struct in6_addr *addr);
+int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
+			const struct in6_addr *addr);
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
+		      const struct in6_addr *addr);
+int __ipv6_sock_mc_drop(struct sock *sk, int ifindex,
+			const struct in6_addr *addr);
 #endif /* _NET_IPV6_H */
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ce107c8aab3..e4955d019734 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -132,7 +132,7 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
 	return iv > 0 ? iv : 1;
 }
 
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct net_device *dev = NULL;
 	struct ipv6_mc_socklist *mc_lst;
@@ -140,6 +140,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	struct net *net = sock_net(sk);
 	int err;
 
+	ASSERT_RTNL();
+
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
@@ -161,7 +163,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	mc_lst->next = NULL;
 	mc_lst->addr = *addr;
 
-	rtnl_lock();
 	if (ifindex == 0) {
 		struct rt6_info *rt;
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
@@ -173,7 +174,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		dev = __dev_get_by_index(net, ifindex);
 
 	if (dev == NULL) {
-		rtnl_unlock();
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 		return -ENODEV;
 	}
@@ -190,7 +190,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	err = ipv6_dev_mc_inc(dev, addr);
 
 	if (err) {
-		rtnl_unlock();
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 		return err;
 	}
@@ -198,25 +197,37 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	mc_lst->next = np->ipv6_mc_list;
 	rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
 
+	return 0;
+}
+EXPORT_SYMBOL(__ipv6_sock_mc_join);
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+	int ret;
+
+	rtnl_lock();
+	ret = __ipv6_sock_mc_join(sk, ifindex, addr);
 	rtnl_unlock();
 
-	return 0;
+	return ret;
 }
+EXPORT_SYMBOL(ipv6_sock_mc_join);
 
 /*
  *	socket leave on multicast group
  */
-int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
+int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6_mc_socklist *mc_lst;
 	struct ipv6_mc_socklist __rcu **lnk;
 	struct net *net = sock_net(sk);
 
+	ASSERT_RTNL();
+
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
-	rtnl_lock();
 	for (lnk = &np->ipv6_mc_list;
 	     (mc_lst = rtnl_dereference(*lnk)) != NULL;
 	      lnk = &mc_lst->next) {
@@ -235,17 +246,28 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 					__ipv6_dev_mc_dec(idev, &mc_lst->addr);
 			} else
 				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
-			rtnl_unlock();
 
 			atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
 			kfree_rcu(mc_lst, rcu);
 			return 0;
 		}
 	}
-	rtnl_unlock();
 
 	return -EADDRNOTAVAIL;
 }
+EXPORT_SYMBOL(__ipv6_sock_mc_drop);
+
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+	int ret;
+
+	rtnl_lock();
+	ret = __ipv6_sock_mc_drop(sk, ifindex, addr);
+	rtnl_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(ipv6_sock_mc_drop);
 
 /* called with rcu_read_lock() */
 static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
-- 
cgit v1.2.3


From 93a714d6b53d87872e552dbb273544bdeaaf6e12 Mon Sep 17 00:00:00 2001
From: Madhu Challa <challa@noironetworks.com>
Date: Wed, 25 Feb 2015 09:58:35 -0800
Subject: multicast: Extend ip address command to enable multicast group
 join/leave on

Joining multicast group on ethernet level via "ip maddr" command would
not work if we have an Ethernet switch that does igmp snooping since
the switch would not replicate multicast packets on ports that did not
have IGMP reports for the multicast addresses.

Linux vxlan interfaces created via "ip link add vxlan" have the group option
that enables then to do the required join.

By extending ip address command with option "autojoin" we can get similar
functionality for openvswitch vxlan interfaces as well as other tunneling
mechanisms that need to receive multicast traffic. The kernel code is
structured similar to how the vxlan driver does a group join / leave.

example:
ip address add 224.1.1.10/24 dev eth5 autojoin
ip address del 224.1.1.10/24 dev eth5

Signed-off-by: Madhu Challa <challa@noironetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h     |  1 +
 include/net/netns/ipv6.h     |  1 +
 include/uapi/linux/if_addr.h |  1 +
 net/ipv4/devinet.c           | 31 +++++++++++++++++++++++++++++++
 net/ipv4/igmp.c              | 13 +++++++++++++
 net/ipv6/addrconf.c          | 38 +++++++++++++++++++++++++++++++++++---
 net/ipv6/mcast.c             | 20 ++++++++++++++++----
 7 files changed, 98 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index dbe225478adb..1b26c6c3fd7c 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -49,6 +49,7 @@ struct netns_ipv4 {
 	struct sock		*fibnl;
 
 	struct sock  * __percpu	*icmp_sk;
+	struct sock		*mc_autojoin_sk;
 
 	struct inet_peer_base	*peers;
 	struct tcpm_hash_bucket	*tcp_metrics_hash;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 69ae41f2098c..ca0db12cd089 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -67,6 +67,7 @@ struct netns_ipv6 {
 	struct sock             *ndisc_sk;
 	struct sock             *tcp_sk;
 	struct sock             *igmp_sk;
+	struct sock		*mc_autojoin_sk;
 #ifdef CONFIG_IPV6_MROUTE
 #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 	struct mr6_table	*mrt6;
diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h
index dea10a87dfd1..40fdfea39714 100644
--- a/include/uapi/linux/if_addr.h
+++ b/include/uapi/linux/if_addr.h
@@ -50,6 +50,7 @@ enum {
 #define IFA_F_PERMANENT		0x80
 #define IFA_F_MANAGETEMPADDR	0x100
 #define IFA_F_NOPREFIXROUTE	0x200
+#define IFA_F_MCAUTOJOIN	0x400
 
 struct ifa_cacheinfo {
 	__u32	ifa_prefered;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 3a8985c94581..5105759e4e00 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -548,6 +548,26 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 	return NULL;
 }
 
+static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
+{
+	struct ip_mreqn mreq = {
+		.imr_multiaddr.s_addr = ifa->ifa_address,
+		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
+	};
+	int ret;
+
+	ASSERT_RTNL();
+
+	lock_sock(sk);
+	if (join)
+		ret = __ip_mc_join_group(sk, &mreq);
+	else
+		ret = __ip_mc_leave_group(sk, &mreq);
+	release_sock(sk);
+
+	return ret;
+}
+
 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(skb->sk);
@@ -584,6 +604,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
 			continue;
 
+		if (ipv4_is_multicast(ifa->ifa_address))
+			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 		return 0;
 	}
@@ -838,6 +860,15 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 		 * userspace already relies on not having to provide this.
 		 */
 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
+		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
+			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
+					       true, ifa);
+
+			if (ret < 0) {
+				inet_free_ifa(ifa);
+				return ret;
+			}
+		}
 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
 	} else {
 		inet_free_ifa(ifa);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4b1172d73e03..5cb1ef4ce292 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -97,6 +97,7 @@
 #include <net/route.h>
 #include <net/sock.h>
 #include <net/checksum.h>
+#include <net/inet_common.h>
 #include <linux/netfilter_ipv4.h>
 #ifdef CONFIG_IP_MROUTE
 #include <linux/mroute.h>
@@ -2740,6 +2741,7 @@ static const struct file_operations igmp_mcf_seq_fops = {
 static int __net_init igmp_net_init(struct net *net)
 {
 	struct proc_dir_entry *pde;
+	int err;
 
 	pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops);
 	if (!pde)
@@ -2748,8 +2750,18 @@ static int __net_init igmp_net_init(struct net *net)
 			  &igmp_mcf_seq_fops);
 	if (!pde)
 		goto out_mcfilter;
+	err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET,
+				   SOCK_DGRAM, 0, net);
+	if (err < 0) {
+		pr_err("Failed to initialize the IGMP autojoin socket (err %d)\n",
+		       err);
+		goto out_sock;
+	}
+
 	return 0;
 
+out_sock:
+	remove_proc_entry("mcfilter", net->proc_net);
 out_mcfilter:
 	remove_proc_entry("igmp", net->proc_net);
 out_igmp:
@@ -2760,6 +2772,7 @@ static void __net_exit igmp_net_exit(struct net *net)
 {
 	remove_proc_entry("mcfilter", net->proc_net);
 	remove_proc_entry("igmp", net->proc_net);
+	inet_ctl_sock_destroy(net->ipv4.mc_autojoin_sk);
 }
 
 static struct pernet_operations igmp_net_ops = {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 98e4a63d72bb..783bccfcc060 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2464,6 +2464,23 @@ err_exit:
 	return err;
 }
 
+static int ipv6_mc_config(struct sock *sk, bool join,
+			  const struct in6_addr *addr, int ifindex)
+{
+	int ret;
+
+	ASSERT_RTNL();
+
+	lock_sock(sk);
+	if (join)
+		ret = __ipv6_sock_mc_join(sk, ifindex, addr);
+	else
+		ret = __ipv6_sock_mc_drop(sk, ifindex, addr);
+	release_sock(sk);
+
+	return ret;
+}
+
 /*
  *	Manual configuration of address on an interface
  */
@@ -2476,10 +2493,10 @@ static int inet6_addr_add(struct net *net, int ifindex,
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
 	struct net_device *dev;
+	unsigned long timeout;
+	clock_t expires;
 	int scope;
 	u32 flags;
-	clock_t expires;
-	unsigned long timeout;
 
 	ASSERT_RTNL();
 
@@ -2501,6 +2518,14 @@ static int inet6_addr_add(struct net *net, int ifindex,
 	if (IS_ERR(idev))
 		return PTR_ERR(idev);
 
+	if (ifa_flags & IFA_F_MCAUTOJOIN) {
+		int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
+					 true, pfx, ifindex);
+
+		if (ret < 0)
+			return ret;
+	}
+
 	scope = ipv6_addr_scope(pfx);
 
 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
@@ -2542,6 +2567,9 @@ static int inet6_addr_add(struct net *net, int ifindex,
 		in6_ifa_put(ifp);
 		addrconf_verify_rtnl();
 		return 0;
+	} else if (ifa_flags & IFA_F_MCAUTOJOIN) {
+		ipv6_mc_config(net->ipv6.mc_autojoin_sk,
+			       false, pfx, ifindex);
 	}
 
 	return PTR_ERR(ifp);
@@ -2578,6 +2606,10 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
 						 jiffies);
 			ipv6_del_addr(ifp);
 			addrconf_verify_rtnl();
+			if (ipv6_addr_is_multicast(pfx)) {
+				ipv6_mc_config(net->ipv6.mc_autojoin_sk,
+					       false, pfx, dev->ifindex);
+			}
 			return 0;
 		}
 	}
@@ -3945,7 +3977,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	/* We ignore other flags so far. */
 	ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
-		     IFA_F_NOPREFIXROUTE;
+		     IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
 
 	ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
 	if (ifa == NULL) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index e4955d019734..1dd1fedff9f4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2929,20 +2929,32 @@ static int __net_init igmp6_net_init(struct net *net)
 
 	inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1;
 
+	err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6,
+				   SOCK_RAW, IPPROTO_ICMPV6, net);
+	if (err < 0) {
+		pr_err("Failed to initialize the IGMP6 autojoin socket (err %d)\n",
+		       err);
+		goto out_sock_create;
+	}
+
 	err = igmp6_proc_init(net);
 	if (err)
-		goto out_sock_create;
-out:
-	return err;
+		goto out_sock_create_autojoin;
+
+	return 0;
 
+out_sock_create_autojoin:
+	inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk);
 out_sock_create:
 	inet_ctl_sock_destroy(net->ipv6.igmp_sk);
-	goto out;
+out:
+	return err;
 }
 
 static void __net_exit igmp6_net_exit(struct net *net)
 {
 	inet_ctl_sock_destroy(net->ipv6.igmp_sk);
+	inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk);
 	igmp6_proc_exit(net);
 }
 
-- 
cgit v1.2.3


From 56315f9e6e3a0ba0483c2e1f53333d5275268cb1 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Wed, 25 Feb 2015 15:31:31 -0800
Subject: fib_trie: Convert fib_alias to hlist from list

There isn't any advantage to having it as a list and by making it an hlist
we make the fib_alias more compatible with the list_info in terms of the
type of list used.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     |  2 +-
 net/ipv4/fib_lookup.h    |  2 +-
 net/ipv4/fib_semantics.c |  4 +--
 net/ipv4/fib_trie.c      | 80 ++++++++++++++++++++++++++----------------------
 4 files changed, 48 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 5bd120e4bc0a..cba4b7c32935 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -136,7 +136,7 @@ struct fib_result {
 	u32		tclassid;
 	struct fib_info *fi;
 	struct fib_table *table;
-	struct list_head *fa_head;
+	struct hlist_head *fa_head;
 };
 
 struct fib_result_nl {
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 825981b1049a..3cd444f4e0f6 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -6,7 +6,7 @@
 #include <net/ip_fib.h>
 
 struct fib_alias {
-	struct list_head	fa_list;
+	struct hlist_node	fa_list;
 	struct fib_info		*fa_info;
 	u8			fa_tos;
 	u8			fa_type;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1e2090ea663e..c6d267442dac 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1163,12 +1163,12 @@ int fib_sync_down_dev(struct net_device *dev, int force)
 void fib_select_default(struct fib_result *res)
 {
 	struct fib_info *fi = NULL, *last_resort = NULL;
-	struct list_head *fa_head = res->fa_head;
+	struct hlist_head *fa_head = res->fa_head;
 	struct fib_table *tb = res->table;
 	int order = -1, last_idx = -1;
 	struct fib_alias *fa;
 
-	list_for_each_entry_rcu(fa, fa_head, fa_list) {
+	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
 		struct fib_info *next_fi = fa->fa_info;
 
 		if (next_fi->fib_scope != res->scope ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3daf0224ff2e..f17e2239b7b9 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -116,7 +116,7 @@ struct leaf_info {
 	struct hlist_node hlist;
 	int plen;
 	u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
-	struct list_head falh;
+	struct hlist_head falh;
 	struct rcu_head rcu;
 };
 
@@ -339,7 +339,7 @@ static struct leaf_info *leaf_info_new(int plen)
 	if (li) {
 		li->plen = plen;
 		li->mask_plen = ntohl(inet_make_mask(plen));
-		INIT_LIST_HEAD(&li->falh);
+		INIT_HLIST_HEAD(&li->falh);
 	}
 	return li;
 }
@@ -881,7 +881,7 @@ static struct leaf_info *find_leaf_info(struct tnode *l, int plen)
 	return NULL;
 }
 
-static inline struct list_head *get_fa_head(struct tnode *l, int plen)
+static inline struct hlist_head *get_fa_head(struct tnode *l, int plen)
 {
 	struct leaf_info *li = find_leaf_info(l, plen);
 
@@ -994,14 +994,15 @@ static struct tnode *fib_find_node(struct trie *t, u32 key)
 /* Return the first fib alias matching TOS with
  * priority less than or equal to PRIO.
  */
-static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
+static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 tos,
+					u32 prio)
 {
 	struct fib_alias *fa;
 
 	if (!fah)
 		return NULL;
 
-	list_for_each_entry(fa, fah, fa_list) {
+	hlist_for_each_entry(fa, fah, fa_list) {
 		if (fa->fa_tos > tos)
 			continue;
 		if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
@@ -1027,9 +1028,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
 
 /* only used from updater-side */
 
-static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
+static struct hlist_head *fib_insert_node(struct trie *t, u32 key, int plen)
 {
-	struct list_head *fa_head = NULL;
+	struct hlist_head *fa_head = NULL;
 	struct tnode *l, *n, *tp = NULL;
 	struct leaf_info *li;
 
@@ -1130,7 +1131,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *new_fa;
-	struct list_head *fa_head = NULL;
+	struct hlist_head *fa_head = NULL;
 	struct fib_info *fi;
 	int plen = cfg->fc_dst_len;
 	u8 tos = cfg->fc_tos;
@@ -1171,10 +1172,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	 * exists or to the node before which we will insert new one.
 	 *
 	 * If fa is NULL, we will need to allocate a new one and
-	 * insert to the head of f.
-	 *
-	 * If f is NULL, no fib node matched the destination key
-	 * and we need to allocate a new one of those as well.
+	 * insert to the tail of the section matching the suffix length
+	 * of the new alias.
 	 */
 
 	if (fa && fa->fa_tos == tos &&
@@ -1192,8 +1191,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 		 */
 		fa_match = NULL;
 		fa_first = fa;
-		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
-		list_for_each_entry_continue(fa, fa_head, fa_list) {
+		hlist_for_each_entry_from(fa, fa_list) {
 			if (fa->fa_tos != tos)
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
@@ -1227,7 +1225,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			state = fa->fa_state;
 			new_fa->fa_state = state & ~FA_S_ACCESSED;
 
-			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+			hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
 			alias_free_mem_rcu(fa);
 
 			fib_release_info(fi_drop);
@@ -1276,8 +1274,19 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	if (!plen)
 		tb->tb_num_default++;
 
-	list_add_tail_rcu(&new_fa->fa_list,
-			  (fa ? &fa->fa_list : fa_head));
+	if (fa) {
+		hlist_add_before_rcu(&new_fa->fa_list, &fa->fa_list);
+	} else {
+		struct fib_alias *last;
+
+		hlist_for_each_entry(last, fa_head, fa_list)
+			fa = last;
+
+		if (fa)
+			hlist_add_behind_rcu(&new_fa->fa_list, &fa->fa_list);
+		else
+			hlist_add_head_rcu(&new_fa->fa_list, fa_head);
+	}
 
 	rt_cache_flush(cfg->fc_nlinfo.nl_net);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
@@ -1419,7 +1428,7 @@ found:
 		if ((key ^ n->key) & li->mask_plen)
 			continue;
 
-		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 			int nhsel, err;
 
@@ -1501,7 +1510,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	int plen = cfg->fc_dst_len;
 	u8 tos = cfg->fc_tos;
 	struct fib_alias *fa, *fa_to_delete;
-	struct list_head *fa_head;
+	struct hlist_head *fa_head;
 	struct tnode *l;
 	struct leaf_info *li;
 
@@ -1534,8 +1543,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
 
 	fa_to_delete = NULL;
-	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
-	list_for_each_entry_continue(fa, fa_head, fa_list) {
+	hlist_for_each_entry_from(fa, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
 		if (fa->fa_tos != tos)
@@ -1561,12 +1569,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 
-	list_del_rcu(&fa->fa_list);
+	hlist_del_rcu(&fa->fa_list);
 
 	if (!plen)
 		tb->tb_num_default--;
 
-	if (list_empty(fa_head)) {
+	if (hlist_empty(fa_head)) {
 		remove_leaf_info(l, li);
 		free_leaf_info(li);
 	}
@@ -1582,16 +1590,17 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	return 0;
 }
 
-static int trie_flush_list(struct list_head *head)
+static int trie_flush_list(struct hlist_head *head)
 {
-	struct fib_alias *fa, *fa_node;
+	struct hlist_node *tmp;
+	struct fib_alias *fa;
 	int found = 0;
 
-	list_for_each_entry_safe(fa, fa_node, head, fa_list) {
+	hlist_for_each_entry_safe(fa, tmp, head, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
-			list_del_rcu(&fa->fa_list);
+			hlist_del_rcu(&fa->fa_list);
 			fib_release_info(fa->fa_info);
 			alias_free_mem_rcu(fa);
 			found++;
@@ -1603,15 +1612,14 @@ static int trie_flush_list(struct list_head *head)
 static int trie_flush_leaf(struct tnode *l)
 {
 	int found = 0;
-	struct hlist_head *lih = &l->list;
 	struct hlist_node *tmp;
-	struct leaf_info *li = NULL;
+	struct leaf_info *li;
 	unsigned char plen = KEYLENGTH;
 
-	hlist_for_each_entry_safe(li, tmp, lih, hlist) {
+	hlist_for_each_entry_safe(li, tmp, &l->list, hlist) {
 		found += trie_flush_list(&li->falh);
 
-		if (list_empty(&li->falh)) {
+		if (hlist_empty(&li->falh)) {
 			hlist_del_rcu(&li->hlist);
 			free_leaf_info(li);
 			continue;
@@ -1731,7 +1739,7 @@ void fib_free_table(struct fib_table *tb)
 	kfree(tb);
 }
 
-static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
+static int fn_trie_dump_fa(t_key key, int plen, struct hlist_head *fah,
 			   struct fib_table *tb,
 			   struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -1744,7 +1752,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 
 	/* rcu_read_lock is hold by caller */
 
-	list_for_each_entry_rcu(fa, fah, fa_list) {
+	hlist_for_each_entry_rcu(fa, fah, fa_list) {
 		if (i < s_i) {
 			i++;
 			continue;
@@ -1787,7 +1795,7 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
 		if (i > s_i)
 			cb->args[5] = 0;
 
-		if (list_empty(&li->falh))
+		if (hlist_empty(&li->falh))
 			continue;
 
 		if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
@@ -2272,7 +2280,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 		hlist_for_each_entry_rcu(li, &n->list, hlist) {
 			struct fib_alias *fa;
 
-			list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+			hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
 				char buf1[32], buf2[32];
 
 				seq_indent(seq, iter->depth+1);
@@ -2429,7 +2437,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		mask = inet_make_mask(li->plen);
 		prefix = htonl(l->key);
 
-		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
 			const struct fib_info *fi = fa->fa_info;
 			unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
 
-- 
cgit v1.2.3


From 6588af614e7b79294fbcd4a666a7422c0c854e80 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Thu, 26 Feb 2015 19:34:37 +0000
Subject: usbnet: Fix tx_packets stat for FLAG_MULTI_FRAME drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the usbnet core does not update the tx_packets statistic for
drivers with FLAG_MULTI_PACKET and there is no hook in the TX
completion path where they could do this.

cdc_ncm and dependent drivers are bumping tx_packets stat on the
transmit path while asix and sr9800 aren't updating it at all.

Add a packet count in struct skb_data so these drivers can fill it
in, initialise it to 1 for other drivers, and add the packet count
to the tx_packets statistic on completion.

Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Tested-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_common.c |  2 ++
 drivers/net/usb/cdc_ncm.c     |  3 ++-
 drivers/net/usb/sr9800.c      |  1 +
 drivers/net/usb/usbnet.c      |  5 +++--
 include/linux/usb/usbnet.h    | 12 ++++++++++++
 5 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 5c55f11572ba..724a9b50df7a 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -188,6 +188,8 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 		memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes));
 		skb_put(skb, sizeof(padbytes));
 	}
+
+	usbnet_set_skb_tx_stats(skb, 1);
 	return skb;
 }
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 80a844e0ae03..70cbea551139 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1172,7 +1172,6 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 
 	/* return skb */
 	ctx->tx_curr_skb = NULL;
-	dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
 
 	/* keep private stats: framing overhead and number of NTBs */
 	ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload;
@@ -1184,6 +1183,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 */
 	dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload;
 
+	usbnet_set_skb_tx_stats(skb_out, n);
+
 	return skb_out;
 
 exit_no_skb:
diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c
index b94a0fbb8b3b..7650cdc8fe6b 100644
--- a/drivers/net/usb/sr9800.c
+++ b/drivers/net/usb/sr9800.c
@@ -144,6 +144,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 		skb_put(skb, sizeof(padbytes));
 	}
 
+	usbnet_set_skb_tx_stats(skb, 1);
 	return skb;
 }
 
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 449835f4331e..0f3ff285f6a1 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1188,8 +1188,7 @@ static void tx_complete (struct urb *urb)
 	struct usbnet		*dev = entry->dev;
 
 	if (urb->status == 0) {
-		if (!(dev->driver_info->flags & FLAG_MULTI_PACKET))
-			dev->net->stats.tx_packets++;
+		dev->net->stats.tx_packets += entry->packets;
 		dev->net->stats.tx_bytes += entry->length;
 	} else {
 		dev->net->stats.tx_errors++;
@@ -1348,6 +1347,8 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 			urb->transfer_flags |= URB_ZERO_PACKET;
 	}
 	entry->length = urb->transfer_buffer_length = length;
+	if (!(info->flags & FLAG_MULTI_PACKET))
+		usbnet_set_skb_tx_stats(skb, 1);
 
 	spin_lock_irqsave(&dev->txq.lock, flags);
 	retval = usb_autopm_get_interface_async(dev->intf);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index d9a4905e01d0..ff3fb2bd0e90 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -228,8 +228,20 @@ struct skb_data {	/* skb->cb is one of these */
 	struct usbnet		*dev;
 	enum skb_state		state;
 	size_t			length;
+	unsigned long		packets;
 };
 
+/* Drivers that set FLAG_MULTI_PACKET must call this in their
+ * tx_fixup method before returning an skb.
+ */
+static inline void
+usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets)
+{
+	struct skb_data *entry = (struct skb_data *) skb->cb;
+
+	entry->packets = packets;
+}
+
 extern int usbnet_open(struct net_device *net);
 extern int usbnet_stop(struct net_device *net);
 extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb,
-- 
cgit v1.2.3


From 5f852eb536ad651b8734559dcf4353514cb0bea3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 26 Feb 2015 14:10:18 -0800
Subject: tcp: tso: remove tp->tso_deferred

TSO relies on ability to defer sending a small amount of packets.
Heuristic is to wait for future ACKS in hope to send more packets at once.
Current algorithm uses a per socket tso_deferred field as a pseudo timer.

This pseudo timer relies on future ACK, but there is no guarantee
we receive them in time.

Fix would be to use a real timer, but cost of such timer is probably too
expensive for typical cases.

This patch changes the logic to test the time of last transmit,
because we should not add bursts of more than 1ms for any given flow.

We've used this patch for about two years at Google, before FQ/pacing
as it would reduce a fair amount of bursts.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 -
 net/ipv4/tcp_output.c | 14 +++++---------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1a7adb411647..97dbf16f7d9d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -236,7 +236,6 @@ struct tcp_sock {
 	u32	lost_out;	/* Lost packets			*/
 	u32	sacked_out;	/* SACK'd packets			*/
 	u32	fackets_out;	/* FACK'd packets			*/
-	u32	tso_deferred;
 
 	/* from STCP, retrans queue hinting */
 	struct sk_buff* lost_skb_hint;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a2a796c5536b..cb95c7a9d1e7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1763,9 +1763,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	if (icsk->icsk_ca_state != TCP_CA_Open)
 		goto send_now;
 
-	/* Defer for less than two clock ticks. */
-	if (tp->tso_deferred &&
-	    (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
+	/* Avoid bursty behavior by allowing defer
+	 * only if the last write was recent.
+	 */
+	if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
 		goto send_now;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1807,11 +1808,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 			goto send_now;
 	}
 
-	/* Ok, it looks like it is advisable to defer.
-	 * Do not rearm the timer if already set to not break TCP ACK clocking.
-	 */
-	if (!tp->tso_deferred)
-		tp->tso_deferred = 1 | (jiffies << 1);
+	/* Ok, it looks like it is advisable to defer. */
 
 	if (cong_win < send_win && cong_win < skb->len)
 		*is_cwnd_limited = true;
@@ -1819,7 +1816,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	return true;
 
 send_now:
-	tp->tso_deferred = 0;
 	return false;
 }
 
-- 
cgit v1.2.3


From 31f909a2c0abfc1a1a76b2981d28ac85d33210e7 Mon Sep 17 00:00:00 2001
From: Masashi Honma <masashi.honma@gmail.com>
Date: Tue, 24 Feb 2015 22:42:16 +0900
Subject: nl/mac80211: allow zero plink timeout to disable STA expiration

Both wpa_supplicant and mac80211 have and inactivity timer. By default
wpa_supplicant will be timed out in 5 minutes and mac80211's it is 30
minutes. If wpa_supplicant uses a longer timer than mac80211, it will
get unexpected disconnection by mac80211.

Using 0xffffffff instead as the configured value could solve this w/o
changing the code, but due to integer overflow in the expression used
this doesn't work. The expression is:

(current jiffies) > (frame Rx jiffies + NL80211_MESHCONF_PLINK_TIMEOUT * 250)

On 32bit system, the right side would overflow and be a very small
value if NL80211_MESHCONF_PLINK_TIMEOUT is sufficiently large,
causing unexpectedly early disconnections.

Instead allow disabling the inactivity timer to avoid this situation,
by passing the (previously invalid and useless) value 0.

Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
[reword/rewrap commit log]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 ++-
 net/mac80211/mesh.c          | 3 ++-
 net/wireless/nl80211.c       | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 68b294e83944..2dcf9bba317c 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3092,7 +3092,8 @@ enum nl80211_mesh_power_mode {
  *
  * @NL80211_MESHCONF_PLINK_TIMEOUT: If no tx activity is seen from a STA we've
  *	established peering with for longer than this time (in seconds), then
- *	remove it from the STA's list of peers.  Default is 30 minutes.
+ *	remove it from the STA's list of peers. You may set this to 0 to disable
+ *	the removal of the STA. Default is 30 minutes.
  *
  * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
  */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 0c8b2a77d312..acf441ff9f4a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -574,7 +574,8 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	u32 changed;
 
-	ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ);
+	if (ifmsh->mshcfg.plink_timeout > 0)
+		ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ);
 	mesh_path_expire(sdata);
 
 	changed = mesh_accept_plinks_update(sdata);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d78fd8b54515..9c6e23ede5b2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5265,7 +5265,7 @@ do {									    \
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
 				  0, 65535, mask,
 				  NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 1, 0xffffffff,
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 0, 0xffffffff,
 				  mask, NL80211_MESHCONF_PLINK_TIMEOUT,
 				  nla_get_u32);
 	if (mask_out)
-- 
cgit v1.2.3


From 36ef906ee8fefbfac3844206e66d8450e6221c69 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 25 Feb 2015 10:54:33 +0100
Subject: wext: add checked wrappers for adding events/points to streams

These checked wrappers are necessary for the next patch, which
will use them to avoid sending out partial scan results.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/iw_handler.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index a830b01baba4..8f81bbbc38fc 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -519,6 +519,17 @@ iwe_stream_add_event(struct iw_request_info *info, char *stream, char *ends,
 	return stream;
 }
 
+static inline char *
+iwe_stream_add_event_check(struct iw_request_info *info, char *stream,
+			   char *ends, struct iw_event *iwe, int event_len)
+{
+	char *res = iwe_stream_add_event(info, stream, ends, iwe, event_len);
+
+	if (res == stream)
+		return ERR_PTR(-E2BIG);
+	return res;
+}
+
 /*------------------------------------------------------------------*/
 /*
  * Wrapper to add an short Wireless Event containing a pointer to a
@@ -545,6 +556,17 @@ iwe_stream_add_point(struct iw_request_info *info, char *stream, char *ends,
 	return stream;
 }
 
+static inline char *
+iwe_stream_add_point_check(struct iw_request_info *info, char *stream,
+			   char *ends, struct iw_event *iwe, char *extra)
+{
+	char *res = iwe_stream_add_point(info, stream, ends, iwe, extra);
+
+	if (res == stream)
+		return ERR_PTR(-E2BIG);
+	return res;
+}
+
 /*------------------------------------------------------------------*/
 /*
  * Wrapper to add a value to a Wireless Event in a stream of events.
-- 
cgit v1.2.3


From a2c83fff582ae133d9f5bb187404ea9ce4da1f96 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 1 Mar 2015 12:31:42 +0100
Subject: ebpf: constify various function pointer structs

We can move bpf_map_ops and bpf_verifier_ops and other structs into ro
section, bpf_map_type_list and bpf_prog_type_list into read mostly.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h   | 14 +++++++-------
 kernel/bpf/arraymap.c |  6 +++---
 kernel/bpf/hashtab.c  |  6 +++---
 kernel/bpf/helpers.c  |  6 +++---
 net/core/filter.c     |  6 +++---
 5 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bbfceb756452..78446860f796 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -32,13 +32,13 @@ struct bpf_map {
 	u32 key_size;
 	u32 value_size;
 	u32 max_entries;
-	struct bpf_map_ops *ops;
+	const struct bpf_map_ops *ops;
 	struct work_struct work;
 };
 
 struct bpf_map_type_list {
 	struct list_head list_node;
-	struct bpf_map_ops *ops;
+	const struct bpf_map_ops *ops;
 	enum bpf_map_type type;
 };
 
@@ -109,7 +109,7 @@ struct bpf_verifier_ops {
 
 struct bpf_prog_type_list {
 	struct list_head list_node;
-	struct bpf_verifier_ops *ops;
+	const struct bpf_verifier_ops *ops;
 	enum bpf_prog_type type;
 };
 
@@ -121,7 +121,7 @@ struct bpf_prog_aux {
 	atomic_t refcnt;
 	bool is_gpl_compatible;
 	enum bpf_prog_type prog_type;
-	struct bpf_verifier_ops *ops;
+	const struct bpf_verifier_ops *ops;
 	struct bpf_map **used_maps;
 	u32 used_map_cnt;
 	struct bpf_prog *prog;
@@ -138,8 +138,8 @@ struct bpf_prog *bpf_prog_get(u32 ufd);
 int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
 
 /* verifier prototypes for helper functions called from eBPF programs */
-extern struct bpf_func_proto bpf_map_lookup_elem_proto;
-extern struct bpf_func_proto bpf_map_update_elem_proto;
-extern struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
+extern const struct bpf_func_proto bpf_map_update_elem_proto;
+extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
 #endif /* _LINUX_BPF_H */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 9eb4d8a7cd87..8a6616583f38 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -134,7 +134,7 @@ static void array_map_free(struct bpf_map *map)
 	kvfree(array);
 }
 
-static struct bpf_map_ops array_ops = {
+static const struct bpf_map_ops array_ops = {
 	.map_alloc = array_map_alloc,
 	.map_free = array_map_free,
 	.map_get_next_key = array_map_get_next_key,
@@ -143,14 +143,14 @@ static struct bpf_map_ops array_ops = {
 	.map_delete_elem = array_map_delete_elem,
 };
 
-static struct bpf_map_type_list tl = {
+static struct bpf_map_type_list array_type __read_mostly = {
 	.ops = &array_ops,
 	.type = BPF_MAP_TYPE_ARRAY,
 };
 
 static int __init register_array_map(void)
 {
-	bpf_register_map_type(&tl);
+	bpf_register_map_type(&array_type);
 	return 0;
 }
 late_initcall(register_array_map);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b3ba43674310..83c209d9b17a 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -345,7 +345,7 @@ static void htab_map_free(struct bpf_map *map)
 	kfree(htab);
 }
 
-static struct bpf_map_ops htab_ops = {
+static const struct bpf_map_ops htab_ops = {
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
@@ -354,14 +354,14 @@ static struct bpf_map_ops htab_ops = {
 	.map_delete_elem = htab_map_delete_elem,
 };
 
-static struct bpf_map_type_list tl = {
+static struct bpf_map_type_list htab_type __read_mostly = {
 	.ops = &htab_ops,
 	.type = BPF_MAP_TYPE_HASH,
 };
 
 static int __init register_htab_map(void)
 {
-	bpf_register_map_type(&tl);
+	bpf_register_map_type(&htab_type);
 	return 0;
 }
 late_initcall(register_htab_map);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9e3414d85459..a3c7701a8b5e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -41,7 +41,7 @@ static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	return (unsigned long) value;
 }
 
-struct bpf_func_proto bpf_map_lookup_elem_proto = {
+const struct bpf_func_proto bpf_map_lookup_elem_proto = {
 	.func = bpf_map_lookup_elem,
 	.gpl_only = false,
 	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
@@ -60,7 +60,7 @@ static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	return map->ops->map_update_elem(map, key, value, r4);
 }
 
-struct bpf_func_proto bpf_map_update_elem_proto = {
+const struct bpf_func_proto bpf_map_update_elem_proto = {
 	.func = bpf_map_update_elem,
 	.gpl_only = false,
 	.ret_type = RET_INTEGER,
@@ -80,7 +80,7 @@ static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	return map->ops->map_delete_elem(map, key);
 }
 
-struct bpf_func_proto bpf_map_delete_elem_proto = {
+const struct bpf_func_proto bpf_map_delete_elem_proto = {
 	.func = bpf_map_delete_elem,
 	.gpl_only = false,
 	.ret_type = RET_INTEGER,
diff --git a/net/core/filter.c b/net/core/filter.c
index f6bdc2b1ba01..6fe09e36dad9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1159,19 +1159,19 @@ static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type
 	return false;
 }
 
-static struct bpf_verifier_ops sock_filter_ops = {
+static const struct bpf_verifier_ops sock_filter_ops = {
 	.get_func_proto = sock_filter_func_proto,
 	.is_valid_access = sock_filter_is_valid_access,
 };
 
-static struct bpf_prog_type_list tl = {
+static struct bpf_prog_type_list sock_filter_type __read_mostly = {
 	.ops = &sock_filter_ops,
 	.type = BPF_PROG_TYPE_SOCKET_FILTER,
 };
 
 static int __init register_sock_filter_ops(void)
 {
-	bpf_register_prog_type(&tl);
+	bpf_register_prog_type(&sock_filter_type);
 	return 0;
 }
 late_initcall(register_sock_filter_ops);
-- 
cgit v1.2.3


From f1a66f85b74c5ef7b503f746ea97742dacd56419 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 1 Mar 2015 12:31:43 +0100
Subject: ebpf: export BPF_PSEUDO_MAP_FD to uapi

We need to export BPF_PSEUDO_MAP_FD to user space, as it's used in the
ELF BPF loader where instructions are being loaded that need map fixups.

An initial stage loads all maps into the kernel, and later on replaces
related instructions in the eBPF blob with BPF_PSEUDO_MAP_FD as source
register and the actual fd as immediate value.

The kernel verifier recognizes this keyword and replaces the map fd with
a real pointer internally.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h   | 2 --
 include/uapi/linux/bpf.h | 2 ++
 samples/bpf/libbpf.h     | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index caac2087a4d5..5e3863d5f666 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -145,8 +145,6 @@ struct bpf_prog_aux;
 		.off   = 0,					\
 		.imm   = ((__u64) (IMM)) >> 32 })
 
-#define BPF_PSEUDO_MAP_FD	1
-
 /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
 #define BPF_LD_MAP_FD(DST, MAP_FD)				\
 	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 45da7ec7d274..0248180bf2e2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -120,6 +120,8 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SOCKET_FILTER,
 };
 
+#define BPF_PSEUDO_MAP_FD	1
+
 /* flags for BPF_MAP_UPDATE_ELEM command */
 #define BPF_ANY		0 /* create new element or update existing */
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index 58c5fe1bdba1..a6bb7e9c22c3 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -92,7 +92,9 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
 		.off   = 0,					\
 		.imm   = ((__u64) (IMM)) >> 32 })
 
-#define BPF_PSEUDO_MAP_FD	1
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD	1
+#endif
 
 /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
 #define BPF_LD_MAP_FD(DST, MAP_FD)				\
-- 
cgit v1.2.3


From 0fc174dea54546e2b1146e1197da1b6d4bc48107 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 1 Mar 2015 12:31:44 +0100
Subject: ebpf: make internal bpf API independent of CONFIG_BPF_SYSCALL ifdefs

Socket filter code and other subsystems with upcoming eBPF support should
not need to deal with the fact that we have CONFIG_BPF_SYSCALL defined or
not.

Having the bpf syscall as a config option is a nice thing and I'd expect
it to stay that way for expert users (I presume one day the default setting
of it might change, though), but code making use of it should not care if
it's actually enabled or not.

Instead, hide this via header files and let the rest deal with it.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 78446860f796..9c458144cdb4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -113,8 +113,6 @@ struct bpf_prog_type_list {
 	enum bpf_prog_type type;
 };
 
-void bpf_register_prog_type(struct bpf_prog_type_list *tl);
-
 struct bpf_prog;
 
 struct bpf_prog_aux {
@@ -129,11 +127,25 @@ struct bpf_prog_aux {
 };
 
 #ifdef CONFIG_BPF_SYSCALL
+void bpf_register_prog_type(struct bpf_prog_type_list *tl);
+
 void bpf_prog_put(struct bpf_prog *prog);
+struct bpf_prog *bpf_prog_get(u32 ufd);
 #else
-static inline void bpf_prog_put(struct bpf_prog *prog) {}
+static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
+{
+}
+
+static inline struct bpf_prog *bpf_prog_get(u32 ufd)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void bpf_prog_put(struct bpf_prog *prog)
+{
+}
 #endif
-struct bpf_prog *bpf_prog_get(u32 ufd);
+
 /* verify correctness of eBPF program */
 int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
 
-- 
cgit v1.2.3


From 96be4325f443dbbfeb37d2a157675ac0736531a1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 1 Mar 2015 12:31:46 +0100
Subject: ebpf: add sched_cls_type and map it to sk_filter's verifier ops

As discussed recently and at netconf/netdev01, we want to prevent making
bpf_verifier_ops registration available for modules, but have them at a
controlled place inside the kernel instead.

The reason for this is, that out-of-tree modules can go crazy and define
and register any verfifier ops they want, doing all sorts of crap, even
bypassing available GPLed eBPF helper functions. We don't want to offer
such a shiny playground, of course, but keep strict control to ourselves
inside the core kernel.

This also encourages us to design eBPF user helpers carefully and
generically, so they can be shared among various subsystems using eBPF.

For the eBPF traffic classifier (cls_bpf), it's a good start to share
the same helper facilities as we currently do in eBPF for socket filters.

That way, we have BPF_PROG_TYPE_SCHED_CLS look like it's own type, thus
one day if there's a good reason to diverge the set of helper functions
from the set available to socket filters, we keep ABI compatibility.

In future, we could place all bpf_prog_type_list at a central place,
perhaps.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/verifier.c    | 15 +++++++++++++--
 net/core/filter.c        |  7 +++++++
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0248180bf2e2..3fa1af8a58d7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -118,6 +118,7 @@ enum bpf_map_type {
 enum bpf_prog_type {
 	BPF_PROG_TYPE_UNSPEC,
 	BPF_PROG_TYPE_SOCKET_FILTER,
+	BPF_PROG_TYPE_SCHED_CLS,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a28e09c7825d..594d341f04db 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1172,6 +1172,17 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
+static bool may_access_skb(enum bpf_prog_type type)
+{
+	switch (type) {
+	case BPF_PROG_TYPE_SOCKET_FILTER:
+	case BPF_PROG_TYPE_SCHED_CLS:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /* verify safety of LD_ABS|LD_IND instructions:
  * - they can only appear in the programs where ctx == skb
  * - since they are wrappers of function calls, they scratch R1-R5 registers,
@@ -1194,8 +1205,8 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
 	struct reg_state *reg;
 	int i, err;
 
-	if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
-		verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n");
+	if (!may_access_skb(env->prog->aux->prog_type)) {
+		verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n");
 		return -EINVAL;
 	}
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 741721233166..514d4082f326 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1166,9 +1166,16 @@ static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 	.type = BPF_PROG_TYPE_SOCKET_FILTER,
 };
 
+static struct bpf_prog_type_list sched_cls_type __read_mostly = {
+	.ops = &sk_filter_ops,
+	.type = BPF_PROG_TYPE_SCHED_CLS,
+};
+
 static int __init register_sk_filter_ops(void)
 {
 	bpf_register_prog_type(&sk_filter_type);
+	bpf_register_prog_type(&sched_cls_type);
+
 	return 0;
 }
 late_initcall(register_sk_filter_ops);
-- 
cgit v1.2.3


From 24701ecea76b0b93bd9667486934ec310825f558 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 1 Mar 2015 12:31:47 +0100
Subject: ebpf: move read-only fields to bpf_prog and shrink bpf_prog_aux

is_gpl_compatible and prog_type should be moved directly into bpf_prog
as they stay immutable during bpf_prog's lifetime, are core attributes
and they can be locked as read-only later on via bpf_prog_select_runtime().

With a bit of rearranging, this also allows us to shrink bpf_prog_aux
to exactly 1 cacheline.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h    | 4 +---
 include/linux/filter.h | 4 +++-
 kernel/bpf/syscall.c   | 7 +++----
 kernel/bpf/verifier.c  | 4 ++--
 net/core/filter.c      | 4 ++--
 5 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9c458144cdb4..a1a7ff2df328 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -117,11 +117,9 @@ struct bpf_prog;
 
 struct bpf_prog_aux {
 	atomic_t refcnt;
-	bool is_gpl_compatible;
-	enum bpf_prog_type prog_type;
+	u32 used_map_cnt;
 	const struct bpf_verifier_ops *ops;
 	struct bpf_map **used_maps;
-	u32 used_map_cnt;
 	struct bpf_prog *prog;
 	struct work_struct work;
 };
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5e3863d5f666..9ee8c67ea249 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -308,9 +308,11 @@ struct bpf_binary_header {
 struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	bool			jited;		/* Is our filter JIT'ed? */
+	bool			gpl_compatible;	/* Is our filter GPL compatible? */
 	u32			len;		/* Number of filter blocks */
-	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
+	enum bpf_prog_type	type;		/* Type of BPF program */
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
+	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
 					    const struct bpf_insn *filter);
 	/* Instructions for interpreter */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 536edc2be307..0d69449acbd0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -354,10 +354,11 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 	list_for_each_entry(tl, &bpf_prog_types, list_node) {
 		if (tl->type == type) {
 			prog->aux->ops = tl->ops;
-			prog->aux->prog_type = type;
+			prog->type = type;
 			return 0;
 		}
 	}
+
 	return -EINVAL;
 }
 
@@ -508,7 +509,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	prog->jited = false;
 
 	atomic_set(&prog->aux->refcnt, 1);
-	prog->aux->is_gpl_compatible = is_gpl;
+	prog->gpl_compatible = is_gpl;
 
 	/* find program type: socket_filter vs tracing_filter */
 	err = find_prog_type(type, prog);
@@ -517,7 +518,6 @@ static int bpf_prog_load(union bpf_attr *attr)
 
 	/* run eBPF verifier */
 	err = bpf_check(prog, attr);
-
 	if (err < 0)
 		goto free_used_maps;
 
@@ -528,7 +528,6 @@ static int bpf_prog_load(union bpf_attr *attr)
 	bpf_prog_select_runtime(prog);
 
 	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
-
 	if (err < 0)
 		/* failed to allocate fd */
 		goto free_used_maps;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 594d341f04db..bdf4192a889b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -852,7 +852,7 @@ static int check_call(struct verifier_env *env, int func_id)
 	}
 
 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
-	if (!env->prog->aux->is_gpl_compatible && fn->gpl_only) {
+	if (!env->prog->gpl_compatible && fn->gpl_only) {
 		verbose("cannot call GPL only function from proprietary program\n");
 		return -EINVAL;
 	}
@@ -1205,7 +1205,7 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
 	struct reg_state *reg;
 	int i, err;
 
-	if (!may_access_skb(env->prog->aux->prog_type)) {
+	if (!may_access_skb(env->prog->type)) {
 		verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n");
 		return -EINVAL;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index 514d4082f326..ff000cb25e0a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -814,7 +814,7 @@ static void bpf_release_orig_filter(struct bpf_prog *fp)
 
 static void __bpf_prog_release(struct bpf_prog *prog)
 {
-	if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
+	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
 		bpf_prog_put(prog);
 	} else {
 		bpf_release_orig_filter(prog);
@@ -1105,7 +1105,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
+	if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
 		bpf_prog_put(prog);
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From e2e9b6541dd4b31848079da80fe2253daaafb549 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 1 Mar 2015 12:31:48 +0100
Subject: cls_bpf: add initial eBPF support for programmable classifiers

This work extends the "classic" BPF programmable tc classifier by
extending its scope also to native eBPF code!

This allows for user space to implement own custom, 'safe' C like
classifiers (or whatever other frontend language LLVM et al may
provide in future), that can then be compiled with the LLVM eBPF
backend to an eBPF elf file. The result of this can be loaded into
the kernel via iproute2's tc. In the kernel, they can be JITed on
major archs and thus run in native performance.

Simple, minimal toy example to demonstrate the workflow:

  #include <linux/ip.h>
  #include <linux/if_ether.h>
  #include <linux/bpf.h>

  #include "tc_bpf_api.h"

  __section("classify")
  int cls_main(struct sk_buff *skb)
  {
    return (0x800 << 16) | load_byte(skb, ETH_HLEN + __builtin_offsetof(struct iphdr, tos));
  }

  char __license[] __section("license") = "GPL";

The classifier can then be compiled into eBPF opcodes and loaded
via tc, for example:

  clang -O2 -emit-llvm -c cls.c -o - | llc -march=bpf -filetype=obj -o cls.o
  tc filter add dev em1 parent 1: bpf cls.o [...]

As it has been demonstrated, the scope can even reach up to a fully
fledged flow dissector (similarly as in samples/bpf/sockex2_kern.c).

For tc, maps are allowed to be used, but from kernel context only,
in other words, eBPF code can keep state across filter invocations.
In future, we perhaps may reattach from a different application to
those maps e.g., to read out collected statistics/state.

Similarly as in socket filters, we may extend functionality for eBPF
classifiers over time depending on the use cases. For that purpose,
cls_bpf programs are using BPF_PROG_TYPE_SCHED_CLS program type, so
we can allow additional functions/accessors (e.g. an ABI compatible
offset translation to skb fields/metadata). For an initial cls_bpf
support, we allow the same set of helper functions as eBPF socket
filters, but we could diverge at some point in time w/o problem.

I was wondering whether cls_bpf and act_bpf could share C programs,
I can imagine that at some point, we introduce i) further common
handlers for both (or even beyond their scope), and/or if truly needed
ii) some restricted function space for each of them. Both can be
abstracted easily through struct bpf_verifier_ops in future.

The context of cls_bpf versus act_bpf is slightly different though:
a cls_bpf program will return a specific classid whereas act_bpf a
drop/non-drop return code, latter may also in future mangle skbs.
That said, we can surely have a "classify" and "action" section in
a single object file, or considered mentioned constraint add a
possibility of a shared section.

The workflow for getting native eBPF running from tc [1] is as
follows: for f_bpf, I've added a slightly modified ELF parser code
from Alexei's kernel sample, which reads out the LLVM compiled
object, sets up maps (and dynamically fixes up map fds) if any, and
loads the eBPF instructions all centrally through the bpf syscall.

The resulting fd from the loaded program itself is being passed down
to cls_bpf, which looks up struct bpf_prog from the fd store, and
holds reference, so that it stays available also after tc program
lifetime. On tc filter destruction, it will then drop its reference.

Moreover, I've also added the optional possibility to annotate an
eBPF filter with a name (e.g. path to object file, or something
else if preferred) so that when tc dumps currently installed filters,
some more context can be given to an admin for a given instance (as
opposed to just the file descriptor number).

Last but not least, bpf_prog_get() and bpf_prog_put() needed to be
exported, so that eBPF can be used from cls_bpf built as a module.
Thanks to 60a3b2253c41 ("net: bpf: make eBPF interpreter images
read-only") I think this is of no concern since anything wanting to
alter eBPF opcode after verification stage would crash the kernel.

  [1] http://git.breakpoint.cc/cgit/dborkman/iproute2.git/log/?h=ebpf

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |   2 +
 kernel/bpf/syscall.c         |   2 +
 net/sched/cls_bpf.c          | 206 ++++++++++++++++++++++++++++++++-----------
 3 files changed, 158 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 25731dfb3fcc..bf08e76bf505 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -397,6 +397,8 @@ enum {
 	TCA_BPF_CLASSID,
 	TCA_BPF_OPS_LEN,
 	TCA_BPF_OPS,
+	TCA_BPF_FD,
+	TCA_BPF_NAME,
 	__TCA_BPF_MAX,
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0d69449acbd0..669719ccc9ee 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -419,6 +419,7 @@ void bpf_prog_put(struct bpf_prog *prog)
 		bpf_prog_free(prog);
 	}
 }
+EXPORT_SYMBOL_GPL(bpf_prog_put);
 
 static int bpf_prog_release(struct inode *inode, struct file *filp)
 {
@@ -466,6 +467,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 	fdput(f);
 	return prog;
 }
+EXPORT_SYMBOL_GPL(bpf_prog_get);
 
 /* last field in 'union bpf_attr' used by this command */
 #define	BPF_PROG_LOAD_LAST_FIELD log_buf
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5f3ee9e4b5bf..6f7ed8f8e6ee 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -16,6 +16,8 @@
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/filter.h>
+#include <linux/bpf.h>
+
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
 #include <net/sock.h>
@@ -24,6 +26,8 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
 MODULE_DESCRIPTION("TC BPF based classifier");
 
+#define CLS_BPF_NAME_LEN	256
+
 struct cls_bpf_head {
 	struct list_head plist;
 	u32 hgen;
@@ -32,18 +36,24 @@ struct cls_bpf_head {
 
 struct cls_bpf_prog {
 	struct bpf_prog *filter;
-	struct sock_filter *bpf_ops;
-	struct tcf_exts exts;
-	struct tcf_result res;
 	struct list_head link;
+	struct tcf_result res;
+	struct tcf_exts exts;
 	u32 handle;
-	u16 bpf_num_ops;
+	union {
+		u32 bpf_fd;
+		u16 bpf_num_ops;
+	};
+	struct sock_filter *bpf_ops;
+	const char *bpf_name;
 	struct tcf_proto *tp;
 	struct rcu_head rcu;
 };
 
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
+	[TCA_BPF_FD]		= { .type = NLA_U32 },
+	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
 	[TCA_BPF_OPS_LEN]	= { .type = NLA_U16 },
 	[TCA_BPF_OPS]		= { .type = NLA_BINARY,
 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
@@ -76,6 +86,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	return -1;
 }
 
+static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
+{
+	return !prog->bpf_ops;
+}
+
 static int cls_bpf_init(struct tcf_proto *tp)
 {
 	struct cls_bpf_head *head;
@@ -94,8 +109,12 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
 {
 	tcf_exts_destroy(&prog->exts);
 
-	bpf_prog_destroy(prog->filter);
+	if (cls_bpf_is_ebpf(prog))
+		bpf_prog_put(prog->filter);
+	else
+		bpf_prog_destroy(prog->filter);
 
+	kfree(prog->bpf_name);
 	kfree(prog->bpf_ops);
 	kfree(prog);
 }
@@ -114,6 +133,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
 	list_del_rcu(&prog->link);
 	tcf_unbind_filter(tp, &prog->res);
 	call_rcu(&prog->rcu, __cls_bpf_delete_prog);
+
 	return 0;
 }
 
@@ -151,69 +171,121 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
 	return ret;
 }
 
-static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
-				   struct cls_bpf_prog *prog,
-				   unsigned long base, struct nlattr **tb,
-				   struct nlattr *est, bool ovr)
+static int cls_bpf_prog_from_ops(struct nlattr **tb,
+				 struct cls_bpf_prog *prog, u32 classid)
 {
 	struct sock_filter *bpf_ops;
-	struct tcf_exts exts;
-	struct sock_fprog_kern tmp;
+	struct sock_fprog_kern fprog_tmp;
 	struct bpf_prog *fp;
 	u16 bpf_size, bpf_num_ops;
-	u32 classid;
 	int ret;
 
-	if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
-		return -EINVAL;
-
-	tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
-	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
-	if (ret < 0)
-		return ret;
-
-	classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
 	bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
-	if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) {
-		ret = -EINVAL;
-		goto errout;
-	}
+	if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
+		return -EINVAL;
 
 	bpf_size = bpf_num_ops * sizeof(*bpf_ops);
-	if (bpf_size != nla_len(tb[TCA_BPF_OPS])) {
-		ret = -EINVAL;
-		goto errout;
-	}
+	if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
+		return -EINVAL;
 
 	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
-	if (bpf_ops == NULL) {
-		ret = -ENOMEM;
-		goto errout;
-	}
+	if (bpf_ops == NULL)
+		return -ENOMEM;
 
 	memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
 
-	tmp.len = bpf_num_ops;
-	tmp.filter = bpf_ops;
+	fprog_tmp.len = bpf_num_ops;
+	fprog_tmp.filter = bpf_ops;
 
-	ret = bpf_prog_create(&fp, &tmp);
-	if (ret)
-		goto errout_free;
+	ret = bpf_prog_create(&fp, &fprog_tmp);
+	if (ret < 0) {
+		kfree(bpf_ops);
+		return ret;
+	}
 
-	prog->bpf_num_ops = bpf_num_ops;
 	prog->bpf_ops = bpf_ops;
+	prog->bpf_num_ops = bpf_num_ops;
+	prog->bpf_name = NULL;
+
 	prog->filter = fp;
 	prog->res.classid = classid;
 
+	return 0;
+}
+
+static int cls_bpf_prog_from_efd(struct nlattr **tb,
+				 struct cls_bpf_prog *prog, u32 classid)
+{
+	struct bpf_prog *fp;
+	char *name = NULL;
+	u32 bpf_fd;
+
+	bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
+
+	fp = bpf_prog_get(bpf_fd);
+	if (IS_ERR(fp))
+		return PTR_ERR(fp);
+
+	if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
+		bpf_prog_put(fp);
+		return -EINVAL;
+	}
+
+	if (tb[TCA_BPF_NAME]) {
+		name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
+			       nla_len(tb[TCA_BPF_NAME]),
+			       GFP_KERNEL);
+		if (!name) {
+			bpf_prog_put(fp);
+			return -ENOMEM;
+		}
+	}
+
+	prog->bpf_ops = NULL;
+	prog->bpf_fd = bpf_fd;
+	prog->bpf_name = name;
+
+	prog->filter = fp;
+	prog->res.classid = classid;
+
+	return 0;
+}
+
+static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
+				   struct cls_bpf_prog *prog,
+				   unsigned long base, struct nlattr **tb,
+				   struct nlattr *est, bool ovr)
+{
+	struct tcf_exts exts;
+	bool is_bpf, is_ebpf;
+	u32 classid;
+	int ret;
+
+	is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+	is_ebpf = tb[TCA_BPF_FD];
+
+	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
+	    !tb[TCA_BPF_CLASSID])
+		return -EINVAL;
+
+	tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+	if (ret < 0)
+		return ret;
+
+	classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+
+	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) :
+		       cls_bpf_prog_from_efd(tb, prog, classid);
+	if (ret < 0) {
+		tcf_exts_destroy(&exts);
+		return ret;
+	}
+
 	tcf_bind_filter(tp, &prog->res, base);
 	tcf_exts_change(tp, &prog->exts, &exts);
 
 	return 0;
-errout_free:
-	kfree(bpf_ops);
-errout:
-	tcf_exts_destroy(&exts);
-	return ret;
 }
 
 static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -297,11 +369,43 @@ errout:
 	return ret;
 }
 
+static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
+				 struct sk_buff *skb)
+{
+	struct nlattr *nla;
+
+	if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
+		return -EMSGSIZE;
+
+	nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
+			  sizeof(struct sock_filter));
+	if (nla == NULL)
+		return -EMSGSIZE;
+
+	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+	return 0;
+}
+
+static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
+				  struct sk_buff *skb)
+{
+	if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
+		return -EMSGSIZE;
+
+	if (prog->bpf_name &&
+	    nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 			struct sk_buff *skb, struct tcmsg *tm)
 {
 	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
-	struct nlattr *nest, *nla;
+	struct nlattr *nest;
+	int ret;
 
 	if (prog == NULL)
 		return skb->len;
@@ -314,16 +418,14 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
 	if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
 		goto nla_put_failure;
-	if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
-		goto nla_put_failure;
 
-	nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
-			  sizeof(struct sock_filter));
-	if (nla == NULL)
+	if (cls_bpf_is_ebpf(prog))
+		ret = cls_bpf_dump_ebpf_info(prog, skb);
+	else
+		ret = cls_bpf_dump_bpf_info(prog, skb);
+	if (ret)
 		goto nla_put_failure;
 
-	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
-
 	if (tcf_exts_dump(skb, &prog->exts) < 0)
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From 287f3a943fef58c5c73e42545169443be379222f Mon Sep 17 00:00:00 2001
From: Simon Farnsworth <simon@farnz.org.uk>
Date: Sun, 1 Mar 2015 10:54:39 +0000
Subject: pppoe: Use workqueue to die properly when a PADT is received

When a PADT frame is received, the socket may not be in a good state to
close down the PPP interface. The current implementation handles this by
simply blocking all further PPP traffic, and hoping that the lack of traffic
will trigger the user to investigate.

Use schedule_work to get to a process context from which we clear down the
PPP interface, in a fashion analogous to hangup on a TTY-based PPP
interface. This causes pppd to disconnect immediately, and allows tools to
take immediate corrective action.

Note that pppd's rp_pppoe.so plugin has code in it to disable the session
when it disconnects; however, as a consequence of this patch, the session is
already disabled before rp_pppoe.so is asked to disable the session. The
result is a harmless error message:

Failed to disconnect PPPoE socket: 114 Operation already in progress

This message is safe to ignore, as long as the error is 114 Operation
already in progress; in that specific case, it means that the PPPoE session
has already been disabled before pppd tried to disable it.

Signed-off-by: Simon Farnsworth <simon@farnz.org.uk>
Tested-by: Dan Williams <dcbw@redhat.com>
Tested-by: Christoph Schulz <develop@kristov.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pppoe.c  | 17 ++++++++++++++++-
 include/linux/if_pppox.h |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index d2408a5e43a6..9c97e9bcf5f5 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -455,6 +455,18 @@ out:
 	return NET_RX_DROP;
 }
 
+static void pppoe_unbind_sock_work(struct work_struct *work)
+{
+	struct pppox_sock *po = container_of(work, struct pppox_sock,
+					     proto.pppoe.padt_work);
+	struct sock *sk = sk_pppox(po);
+
+	lock_sock(sk);
+	pppox_unbind_sock(sk);
+	release_sock(sk);
+	sock_put(sk);
+}
+
 /************************************************************************
  *
  * Receive a PPPoE Discovery frame.
@@ -500,7 +512,8 @@ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		bh_unlock_sock(sk);
-		sock_put(sk);
+		if (!schedule_work(&po->proto.pppoe.padt_work))
+			sock_put(sk);
 	}
 
 abort:
@@ -613,6 +626,8 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 
 	lock_sock(sk);
 
+	INIT_WORK(&po->proto.pppoe.padt_work, pppoe_unbind_sock_work);
+
 	error = -EINVAL;
 	if (sp->sa_protocol != PX_PROTO_OE)
 		goto end;
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index aff7ad8a4ea3..66a7d7600f43 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -19,6 +19,7 @@
 #include <linux/netdevice.h>
 #include <linux/ppp_channel.h>
 #include <linux/skbuff.h>
+#include <linux/workqueue.h>
 #include <uapi/linux/if_pppox.h>
 
 static inline struct pppoe_hdr *pppoe_hdr(const struct sk_buff *skb)
@@ -32,6 +33,7 @@ struct pppoe_opt {
 	struct pppoe_addr	pa;	  /* what this socket is bound to*/
 	struct sockaddr_pppox	relay;	  /* what socket data will be
 					     relayed to (PPPoE relaying) */
+	struct work_struct      padt_work;/* Work item for handling PADT */
 };
 
 struct pptp_opt {
-- 
cgit v1.2.3


From 49a6fe055739a77ef910c283efb99928423c97a0 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Sun, 1 Mar 2015 14:58:25 +0200
Subject: net: bluetooth: compact struct bt_skb_cb by inlining struct
 hci_req_ctrl

struct hci_req_ctrl is never used outside of struct bt_skb_cb;
Inlining it frees 8 bytes on a 64 bit system in skb->cb[] allowing
the addition of more ancillary data.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bluetooth/bluetooth.h | 10 +++-------
 net/bluetooth/hci_core.c          | 12 ++++++------
 net/bluetooth/hci_event.c         |  4 ++--
 net/bluetooth/hci_request.c       |  6 +++---
 net/bluetooth/hci_sock.c          |  2 +-
 5 files changed, 15 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index e00455aab18c..09893668e6ea 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -275,21 +275,17 @@ struct hci_dev;
 
 typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
 
-struct hci_req_ctrl {
-	bool			start;
-	u8			event;
-	hci_req_complete_t	complete;
-};
-
 struct bt_skb_cb {
 	__u8 pkt_type;
 	__u8 incoming;
 	__u16 opcode;
 	__u16 expect;
 	__u8 force_active;
+	bool req_start;
+	u8 req_event;
+	hci_req_complete_t req_complete;
 	struct l2cap_chan *chan;
 	struct l2cap_ctrl control;
-	struct hci_req_ctrl req;
 	bdaddr_t bdaddr;
 	__le16 psm;
 };
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3322d3f4c85a..85a0655c4123 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3517,7 +3517,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
 	/* Stand-alone HCI commands must be flagged as
 	 * single-command requests.
 	 */
-	bt_cb(skb)->req.start = true;
+	bt_cb(skb)->req_start = true;
 
 	skb_queue_tail(&hdev->cmd_q, skb);
 	queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -4195,7 +4195,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev)
 	if (!skb)
 		return true;
 
-	return bt_cb(skb)->req.start;
+	return bt_cb(skb)->req_start;
 }
 
 static void hci_resend_last(struct hci_dev *hdev)
@@ -4255,14 +4255,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
 	 * command queue (hdev->cmd_q).
 	 */
 	if (hdev->sent_cmd) {
-		req_complete = bt_cb(hdev->sent_cmd)->req.complete;
+		req_complete = bt_cb(hdev->sent_cmd)->req_complete;
 
 		if (req_complete) {
 			/* We must set the complete callback to NULL to
 			 * avoid calling the callback more than once if
 			 * this function gets called again.
 			 */
-			bt_cb(hdev->sent_cmd)->req.complete = NULL;
+			bt_cb(hdev->sent_cmd)->req_complete = NULL;
 
 			goto call_complete;
 		}
@@ -4271,12 +4271,12 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
 	/* Remove all pending commands belonging to this request */
 	spin_lock_irqsave(&hdev->cmd_q.lock, flags);
 	while ((skb = __skb_dequeue(&hdev->cmd_q))) {
-		if (bt_cb(skb)->req.start) {
+		if (bt_cb(skb)->req_start) {
 			__skb_queue_head(&hdev->cmd_q, skb);
 			break;
 		}
 
-		req_complete = bt_cb(skb)->req.complete;
+		req_complete = bt_cb(skb)->req_complete;
 		kfree_skb(skb);
 	}
 	spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a3fb094822b6..8e8c4334c379 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3106,7 +3106,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		cancel_delayed_work(&hdev->cmd_timer);
 
 	if (ev->status ||
-	    (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
+	    (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req_event))
 		hci_req_cmd_complete(hdev, opcode, ev->status);
 
 	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
@@ -5039,7 +5039,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 
 	skb_pull(skb, HCI_EVENT_HDR_SIZE);
 
-	if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
+	if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req_event == event) {
 		struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
 		u16 opcode = __le16_to_cpu(cmd_hdr->opcode);
 
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b59f92c6df0c..db2f45a516e9 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -55,7 +55,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
 		return -ENODATA;
 
 	skb = skb_peek_tail(&req->cmd_q);
-	bt_cb(skb)->req.complete = complete;
+	bt_cb(skb)->req_complete = complete;
 
 	spin_lock_irqsave(&hdev->cmd_q.lock, flags);
 	skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
@@ -116,9 +116,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
 	}
 
 	if (skb_queue_empty(&req->cmd_q))
-		bt_cb(skb)->req.start = true;
+		bt_cb(skb)->req_start = true;
 
-	bt_cb(skb)->req.event = event;
+	bt_cb(skb)->req_event = event;
 
 	skb_queue_tail(&req->cmd_q, skb);
 }
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d65c5be7c82..f0038189a73e 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -965,7 +965,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 			/* Stand-alone HCI commands must be flagged as
 			 * single-command requests.
 			 */
-			bt_cb(skb)->req.start = true;
+			bt_cb(skb)->req_start = true;
 
 			skb_queue_tail(&hdev->cmd_q, skb);
 			queue_work(hdev->workqueue, &hdev->cmd_work);
-- 
cgit v1.2.3


From 6368c235777456ddaeaa61360186d8d03a61cb18 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Sun, 1 Mar 2015 14:58:26 +0200
Subject: net: bluetooth: compact struct bt_skb_cb by converting boolean fields
 to bit fields

Convert boolean fields incoming and req_start to bit fields and move
force_active in order save space in bt_skb_cb in an effort to use
a portion of skb->cb[] for storing skb->dropcount.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bluetooth/bluetooth.h | 6 +++---
 net/bluetooth/hci_core.c          | 2 +-
 net/bluetooth/hci_request.c       | 2 +-
 net/bluetooth/hci_sock.c          | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 09893668e6ea..4500bf88ff55 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -277,11 +277,11 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
 
 struct bt_skb_cb {
 	__u8 pkt_type;
-	__u8 incoming;
+	__u8 force_active;
 	__u16 opcode;
 	__u16 expect;
-	__u8 force_active;
-	bool req_start;
+	__u8 incoming:1;
+	__u8 req_start:1;
 	u8 req_event;
 	hci_req_complete_t req_complete;
 	struct l2cap_chan *chan;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 85a0655c4123..80f40e859d7d 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3517,7 +3517,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
 	/* Stand-alone HCI commands must be flagged as
 	 * single-command requests.
 	 */
-	bt_cb(skb)->req_start = true;
+	bt_cb(skb)->req_start = 1;
 
 	skb_queue_tail(&hdev->cmd_q, skb);
 	queue_work(hdev->workqueue, &hdev->cmd_work);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index db2f45a516e9..f857e765e081 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -116,7 +116,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
 	}
 
 	if (skb_queue_empty(&req->cmd_q))
-		bt_cb(skb)->req_start = true;
+		bt_cb(skb)->req_start = 1;
 
 	bt_cb(skb)->req_event = event;
 
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f0038189a73e..37198fb99ffe 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -965,7 +965,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 			/* Stand-alone HCI commands must be flagged as
 			 * single-command requests.
 			 */
-			bt_cb(skb)->req_start = true;
+			bt_cb(skb)->req_start = 1;
 
 			skb_queue_tail(&hdev->cmd_q, skb);
 			queue_work(hdev->workqueue, &hdev->cmd_work);
-- 
cgit v1.2.3


From b4772ef879a8f7d8c56118c2ae5a296fcf6f81d2 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Sun, 1 Mar 2015 14:58:29 +0200
Subject: net: use common macro for assering skb->cb[] available size in
 protocol families

As part of an effort to move skb->dropcount to skb->cb[] use a common
macro in protocol families using skb->cb[] for ancillary data to
validate available room in skb->cb[].

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h           | 3 +++
 net/bluetooth/af_bluetooth.c | 3 +--
 net/can/bcm.c                | 2 +-
 net/can/raw.c                | 6 +++---
 net/ipv4/af_inet.c           | 2 +-
 net/ipv4/tcp.c               | 3 +--
 net/ipv6/af_inet6.c          | 2 +-
 net/packet/af_packet.c       | 3 +--
 net/sctp/protocol.c          | 3 +--
 9 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index ab186b1d31ff..a2502d248641 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2078,6 +2078,9 @@ static inline int sock_intr_errno(long timeo)
 	return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
 }
 
+#define sock_skb_cb_check_size(size) \
+	BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
+
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 			   struct sk_buff *skb);
 void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index ce22e0cfa923..4b904c97a068 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -711,10 +711,9 @@ EXPORT_SYMBOL_GPL(bt_debugfs);
 
 static int __init bt_init(void)
 {
-	struct sk_buff *skb;
 	int err;
 
-	BUILD_BUG_ON(sizeof(struct bt_skb_cb) > sizeof(skb->cb));
+	sock_skb_cb_check_size(sizeof(struct bt_skb_cb));
 
 	BT_INFO("Core ver %s", VERSION);
 
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ee9ffd956552..d559f922326d 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -328,7 +328,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
 	 *  containing the interface index.
 	 */
 
-	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+	sock_skb_cb_check_size(sizeof(struct sockaddr_can));
 	addr = (struct sockaddr_can *)skb->cb;
 	memset(addr, 0, sizeof(*addr));
 	addr->can_family  = AF_CAN;
diff --git a/net/can/raw.c b/net/can/raw.c
index 00c13ef23661..94601b7ff0a3 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -95,8 +95,8 @@ struct raw_sock {
  */
 static inline unsigned int *raw_flags(struct sk_buff *skb)
 {
-	BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(struct sockaddr_can) +
-					 sizeof(unsigned int)));
+	sock_skb_cb_check_size(sizeof(struct sockaddr_can) +
+			       sizeof(unsigned int));
 
 	/* return pointer after struct sockaddr_can */
 	return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
@@ -135,7 +135,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
 	 *  containing the interface index.
 	 */
 
-	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+	sock_skb_cb_check_size(sizeof(struct sockaddr_can));
 	addr = (struct sockaddr_can *)skb->cb;
 	memset(addr, 0, sizeof(*addr));
 	addr->can_family  = AF_CAN;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d2e49baaff63..4ce954cc94a4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1675,7 +1675,7 @@ static int __init inet_init(void)
 	struct list_head *r;
 	int rc = -EINVAL;
 
-	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
+	sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
 
 	rc = proto_register(&tcp_prot, 1);
 	if (rc)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9d72a0fcd928..4b57ea8dabc7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3005,12 +3005,11 @@ static void __init tcp_init_mem(void)
 
 void __init tcp_init(void)
 {
-	struct sk_buff *skb = NULL;
 	unsigned long limit;
 	int max_rshare, max_wshare, cnt;
 	unsigned int i;
 
-	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
+	sock_skb_cb_check_size(sizeof(struct tcp_skb_cb));
 
 	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
 	percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e8c4400f23e9..6bafcc2c79e3 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -824,7 +824,7 @@ static int __init inet6_init(void)
 	struct list_head *r;
 	int err = 0;
 
-	BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
+	sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));
 
 	/* Register the socket-side information for inet6_create.  */
 	for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 7eea30b9c8e5..9cdb414cd231 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1816,8 +1816,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 		skb = nskb;
 	}
 
-	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
-		     sizeof(skb->cb));
+	sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
 
 	sll = &PACKET_SKB_CB(skb)->sa.ll;
 	sll->sll_hatype = dev->type;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 8f34b27d5775..53b7acde9aa3 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1322,8 +1322,7 @@ static __init int sctp_init(void)
 	int max_share;
 	int order;
 
-	BUILD_BUG_ON(sizeof(struct sctp_ulpevent) >
-		     sizeof(((struct sk_buff *) 0)->cb));
+	sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
 
 	/* Allocate bind_bucket and chunk caches. */
 	status = -ENOBUFS;
-- 
cgit v1.2.3


From 3bc3b96f3b455bd14a8ccd83ffffc85625aba641 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Sun, 1 Mar 2015 14:58:30 +0200
Subject: net: add common accessor for setting dropcount on packets

As part of an effort to move skb->dropcount to skb->cb[], use
a common function in order to set dropcount in struct sk_buff.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h     | 6 ++++++
 net/core/sock.c        | 2 +-
 net/packet/af_packet.c | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index a2502d248641..0996fe451e5f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2081,6 +2081,12 @@ static inline int sock_intr_errno(long timeo)
 #define sock_skb_cb_check_size(size) \
 	BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
 
+static inline void
+sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
+{
+	skb->dropcount = atomic_read(&sk->sk_drops);
+}
+
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 			   struct sk_buff *skb);
 void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
diff --git a/net/core/sock.c b/net/core/sock.c
index 93c8b20c91e4..9c74fc8f0e32 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -466,7 +466,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	skb_dst_force(skb);
 
 	spin_lock_irqsave(&list->lock, flags);
-	skb->dropcount = atomic_read(&sk->sk_drops);
+	sock_skb_set_dropcount(sk, skb);
 	__skb_queue_tail(list, skb);
 	spin_unlock_irqrestore(&list->lock, flags);
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9cdb414cd231..9db83693d736 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1845,7 +1845,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.stats1.tp_packets++;
-	skb->dropcount = atomic_read(&sk->sk_drops);
+	sock_skb_set_dropcount(sk, skb);
 	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	spin_unlock(&sk->sk_receive_queue.lock);
 	sk->sk_data_ready(sk);
-- 
cgit v1.2.3


From 744d5a3e9fe2690dd85d9991dbb078301694658b Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Sun, 1 Mar 2015 14:58:31 +0200
Subject: net: move skb->dropcount to skb->cb[]

Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)")
unionized skb->mark and skb->dropcount in order to allow recording
of the socket drop count while maintaining struct sk_buff size.

skb->dropcount was introduced since there was no available room
in skb->cb[] in packet sockets. However, its introduction led to
the inability to export skb->mark, or any other aliased field to
userspace if so desired.

Moving the dropcount metric to skb->cb[] eliminates this problem
at the expense of 4 bytes less in skb->cb[] for protocol families
using it.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 --
 include/net/sock.h     | 18 ++++++++++++++++--
 net/socket.c           |  4 ++--
 3 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d898b32dedcc..bba1330757c0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -492,7 +492,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
   *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
  *	@mark: Generic packet mark
- *	@dropcount: total number of sk_receive_queue overflows
  *	@vlan_proto: vlan encapsulation protocol
  *	@vlan_tci: vlan tag control information
  *	@inner_protocol: Protocol (encapsulation)
@@ -641,7 +640,6 @@ struct sk_buff {
 #endif
 	union {
 		__u32		mark;
-		__u32		dropcount;
 		__u32		reserved_tailroom;
 	};
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 0996fe451e5f..38369d3580a1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2078,13 +2078,27 @@ static inline int sock_intr_errno(long timeo)
 	return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
 }
 
+struct sock_skb_cb {
+	u32 dropcount;
+};
+
+/* Store sock_skb_cb at the end of skb->cb[] so protocol families
+ * using skb->cb[] would keep using it directly and utilize its
+ * alignement guarantee.
+ */
+#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
+			    sizeof(struct sock_skb_cb)))
+
+#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
+			    SOCK_SKB_CB_OFFSET))
+
 #define sock_skb_cb_check_size(size) \
-	BUILD_BUG_ON((size) > FIELD_SIZEOF(struct sk_buff, cb))
+	BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)
 
 static inline void
 sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
 {
-	skb->dropcount = atomic_read(&sk->sk_drops);
+	SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
 }
 
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
diff --git a/net/socket.c b/net/socket.c
index bbedbfcb42c2..b78cf601a021 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -731,9 +731,9 @@ EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
 static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
 				   struct sk_buff *skb)
 {
-	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
+	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
 		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
-			sizeof(__u32), &skb->dropcount);
+			sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
 }
 
 void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
-- 
cgit v1.2.3


From 4186721d02b71ae943e60bbf50d3488fd5fd6adb Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Sun, 8 Feb 2015 17:11:47 +0100
Subject: bcma: add helpers bringing PCIe hosted bus up / down
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bringing PCIe hosted bus up requires operating on host-related core.
Since we plan to support PCIe Gen 2 devices we should provide a helper
picking the correct one (PCIE or PCIE2).

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/bcma_private.h                    |  2 ++
 drivers/bcma/driver_pci.c                      | 20 ++----------------
 drivers/bcma/host_pci.c                        | 28 ++++++++++++++++++++++++++
 drivers/net/wireless/b43/main.c                |  4 ++--
 drivers/net/wireless/brcm80211/brcmsmac/main.c |  8 ++++----
 include/linux/bcma/bcma.h                      |  3 +++
 include/linux/bcma/bcma_driver_pci.h           |  2 --
 7 files changed, 41 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index ac6c5fca906d..351f4afdad25 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -101,6 +101,8 @@ static inline void __exit bcma_host_soc_unregister_driver(void)
 
 /* driver_pci.c */
 u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address);
+void bcma_core_pci_up(struct bcma_drv_pci *pc);
+void bcma_core_pci_down(struct bcma_drv_pci *pc);
 
 extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc);
 
diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c
index 786666488a2d..cf92bfa7eae0 100644
--- a/drivers/bcma/driver_pci.c
+++ b/drivers/bcma/driver_pci.c
@@ -328,28 +328,12 @@ static void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend)
 	bcma_pcie_read(pc, BCMA_CORE_PCI_DLLP_PMTHRESHREG);
 }
 
-void bcma_core_pci_up(struct bcma_bus *bus)
+void bcma_core_pci_up(struct bcma_drv_pci *pc)
 {
-	struct bcma_drv_pci *pc;
-
-	if (bus->hosttype != BCMA_HOSTTYPE_PCI)
-		return;
-
-	pc = &bus->drv_pci[0];
-
 	bcma_core_pci_extend_L1timer(pc, true);
 }
-EXPORT_SYMBOL_GPL(bcma_core_pci_up);
 
-void bcma_core_pci_down(struct bcma_bus *bus)
+void bcma_core_pci_down(struct bcma_drv_pci *pc)
 {
-	struct bcma_drv_pci *pc;
-
-	if (bus->hosttype != BCMA_HOSTTYPE_PCI)
-		return;
-
-	pc = &bus->drv_pci[0];
-
 	bcma_core_pci_extend_L1timer(pc, false);
 }
-EXPORT_SYMBOL_GPL(bcma_core_pci_down);
diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index 53c6a8a58859..8dd37dc94cae 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -310,3 +310,31 @@ void __exit bcma_host_pci_exit(void)
 {
 	pci_unregister_driver(&bcma_pci_bridge_driver);
 }
+
+/**************************************************
+ * Runtime ops for drivers.
+ **************************************************/
+
+/* See also pcicore_up */
+void bcma_host_pci_up(struct bcma_bus *bus)
+{
+	if (bus->hosttype != BCMA_HOSTTYPE_PCI)
+		return;
+
+	if (bus->host_is_pcie2)
+		pr_warn("Bringing up bus with PCIe Gen 2 host is unsupported yet\n");
+	else
+		bcma_core_pci_up(&bus->drv_pci[0]);
+}
+EXPORT_SYMBOL_GPL(bcma_host_pci_up);
+
+/* See also pcicore_down */
+void bcma_host_pci_down(struct bcma_bus *bus)
+{
+	if (bus->hosttype != BCMA_HOSTTYPE_PCI)
+		return;
+
+	if (!bus->host_is_pcie2)
+		bcma_core_pci_down(&bus->drv_pci[0]);
+}
+EXPORT_SYMBOL_GPL(bcma_host_pci_down);
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index ccbdb05b28cd..34065a6f7725 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4819,7 +4819,7 @@ static void b43_wireless_core_exit(struct b43_wldev *dev)
 	switch (dev->dev->bus_type) {
 #ifdef CONFIG_B43_BCMA
 	case B43_BUS_BCMA:
-		bcma_core_pci_down(dev->dev->bdev->bus);
+		bcma_host_pci_down(dev->dev->bdev->bus);
 		break;
 #endif
 #ifdef CONFIG_B43_SSB
@@ -4868,7 +4868,7 @@ static int b43_wireless_core_init(struct b43_wldev *dev)
 	case B43_BUS_BCMA:
 		bcma_core_pci_irq_ctl(&dev->dev->bdev->bus->drv_pci[0],
 				      dev->dev->bdev, true);
-		bcma_core_pci_up(dev->dev->bdev->bus);
+		bcma_host_pci_up(dev->dev->bdev->bus);
 		break;
 #endif
 #ifdef CONFIG_B43_SSB
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c
index eb8584a9c49a..bcbfc6e467e4 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c
@@ -4668,7 +4668,7 @@ static int brcms_b_attach(struct brcms_c_info *wlc, struct bcma_device *core,
 	brcms_c_coredisable(wlc_hw);
 
 	/* Match driver "down" state */
-	bcma_core_pci_down(wlc_hw->d11core->bus);
+	bcma_host_pci_down(wlc_hw->d11core->bus);
 
 	/* turn off pll and xtal to match driver "down" state */
 	brcms_b_xtal(wlc_hw, OFF);
@@ -4969,12 +4969,12 @@ static int brcms_b_up_prep(struct brcms_hardware *wlc_hw)
 	 */
 	if (brcms_b_radio_read_hwdisabled(wlc_hw)) {
 		/* put SB PCI in down state again */
-		bcma_core_pci_down(wlc_hw->d11core->bus);
+		bcma_host_pci_down(wlc_hw->d11core->bus);
 		brcms_b_xtal(wlc_hw, OFF);
 		return -ENOMEDIUM;
 	}
 
-	bcma_core_pci_up(wlc_hw->d11core->bus);
+	bcma_host_pci_up(wlc_hw->d11core->bus);
 
 	/* reset the d11 core */
 	brcms_b_corereset(wlc_hw, BRCMS_USE_COREFLAGS);
@@ -5171,7 +5171,7 @@ static int brcms_b_down_finish(struct brcms_hardware *wlc_hw)
 
 		/* turn off primary xtal and pll */
 		if (!wlc_hw->noreset) {
-			bcma_core_pci_down(wlc_hw->d11core->bus);
+			bcma_host_pci_down(wlc_hw->d11core->bus);
 			brcms_b_xtal(wlc_hw, OFF);
 		}
 	}
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 994739da827f..037620b3f113 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -434,6 +434,9 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus,
 	return bcma_find_core_unit(bus, coreid, 0);
 }
 
+extern void bcma_host_pci_up(struct bcma_bus *bus);
+extern void bcma_host_pci_down(struct bcma_bus *bus);
+
 extern bool bcma_core_is_enabled(struct bcma_device *core);
 extern void bcma_core_disable(struct bcma_device *core, u32 flags);
 extern int bcma_core_enable(struct bcma_device *core, u32 flags);
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 3f809ae372c4..23af893e9b86 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -242,8 +242,6 @@ extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc);
 extern void bcma_core_pci_init(struct bcma_drv_pci *pc);
 extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc,
 				 struct bcma_device *core, bool enable);
-extern void bcma_core_pci_up(struct bcma_bus *bus);
-extern void bcma_core_pci_down(struct bcma_bus *bus);
 extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up);
 
 extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev);
-- 
cgit v1.2.3


From 5b6ff664c8959d715e785b9465b042407a5d87a0 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Sun, 8 Feb 2015 17:11:48 +0100
Subject: bcma: change IRQ control function to accept bus as an argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It doesn't operate on PCI core, but PCI host device, so there is no
point of passing core related struct.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/driver_pci.c                      | 6 +++---
 drivers/net/wireless/b43/main.c                | 2 +-
 drivers/net/wireless/brcm80211/brcmsmac/main.c | 2 +-
 include/linux/bcma/bcma_driver_pci.h           | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c
index cf92bfa7eae0..cfd35bc1c5a3 100644
--- a/drivers/bcma/driver_pci.c
+++ b/drivers/bcma/driver_pci.c
@@ -282,21 +282,21 @@ void bcma_core_pci_power_save(struct bcma_bus *bus, bool up)
 }
 EXPORT_SYMBOL_GPL(bcma_core_pci_power_save);
 
-int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core,
+int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core,
 			  bool enable)
 {
 	struct pci_dev *pdev;
 	u32 coremask, tmp;
 	int err = 0;
 
-	if (!pc || core->bus->hosttype != BCMA_HOSTTYPE_PCI) {
+	if (bus->hosttype != BCMA_HOSTTYPE_PCI) {
 		/* This bcma device is not on a PCI host-bus. So the IRQs are
 		 * not routed through the PCI core.
 		 * So we must not enable routing through the PCI core. */
 		goto out;
 	}
 
-	pdev = pc->core->bus->host_pci;
+	pdev = bus->host_pci;
 
 	err = pci_read_config_dword(pdev, BCMA_PCI_IRQMASK, &tmp);
 	if (err)
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 34065a6f7725..f34aa67f3179 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4866,7 +4866,7 @@ static int b43_wireless_core_init(struct b43_wldev *dev)
 	switch (dev->dev->bus_type) {
 #ifdef CONFIG_B43_BCMA
 	case B43_BUS_BCMA:
-		bcma_core_pci_irq_ctl(&dev->dev->bdev->bus->drv_pci[0],
+		bcma_core_pci_irq_ctl(dev->dev->bdev->bus,
 				      dev->dev->bdev, true);
 		bcma_host_pci_up(dev->dev->bdev->bus);
 		break;
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c
index bcbfc6e467e4..c84af1dfc88f 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c
@@ -4959,7 +4959,7 @@ static int brcms_b_up_prep(struct brcms_hardware *wlc_hw)
 	 * Configure pci/pcmcia here instead of in brcms_c_attach()
 	 * to allow mfg hotswap:  down, hotswap (chip power cycle), up.
 	 */
-	bcma_core_pci_irq_ctl(&wlc_hw->d11core->bus->drv_pci[0], wlc_hw->d11core,
+	bcma_core_pci_irq_ctl(wlc_hw->d11core->bus, wlc_hw->d11core,
 			      true);
 
 	/*
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 23af893e9b86..6b8bca67851f 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -240,7 +240,7 @@ struct bcma_drv_pci {
 
 extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc);
 extern void bcma_core_pci_init(struct bcma_drv_pci *pc);
-extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc,
+extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus,
 				 struct bcma_device *core, bool enable);
 extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up);
 
-- 
cgit v1.2.3


From 804e27dee49e20c0addd1b7276654220cc3768ae Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Sun, 8 Feb 2015 17:11:49 +0100
Subject: bcma: support bringing up bus hosted on PCIe Gen 2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/bcma_private.h            |  3 +++
 drivers/bcma/driver_pcie2.c            | 28 ++++++++++++++++++++++++++--
 drivers/bcma/host_pci.c                |  2 +-
 include/linux/bcma/bcma_driver_pcie2.h |  2 ++
 4 files changed, 32 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 351f4afdad25..36929126a206 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -104,6 +104,9 @@ u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address);
 void bcma_core_pci_up(struct bcma_drv_pci *pc);
 void bcma_core_pci_down(struct bcma_drv_pci *pc);
 
+/* driver_pcie2.c */
+void bcma_core_pcie2_up(struct bcma_drv_pcie2 *pcie2);
+
 extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc);
 
 #ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE
diff --git a/drivers/bcma/driver_pcie2.c b/drivers/bcma/driver_pcie2.c
index e4be537b0c66..4568bc7bd54a 100644
--- a/drivers/bcma/driver_pcie2.c
+++ b/drivers/bcma/driver_pcie2.c
@@ -156,14 +156,23 @@ static void pciedev_reg_pm_clk_period(struct bcma_drv_pcie2 *pcie2)
 
 void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2)
 {
-	struct bcma_chipinfo *ci = &pcie2->core->bus->chipinfo;
+	struct bcma_bus *bus = pcie2->core->bus;
+	struct bcma_chipinfo *ci = &bus->chipinfo;
 	u32 tmp;
 
 	tmp = pcie2_read32(pcie2, BCMA_CORE_PCIE2_SPROM(54));
 	if ((tmp & 0xe) >> 1 == 2)
 		bcma_core_pcie2_cfg_write(pcie2, 0x4e0, 0x17);
 
-	/* TODO: Do we need pcie_reqsize? */
+	switch (bus->chipinfo.id) {
+	case BCMA_CHIP_ID_BCM4360:
+	case BCMA_CHIP_ID_BCM4352:
+		pcie2->reqsize = 1024;
+		break;
+	default:
+		pcie2->reqsize = 128;
+		break;
+	}
 
 	if (ci->id == BCMA_CHIP_ID_BCM4360 && ci->rev > 3)
 		bcma_core_pcie2_war_delay_perst_enab(pcie2, true);
@@ -173,3 +182,18 @@ void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2)
 	pciedev_crwlpciegen2_180(pcie2);
 	pciedev_crwlpciegen2_182(pcie2);
 }
+
+/**************************************************
+ * Runtime ops.
+ **************************************************/
+
+void bcma_core_pcie2_up(struct bcma_drv_pcie2 *pcie2)
+{
+	struct bcma_bus *bus = pcie2->core->bus;
+	struct pci_dev *dev = bus->host_pci;
+	int err;
+
+	err = pcie_set_readrq(dev, pcie2->reqsize);
+	if (err)
+		bcma_err(bus, "Error setting PCI_EXP_DEVCTL_READRQ: %d\n", err);
+}
diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index 8dd37dc94cae..5fb87a899a24 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -322,7 +322,7 @@ void bcma_host_pci_up(struct bcma_bus *bus)
 		return;
 
 	if (bus->host_is_pcie2)
-		pr_warn("Bringing up bus with PCIe Gen 2 host is unsupported yet\n");
+		bcma_core_pcie2_up(&bus->drv_pcie2);
 	else
 		bcma_core_pci_up(&bus->drv_pci[0]);
 }
diff --git a/include/linux/bcma/bcma_driver_pcie2.h b/include/linux/bcma/bcma_driver_pcie2.h
index 5988b05781c3..d8c43294c527 100644
--- a/include/linux/bcma/bcma_driver_pcie2.h
+++ b/include/linux/bcma/bcma_driver_pcie2.h
@@ -143,6 +143,8 @@
 
 struct bcma_drv_pcie2 {
 	struct bcma_device *core;
+
+	u16 reqsize;
 };
 
 #define pcie2_read16(pcie2, offset)		bcma_read16((pcie2)->core, offset)
-- 
cgit v1.2.3


From 1b784140474e4fc94281a49e96c67d29df0efbde Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Mon, 2 Mar 2015 15:37:48 +0800
Subject: net: Remove iocb argument from sendmsg and recvmsg

After TIPC doesn't depend on iocb argument in its internal
implementations of sendmsg() and recvmsg() hooks defined in proto
structure, no any user is using iocb argument in them at all now.
Then we can drop the redundant iocb argument completely from kinds of
implementations of both sendmsg() and recvmsg() in the entire
networking stack.

Cc: Christoph Hellwig <hch@lst.de>
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/algif_hash.c               |  8 ++--
 crypto/algif_rng.c                |  4 +-
 crypto/algif_skcipher.c           |  8 ++--
 drivers/isdn/mISDN/socket.c       |  7 ++--
 drivers/net/macvtap.c             |  9 ++---
 drivers/net/ppp/pppoe.c           |  8 ++--
 drivers/net/tun.c                 |  6 +--
 drivers/vhost/net.c               |  6 +--
 include/linux/net.h               | 10 ++---
 include/net/af_vsock.h            |  4 +-
 include/net/bluetooth/bluetooth.h |  8 ++--
 include/net/inet_common.h         |  7 ++--
 include/net/ping.h                |  7 ++--
 include/net/sock.h                | 16 ++++----
 include/net/tcp.h                 |  7 ++--
 include/net/udp.h                 |  3 +-
 net/appletalk/ddp.c               |  7 ++--
 net/atm/common.c                  |  7 ++--
 net/atm/common.h                  |  7 ++--
 net/ax25/af_ax25.c                |  7 ++--
 net/bluetooth/af_bluetooth.c      |  8 ++--
 net/bluetooth/hci_sock.c          |  8 ++--
 net/bluetooth/l2cap_sock.c        | 12 +++---
 net/bluetooth/rfcomm/sock.c       | 10 ++---
 net/bluetooth/sco.c               | 10 ++---
 net/caif/caif_socket.c            | 17 ++++-----
 net/can/bcm.c                     |  7 ++--
 net/can/raw.c                     |  7 ++--
 net/core/sock.c                   | 13 +++----
 net/dccp/dccp.h                   |  8 ++--
 net/dccp/probe.c                  |  3 +-
 net/dccp/proto.c                  |  7 ++--
 net/decnet/af_decnet.c            |  7 ++--
 net/ieee802154/socket.c           | 21 +++++------
 net/ipv4/af_inet.c                | 11 +++---
 net/ipv4/ping.c                   |  7 ++--
 net/ipv4/raw.c                    |  7 ++--
 net/ipv4/tcp.c                    |  7 ++--
 net/ipv4/udp.c                    |  9 ++---
 net/ipv4/udp_impl.h               |  4 +-
 net/ipv6/ping.c                   |  3 +-
 net/ipv6/raw.c                    |  8 ++--
 net/ipv6/udp.c                    | 10 ++---
 net/ipv6/udp_impl.h               |  7 ++--
 net/ipx/af_ipx.c                  |  7 ++--
 net/irda/af_irda.c                | 29 +++++++--------
 net/iucv/af_iucv.c                |  8 ++--
 net/key/af_key.c                  |  6 +--
 net/l2tp/l2tp_ip.c                |  4 +-
 net/l2tp/l2tp_ip6.c               |  8 ++--
 net/l2tp/l2tp_ppp.c               |  7 ++--
 net/llc/af_llc.c                  |  7 ++--
 net/netlink/af_netlink.c          |  6 +--
 net/netrom/af_netrom.c            |  7 ++--
 net/nfc/llcp_sock.c               |  8 ++--
 net/nfc/rawsock.c                 |  7 ++--
 net/packet/af_packet.c            | 11 +++---
 net/phonet/datagram.c             |  8 ++--
 net/phonet/pep.c                  |  8 ++--
 net/phonet/socket.c               |  6 +--
 net/rds/rds.h                     |  7 ++--
 net/rds/recv.c                    |  4 +-
 net/rds/send.c                    |  3 +-
 net/rose/af_rose.c                |  7 ++--
 net/rxrpc/af_rxrpc.c              |  7 ++--
 net/rxrpc/ar-internal.h           | 10 ++---
 net/rxrpc/ar-output.c             | 20 ++++------
 net/rxrpc/ar-recvmsg.c            |  4 +-
 net/sctp/socket.c                 |  8 ++--
 net/socket.c                      | 78 +++++++--------------------------------
 net/tipc/socket.c                 | 23 ++++--------
 net/unix/af_unix.c                | 50 +++++++++++--------------
 net/vmw_vsock/af_vsock.c          | 20 +++++-----
 net/vmw_vsock/vmci_transport.c    |  3 +-
 net/x25/af_x25.c                  |  6 +--
 75 files changed, 302 insertions(+), 442 deletions(-)

(limited to 'include')

diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 01da360bdb55..0a465e0f3012 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -34,8 +34,8 @@ struct hash_ctx {
 	struct ahash_request req;
 };
 
-static int hash_sendmsg(struct kiocb *unused, struct socket *sock,
-			struct msghdr *msg, size_t ignored)
+static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
+			size_t ignored)
 {
 	int limit = ALG_MAX_PAGES * PAGE_SIZE;
 	struct sock *sk = sock->sk;
@@ -139,8 +139,8 @@ unlock:
 	return err ?: size;
 }
 
-static int hash_recvmsg(struct kiocb *unused, struct socket *sock,
-			struct msghdr *msg, size_t len, int flags)
+static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+			int flags)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 67f612cfed97..3acba0a7cd55 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -55,8 +55,8 @@ struct rng_ctx {
 	struct crypto_rng *drng;
 };
 
-static int rng_recvmsg(struct kiocb *unused, struct socket *sock,
-		       struct msghdr *msg, size_t len, int flags)
+static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		       int flags)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 0c8a1e5ccadf..b9743dc35801 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -239,8 +239,8 @@ static void skcipher_data_wakeup(struct sock *sk)
 	rcu_read_unlock();
 }
 
-static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock,
-			    struct msghdr *msg, size_t size)
+static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
+			    size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
@@ -424,8 +424,8 @@ unlock:
 	return err ?: size;
 }
 
-static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock,
-			    struct msghdr *msg, size_t ignored, int flags)
+static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
+			    size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 84b35925ee4d..8dc7290089bb 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -112,8 +112,8 @@ mISDN_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 }
 
 static int
-mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-		   struct msghdr *msg, size_t len, int flags)
+mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		   int flags)
 {
 	struct sk_buff		*skb;
 	struct sock		*sk = sock->sk;
@@ -173,8 +173,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 }
 
 static int
-mISDN_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-		   struct msghdr *msg, size_t len)
+mISDN_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock		*sk = sock->sk;
 	struct sk_buff		*skb;
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index e40fdfccc9c1..1e51c6bf3ae1 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -1127,16 +1127,15 @@ static const struct file_operations macvtap_fops = {
 #endif
 };
 
-static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
-			   struct msghdr *m, size_t total_len)
+static int macvtap_sendmsg(struct socket *sock, struct msghdr *m,
+			   size_t total_len)
 {
 	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
 	return macvtap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT);
 }
 
-static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
-			   struct msghdr *m, size_t total_len,
-			   int flags)
+static int macvtap_recvmsg(struct socket *sock, struct msghdr *m,
+			   size_t total_len, int flags)
 {
 	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
 	int ret;
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 9c97e9bcf5f5..ff059e1d8ac6 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -835,8 +835,8 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
 	return err;
 }
 
-static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock,
-		  struct msghdr *m, size_t total_len)
+static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
+			 size_t total_len)
 {
 	struct sk_buff *skb;
 	struct sock *sk = sock->sk;
@@ -977,8 +977,8 @@ static const struct ppp_channel_ops pppoe_chan_ops = {
 	.start_xmit = pppoe_xmit,
 };
 
-static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
-		  struct msghdr *m, size_t total_len, int flags)
+static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
+			 size_t total_len, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 857dca47bf80..b96b94cee760 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1448,8 +1448,7 @@ static void tun_sock_write_space(struct sock *sk)
 	kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
 }
 
-static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *m, size_t total_len)
+static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
 	int ret;
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
@@ -1464,8 +1463,7 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
 	return ret;
 }
 
-static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *m, size_t total_len,
+static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 		       int flags)
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index afa06d28725d..633012cc9a57 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -390,7 +390,7 @@ static void handle_tx(struct vhost_net *net)
 			ubufs = NULL;
 		}
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
-		err = sock->ops->sendmsg(NULL, sock, &msg, len);
+		err = sock->ops->sendmsg(sock, &msg, len);
 		if (unlikely(err < 0)) {
 			if (zcopy_used) {
 				vhost_net_ubuf_put(ubufs);
@@ -566,7 +566,7 @@ static void handle_rx(struct vhost_net *net)
 		/* On overrun, truncate and discard */
 		if (unlikely(headcount > UIO_MAXIOV)) {
 			iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
-			err = sock->ops->recvmsg(NULL, sock, &msg,
+			err = sock->ops->recvmsg(sock, &msg,
 						 1, MSG_DONTWAIT | MSG_TRUNC);
 			pr_debug("Discarded rx packet: len %zd\n", sock_len);
 			continue;
@@ -597,7 +597,7 @@ static void handle_rx(struct vhost_net *net)
 			 */
 			iov_iter_advance(&fixup, sizeof(hdr));
 		}
-		err = sock->ops->recvmsg(NULL, sock, &msg,
+		err = sock->ops->recvmsg(sock, &msg,
 					 sock_len, MSG_DONTWAIT | MSG_TRUNC);
 		/* Userspace might have consumed the packet meanwhile:
 		 * it's not supposed to do this usually, but might be hard
diff --git a/include/linux/net.h b/include/linux/net.h
index 17d83393afcc..e74114bcca68 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -120,7 +120,6 @@ struct socket {
 
 struct vm_area_struct;
 struct page;
-struct kiocb;
 struct sockaddr;
 struct msghdr;
 struct module;
@@ -162,8 +161,8 @@ struct proto_ops {
 	int		(*compat_getsockopt)(struct socket *sock, int level,
 				      int optname, char __user *optval, int __user *optlen);
 #endif
-	int		(*sendmsg)   (struct kiocb *iocb, struct socket *sock,
-				      struct msghdr *m, size_t total_len);
+	int		(*sendmsg)   (struct socket *sock, struct msghdr *m,
+				      size_t total_len);
 	/* Notes for implementing recvmsg:
 	 * ===============================
 	 * msg->msg_namelen should get updated by the recvmsg handlers
@@ -172,9 +171,8 @@ struct proto_ops {
 	 * handlers can assume that msg.msg_name is either NULL or has
 	 * a minimum size of sizeof(struct sockaddr_storage).
 	 */
-	int		(*recvmsg)   (struct kiocb *iocb, struct socket *sock,
-				      struct msghdr *m, size_t total_len,
-				      int flags);
+	int		(*recvmsg)   (struct socket *sock, struct msghdr *m,
+				      size_t total_len, int flags);
 	int		(*mmap)	     (struct file *file, struct socket *sock,
 				      struct vm_area_struct * vma);
 	ssize_t		(*sendpage)  (struct socket *sock, struct page *page,
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 0d87674fb775..172632dd9930 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -100,8 +100,8 @@ struct vsock_transport {
 
 	/* DGRAM. */
 	int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
-	int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk,
-			     struct msghdr *msg, size_t len, int flags);
+	int (*dgram_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
+			     size_t len, int flags);
 	int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
 			     struct msghdr *, size_t len);
 	bool (*dgram_allow)(u32 cid, u32 port);
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 4500bf88ff55..6bb97df16d2d 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -245,10 +245,10 @@ int  bt_sock_register(int proto, const struct net_proto_family *ops);
 void bt_sock_unregister(int proto);
 void bt_sock_link(struct bt_sock_list *l, struct sock *s);
 void bt_sock_unlink(struct bt_sock_list *l, struct sock *s);
-int  bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-				struct msghdr *msg, size_t len, int flags);
-int  bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
-			struct msghdr *msg, size_t len, int flags);
+int  bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		     int flags);
+int  bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
+			    size_t len, int flags);
 uint bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait);
 int  bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int  bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index b2828a06a5a6..4a92423eefa5 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -21,12 +21,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 		       int addr_len, int flags);
 int inet_accept(struct socket *sock, struct socket *newsock, int flags);
-int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		 size_t size);
+int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 		      size_t size, int flags);
-int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		 size_t size, int flags);
+int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		 int flags);
 int inet_shutdown(struct socket *sock, int how);
 int inet_listen(struct socket *sock, int backlog);
 void inet_sock_destruct(struct sock *sk);
diff --git a/include/net/ping.h b/include/net/ping.h
index cc16d413f681..ac80cb45e630 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -75,12 +75,11 @@ void ping_err(struct sk_buff *skb, int offset, u32 info);
 int  ping_getfrag(void *from, char *to, int offset, int fraglen, int odd,
 		  struct sk_buff *);
 
-int  ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		  size_t len, int noblock, int flags, int *addr_len);
+int  ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+		  int flags, int *addr_len);
 int  ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
 			 void *user_icmph, size_t icmph_len);
-int  ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		     size_t len);
+int  ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int  ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 bool ping_rcv(struct sk_buff *skb);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 38369d3580a1..250822cc1e02 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -958,10 +958,9 @@ struct proto {
 	int			(*compat_ioctl)(struct sock *sk,
 					unsigned int cmd, unsigned long arg);
 #endif
-	int			(*sendmsg)(struct kiocb *iocb, struct sock *sk,
-					   struct msghdr *msg, size_t len);
-	int			(*recvmsg)(struct kiocb *iocb, struct sock *sk,
-					   struct msghdr *msg,
+	int			(*sendmsg)(struct sock *sk, struct msghdr *msg,
+					   size_t len);
+	int			(*recvmsg)(struct sock *sk, struct msghdr *msg,
 					   size_t len, int noblock, int flags,
 					   int *addr_len);
 	int			(*sendpage)(struct sock *sk, struct page *page,
@@ -1562,9 +1561,8 @@ int sock_no_listen(struct socket *, int);
 int sock_no_shutdown(struct socket *, int);
 int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *);
 int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int);
-int sock_no_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t);
-int sock_no_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t,
-		    int);
+int sock_no_sendmsg(struct socket *, struct msghdr *, size_t);
+int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
 int sock_no_mmap(struct file *file, struct socket *sock,
 		 struct vm_area_struct *vma);
 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
@@ -1576,8 +1574,8 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
  */
 int sock_common_getsockopt(struct socket *sock, int level, int optname,
 				  char __user *optval, int __user *optlen);
-int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
-			       struct msghdr *msg, size_t size, int flags);
+int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+			int flags);
 int sock_common_setsockopt(struct socket *sock, int level, int optname,
 				  char __user *optval, unsigned int optlen);
 int compat_sock_common_getsockopt(struct socket *sock, int level,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8d6b983d5099..f87599d5af82 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -349,8 +349,7 @@ void tcp_v4_early_demux(struct sk_buff *skb);
 int tcp_v4_rcv(struct sk_buff *skb);
 
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
-int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		size_t size);
+int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
 		 int flags);
 void tcp_release_cb(struct sock *sk);
@@ -430,8 +429,8 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
 			  char __user *optval, unsigned int optlen);
 void tcp_set_keepalive(struct sock *sk, int val);
 void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
-int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		size_t len, int nonblock, int flags, int *addr_len);
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+		int flags, int *addr_len);
 void tcp_parse_options(const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx,
 		       int estab, struct tcp_fastopen_cookie *foc);
diff --git a/include/net/udp.h b/include/net/udp.h
index 32d8d9f07f76..6d4ed18e1427 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -238,8 +238,7 @@ int udp_get_port(struct sock *sk, unsigned short snum,
 		 int (*saddr_cmp)(const struct sock *,
 				  const struct sock *));
 void udp_err(struct sk_buff *, u32);
-int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		size_t len);
+int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int udp_push_pending_frames(struct sock *sk);
 void udp_flush_pending_frames(struct sock *sk);
 void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 0d0766ea5ab1..3b7ad43c7dad 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1559,8 +1559,7 @@ freeit:
 	return 0;
 }
 
-static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-			 size_t len)
+static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct atalk_sock *at = at_sk(sk);
@@ -1728,8 +1727,8 @@ out:
 	return err ? : len;
 }
 
-static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-			 size_t size, int flags)
+static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+			 int flags)
 {
 	struct sock *sk = sock->sk;
 	struct ddpehdr *ddp;
diff --git a/net/atm/common.c b/net/atm/common.c
index b84057e41bd6..ed0466637e13 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -523,8 +523,8 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci)
 	return 0;
 }
 
-int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		size_t size, int flags)
+int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		int flags)
 {
 	struct sock *sk = sock->sk;
 	struct atm_vcc *vcc;
@@ -569,8 +569,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	return copied;
 }
 
-int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
-		size_t size)
+int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
 {
 	struct sock *sk = sock->sk;
 	DEFINE_WAIT(wait);
diff --git a/net/atm/common.h b/net/atm/common.h
index cc3c2dae4d79..4d6f5b2068ac 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -13,10 +13,9 @@
 int vcc_create(struct net *net, struct socket *sock, int protocol, int family);
 int vcc_release(struct socket *sock);
 int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
-int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		size_t size, int flags);
-int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
-		size_t total_len);
+int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		int flags);
+int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
 unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
 int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index ca049a7c9287..330c1f4a5a0b 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1432,8 +1432,7 @@ out:
 	return err;
 }
 
-static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
-			struct msghdr *msg, size_t len)
+static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name);
 	struct sock *sk = sock->sk;
@@ -1599,8 +1598,8 @@ out:
 	return err;
 }
 
-static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
-	struct msghdr *msg, size_t size, int flags)
+static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+			int flags)
 {
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 4b904c97a068..20a4698e2255 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -210,8 +210,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 }
 EXPORT_SYMBOL(bt_accept_dequeue);
 
-int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-				struct msghdr *msg, size_t len, int flags)
+int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		    int flags)
 {
 	int noblock = flags & MSG_DONTWAIT;
 	struct sock *sk = sock->sk;
@@ -283,8 +283,8 @@ static long bt_sock_data_wait(struct sock *sk, long timeo)
 	return timeo;
 }
 
-int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
-			       struct msghdr *msg, size_t size, int flags)
+int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
+			   size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 	int err = 0;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 37198fb99ffe..aa8be4cb19a1 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -826,8 +826,8 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
 	}
 }
 
-static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *msg, size_t len, int flags)
+static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+			    int flags)
 {
 	int noblock = flags & MSG_DONTWAIT;
 	struct sock *sk = sock->sk;
@@ -871,8 +871,8 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	return err ? : copied;
 }
 
-static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *msg, size_t len)
+static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+			    size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct hci_dev *hdev;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 60694f0f4c73..9070720eedc8 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -944,8 +944,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
 	return err;
 }
 
-static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-			      struct msghdr *msg, size_t len)
+static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+			      size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct l2cap_chan *chan = l2cap_pi(sk)->chan;
@@ -976,8 +976,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	return err;
 }
 
-static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-			      struct msghdr *msg, size_t len, int flags)
+static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+			      size_t len, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
@@ -1004,9 +1004,9 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	release_sock(sk);
 
 	if (sock->type == SOCK_STREAM)
-		err = bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
+		err = bt_sock_stream_recvmsg(sock, msg, len, flags);
 	else
-		err = bt_sock_recvmsg(iocb, sock, msg, len, flags);
+		err = bt_sock_recvmsg(sock, msg, len, flags);
 
 	if (pi->chan->mode != L2CAP_MODE_ERTM)
 		return err;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 3c6d2c8ac1a4..825e8fb5114b 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -549,8 +549,8 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
 	return 0;
 }
 
-static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-			       struct msghdr *msg, size_t len)
+static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+			       size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
@@ -615,8 +615,8 @@ done:
 	return sent;
 }
 
-static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-			       struct msghdr *msg, size_t size, int flags)
+static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+			       size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
@@ -627,7 +627,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return 0;
 	}
 
-	len = bt_sock_stream_recvmsg(iocb, sock, msg, size, flags);
+	len = bt_sock_stream_recvmsg(sock, msg, size, flags);
 
 	lock_sock(sk);
 	if (!(flags & MSG_PEEK) && len > 0)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 76321b546e84..2bb7ef46bb99 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -688,8 +688,8 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len
 	return 0;
 }
 
-static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *msg, size_t len)
+static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+			    size_t len)
 {
 	struct sock *sk = sock->sk;
 	int err;
@@ -758,8 +758,8 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
 	}
 }
 
-static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *msg, size_t len, int flags)
+static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+			    size_t len, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct sco_pinfo *pi = sco_pi(sk);
@@ -777,7 +777,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	release_sock(sk);
 
-	return bt_sock_recvmsg(iocb, sock, msg, len, flags);
+	return bt_sock_recvmsg(sock, msg, len, flags);
 }
 
 static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 769b185fefbd..b6bf51bb187d 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -271,8 +271,8 @@ static void caif_check_flow_release(struct sock *sk)
  * Copied from unix_dgram_recvmsg, but removed credit checks,
  * changed locking, address handling and added MSG_TRUNC.
  */
-static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
-			       struct msghdr *m, size_t len, int flags)
+static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m,
+			       size_t len, int flags)
 
 {
 	struct sock *sk = sock->sk;
@@ -343,9 +343,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
  * Copied from unix_stream_recvmsg, but removed credit checks,
  * changed locking calls, changed address handling.
  */
-static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
-			       struct msghdr *msg, size_t size,
-			       int flags)
+static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg,
+			       size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 	int copied = 0;
@@ -511,8 +510,8 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
 }
 
 /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
-static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
-			       struct msghdr *msg, size_t len)
+static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg,
+			       size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -586,8 +585,8 @@ err:
  * Changed removed permission handling and added waiting for flow on
  * and other minor adaptations.
  */
-static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
-			       struct msghdr *msg, size_t len)
+static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+			       size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index d559f922326d..b523453585be 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1231,8 +1231,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 /*
  * bcm_sendmsg - process BCM commands (opcodes) from the userspace
  */
-static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *msg, size_t size)
+static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct bcm_sock *bo = bcm_sk(sk);
@@ -1535,8 +1534,8 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
 	return 0;
 }
 
-static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *msg, size_t size, int flags)
+static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		       int flags)
 {
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
diff --git a/net/can/raw.c b/net/can/raw.c
index 94601b7ff0a3..63ffdb0f3a23 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -658,8 +658,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
 	return 0;
 }
 
-static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *msg, size_t size)
+static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct raw_sock *ro = raw_sk(sk);
@@ -728,8 +727,8 @@ send_failed:
 	return err;
 }
 
-static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *msg, size_t size, int flags)
+static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		       int flags)
 {
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
diff --git a/net/core/sock.c b/net/core/sock.c
index 9c74fc8f0e32..726e1f99aa8d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2163,15 +2163,14 @@ int sock_no_getsockopt(struct socket *sock, int level, int optname,
 }
 EXPORT_SYMBOL(sock_no_getsockopt);
 
-int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
-		    size_t len)
+int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 {
 	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(sock_no_sendmsg);
 
-int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
-		    size_t len, int flags)
+int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
+		    int flags)
 {
 	return -EOPNOTSUPP;
 }
@@ -2543,14 +2542,14 @@ int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
 EXPORT_SYMBOL(compat_sock_common_getsockopt);
 #endif
 
-int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
-			struct msghdr *msg, size_t size, int flags)
+int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+			int flags)
 {
 	struct sock *sk = sock->sk;
 	int addr_len = 0;
 	int err;
 
-	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
 				   flags & ~MSG_DONTWAIT, &addr_len);
 	if (err >= 0)
 		msg->msg_namelen = addr_len;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e4c144fa706f..3b1d64d6e093 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -310,11 +310,9 @@ int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, unsigned int optlen);
 #endif
 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
-int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		 size_t size);
-int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
-		 struct msghdr *msg, size_t len, int nonblock, int flags,
-		 int *addr_len);
+int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+		 int flags, int *addr_len);
 void dccp_shutdown(struct sock *sk, int how);
 int inet_dccp_listen(struct socket *sock, int backlog);
 unsigned int dccp_poll(struct file *file, struct socket *sock,
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 595ddf0459db..d8346d0eadeb 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -72,8 +72,7 @@ static void printl(const char *fmt, ...)
 	wake_up(&dccpw.wait);
 }
 
-static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
-			 struct msghdr *msg, size_t size)
+static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 	struct ccid3_hc_tx_sock *hc = NULL;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index e171b780b499..52a94016526d 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -741,8 +741,7 @@ static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
 	return 0;
 }
 
-int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		 size_t len)
+int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	const struct dccp_sock *dp = dccp_sk(sk);
 	const int flags = msg->msg_flags;
@@ -806,8 +805,8 @@ out_discard:
 
 EXPORT_SYMBOL_GPL(dccp_sendmsg);
 
-int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		 size_t len, int nonblock, int flags, int *addr_len)
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+		 int flags, int *addr_len)
 {
 	const struct dccp_hdr *dh;
 	long timeo;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 810228646de3..754484b3cd0e 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1669,8 +1669,8 @@ static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int
 }
 
 
-static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
-	struct msghdr *msg, size_t size, int flags)
+static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		      int flags)
 {
 	struct sock *sk = sock->sk;
 	struct dn_scp *scp = DN_SK(sk);
@@ -1905,8 +1905,7 @@ static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
 	return skb;
 }
 
-static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
-		      struct msghdr *msg, size_t size)
+static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct dn_scp *scp = DN_SK(sk);
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 2878d8ca6d3b..b60c65f70346 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -98,12 +98,12 @@ static int ieee802154_sock_release(struct socket *sock)
 	return 0;
 }
 
-static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-				   struct msghdr *msg, size_t len)
+static int ieee802154_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+				   size_t len)
 {
 	struct sock *sk = sock->sk;
 
-	return sk->sk_prot->sendmsg(iocb, sk, msg, len);
+	return sk->sk_prot->sendmsg(sk, msg, len);
 }
 
 static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr,
@@ -255,8 +255,7 @@ static int raw_disconnect(struct sock *sk, int flags)
 	return 0;
 }
 
-static int raw_sendmsg(struct kiocb *iocb, struct sock *sk,
-		       struct msghdr *msg, size_t size)
+static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct net_device *dev;
 	unsigned int mtu;
@@ -327,8 +326,8 @@ out:
 	return err;
 }
 
-static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		       size_t len, int noblock, int flags, int *addr_len)
+static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		       int noblock, int flags, int *addr_len)
 {
 	size_t copied = 0;
 	int err = -EOPNOTSUPP;
@@ -615,8 +614,7 @@ static int dgram_disconnect(struct sock *sk, int flags)
 	return 0;
 }
 
-static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
-			 struct msghdr *msg, size_t size)
+static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct net_device *dev;
 	unsigned int mtu;
@@ -715,9 +713,8 @@ out:
 	return err;
 }
 
-static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
-			 struct msghdr *msg, size_t len, int noblock,
-			 int flags, int *addr_len)
+static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			 int noblock, int flags, int *addr_len)
 {
 	size_t copied = 0;
 	int err = -EOPNOTSUPP;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4ce954cc94a4..64a9c0fdc4aa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -716,8 +716,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 }
 EXPORT_SYMBOL(inet_getname);
 
-int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		 size_t size)
+int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 
@@ -728,7 +727,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	    inet_autobind(sk))
 		return -EAGAIN;
 
-	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
+	return sk->sk_prot->sendmsg(sk, msg, size);
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
@@ -750,8 +749,8 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 }
 EXPORT_SYMBOL(inet_sendpage);
 
-int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		 size_t size, int flags)
+int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		 int flags)
 {
 	struct sock *sk = sock->sk;
 	int addr_len = 0;
@@ -759,7 +758,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 
 	sock_rps_record_flow(sk);
 
-	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
 				   flags & ~MSG_DONTWAIT, &addr_len);
 	if (err >= 0)
 		msg->msg_namelen = addr_len;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e9f66e1cda50..3648e7f32f3d 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -684,8 +684,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
 }
 EXPORT_SYMBOL_GPL(ping_common_sendmsg);
 
-static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-			   size_t len)
+static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct net *net = sock_net(sk);
 	struct flowi4 fl4;
@@ -841,8 +840,8 @@ do_confirm:
 	goto out;
 }
 
-int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		 size_t len, int noblock, int flags, int *addr_len)
+int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+		 int flags, int *addr_len)
 {
 	struct inet_sock *isk = inet_sk(sk);
 	int family = sk->sk_family;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f027a708b7e0..923cf538fce1 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -481,8 +481,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
 	return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
 }
 
-static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		       size_t len)
+static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipcm_cookie ipc;
@@ -709,8 +708,8 @@ out:	return ret;
  *	we return it, otherwise we block.
  */
 
-static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		       size_t len, int noblock, int flags, int *addr_len)
+static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		       int noblock, int flags, int *addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	size_t copied = 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4b57ea8dabc7..d939c35001f9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1064,8 +1064,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 	return err;
 }
 
-int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		size_t size)
+int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1543,8 +1542,8 @@ EXPORT_SYMBOL(tcp_read_sock);
  *	Probably, code can be easily improved even more.
  */
 
-int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		size_t len, int nonblock, int flags, int *addr_len)
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+		int flags, int *addr_len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int copied = 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0224f930c613..f27556e2158b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -873,8 +873,7 @@ out:
 }
 EXPORT_SYMBOL(udp_push_pending_frames);
 
-int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		size_t len)
+int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct udp_sock *up = udp_sk(sk);
@@ -1136,7 +1135,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
 		 * sendpage interface can't pass.
 		 * This will succeed only when the socket is connected.
 		 */
-		ret = udp_sendmsg(NULL, sk, &msg, 0);
+		ret = udp_sendmsg(sk, &msg, 0);
 		if (ret < 0)
 			return ret;
 	}
@@ -1254,8 +1253,8 @@ EXPORT_SYMBOL(udp_ioctl);
  * 	return it, otherwise we block.
  */
 
-int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		size_t len, int noblock, int flags, int *addr_len)
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+		int flags, int *addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index f3c27899f62b..7e0fe4bdd967 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -21,8 +21,8 @@ int compat_udp_setsockopt(struct sock *sk, int level, int optname,
 int compat_udp_getsockopt(struct sock *sk, int level, int optname,
 			  char __user *optval, int __user *optlen);
 #endif
-int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		size_t len, int noblock, int flags, int *addr_len);
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+		int flags, int *addr_len);
 int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
 		 int flags);
 int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index bd46f736f61d..fee25c0ed1f5 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -77,8 +77,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 	return 0;
 }
 
-int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		    size_t len)
+int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0d84b2c7f24e..a5287b3582a4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -456,9 +456,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
  *	we return it, otherwise we block.
  */
 
-static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
-		  struct msghdr *msg, size_t len,
-		  int noblock, int flags, int *addr_len)
+static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			 int noblock, int flags, int *addr_len)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
@@ -730,8 +729,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
 	return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
 }
 
-static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
-		   struct msghdr *msg, size_t len)
+static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct ipv6_txoptions opt_space;
 	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d048d46779fc..70568a4548e4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -391,8 +391,7 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup);
  *	return it, otherwise we block.
  */
 
-int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
-		  struct msghdr *msg, size_t len,
+int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		  int noblock, int flags, int *addr_len)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1101,8 +1100,7 @@ out:
 	return err;
 }
 
-int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
-		  struct msghdr *msg, size_t len)
+int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct ipv6_txoptions opt_space;
 	struct udp_sock *up = udp_sk(sk);
@@ -1164,12 +1162,12 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 do_udp_sendmsg:
 			if (__ipv6_only_sock(sk))
 				return -ENETUNREACH;
-			return udp_sendmsg(iocb, sk, msg, len);
+			return udp_sendmsg(sk, msg, len);
 		}
 	}
 
 	if (up->pending == AF_INET)
-		return udp_sendmsg(iocb, sk, msg, len);
+		return udp_sendmsg(sk, msg, len);
 
 	/* Rough check on arithmetic overflow,
 	   better check is made in ip6_append_data().
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index c779c3c90b9d..0682c031ccdc 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -23,10 +23,9 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
 int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
 			    char __user *optval, int __user *optlen);
 #endif
-int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		  size_t len);
-int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-		  size_t len, int noblock, int flags, int *addr_len);
+int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
+int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+		  int flags, int *addr_len);
 int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 void udpv6_destroy_sock(struct sock *sk);
 
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index f11ad1d95e0e..4ea5d7497b5f 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1688,8 +1688,7 @@ out:
 	return rc;
 }
 
-static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock,
-	struct msghdr *msg, size_t len)
+static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct ipx_sock *ipxs = ipx_sk(sk);
@@ -1754,8 +1753,8 @@ out:
 }
 
 
-static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
-		struct msghdr *msg, size_t size, int flags)
+static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		       int flags)
 {
 	struct sock *sk = sock->sk;
 	struct ipx_sock *ipxs = ipx_sk(sk);
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 568edc72d737..ee0ea25c8e7a 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1256,14 +1256,13 @@ static int irda_release(struct socket *sock)
 }
 
 /*
- * Function irda_sendmsg (iocb, sock, msg, len)
+ * Function irda_sendmsg (sock, msg, len)
  *
  *    Send message down to TinyTP. This function is used for both STREAM and
  *    SEQPACK services. This is possible since it forces the client to
  *    fragment the message if necessary
  */
-static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
-			struct msghdr *msg, size_t len)
+static int irda_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct irda_sock *self;
@@ -1348,13 +1347,13 @@ out:
 }
 
 /*
- * Function irda_recvmsg_dgram (iocb, sock, msg, size, flags)
+ * Function irda_recvmsg_dgram (sock, msg, size, flags)
  *
  *    Try to receive message and copy it to user. The frame is discarded
  *    after being read, regardless of how much the user actually read
  */
-static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
-			      struct msghdr *msg, size_t size, int flags)
+static int irda_recvmsg_dgram(struct socket *sock, struct msghdr *msg,
+			      size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct irda_sock *self = irda_sk(sk);
@@ -1398,10 +1397,10 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
 }
 
 /*
- * Function irda_recvmsg_stream (iocb, sock, msg, size, flags)
+ * Function irda_recvmsg_stream (sock, msg, size, flags)
  */
-static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
-			       struct msghdr *msg, size_t size, int flags)
+static int irda_recvmsg_stream(struct socket *sock, struct msghdr *msg,
+			       size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct irda_sock *self = irda_sk(sk);
@@ -1515,14 +1514,14 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 }
 
 /*
- * Function irda_sendmsg_dgram (iocb, sock, msg, len)
+ * Function irda_sendmsg_dgram (sock, msg, len)
  *
  *    Send message down to TinyTP for the unreliable sequenced
  *    packet service...
  *
  */
-static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
-			      struct msghdr *msg, size_t len)
+static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg,
+			      size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct irda_sock *self;
@@ -1594,14 +1593,14 @@ out:
 }
 
 /*
- * Function irda_sendmsg_ultra (iocb, sock, msg, len)
+ * Function irda_sendmsg_ultra (sock, msg, len)
  *
  *    Send message down to IrLMP for the unreliable Ultra
  *    packet service...
  */
 #ifdef CONFIG_IRDA_ULTRA
-static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
-			      struct msghdr *msg, size_t len)
+static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg,
+			      size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct irda_sock *self;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 2e9953b2db84..94b4c898a116 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1026,8 +1026,8 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg,
 				 (void *) prmdata, 8);
 }
 
-static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-			     struct msghdr *msg, size_t len)
+static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+			     size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct iucv_sock *iucv = iucv_sk(sk);
@@ -1317,8 +1317,8 @@ static void iucv_process_message_q(struct sock *sk)
 	}
 }
 
-static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-			     struct msghdr *msg, size_t len, int flags)
+static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+			     size_t len, int flags)
 {
 	int noblock = flags & MSG_DONTWAIT;
 	struct sock *sk = sock->sk;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f8ac939d52b4..9255fd9d94bc 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3588,8 +3588,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 }
 #endif
 
-static int pfkey_sendmsg(struct kiocb *kiocb,
-			 struct socket *sock, struct msghdr *msg, size_t len)
+static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb = NULL;
@@ -3630,8 +3629,7 @@ out:
 	return err ? : len;
 }
 
-static int pfkey_recvmsg(struct kiocb *kiocb,
-			 struct socket *sock, struct msghdr *msg, size_t len,
+static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 			 int flags)
 {
 	struct sock *sk = sock->sk;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 05dfc8aa36af..79649937ec71 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -385,7 +385,7 @@ drop:
 /* Userspace will call sendmsg() on the tunnel socket to send L2TP
  * control frames.
  */
-static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len)
+static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct sk_buff *skb;
 	int rc;
@@ -506,7 +506,7 @@ no_route:
 	goto out;
 }
 
-static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
 			   size_t len, int noblock, int flags, int *addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 8611f1b63141..d1ded3777815 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -480,8 +480,7 @@ out:
 /* Userspace will call sendmsg() on the tunnel socket to send L2TP
  * control frames.
  */
-static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
-			    struct msghdr *msg, size_t len)
+static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct ipv6_txoptions opt_space;
 	DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
@@ -643,9 +642,8 @@ do_confirm:
 	goto done;
 }
 
-static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
-			    struct msghdr *msg, size_t len, int noblock,
-			    int flags, int *addr_len)
+static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			    int noblock, int flags, int *addr_len)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index cc7a828fc914..e9b0dec56b8e 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -185,9 +185,8 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
 
 /* Receive message. This is the recvmsg for the PPPoL2TP socket.
  */
-static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *msg, size_t len,
-			    int flags)
+static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg,
+			    size_t len, int flags)
 {
 	int err;
 	struct sk_buff *skb;
@@ -295,7 +294,7 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session)
  * when a user application does a sendmsg() on the session socket. L2TP and
  * PPP headers must be inserted into the user's data.
  */
-static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
 			    size_t total_len)
 {
 	static const unsigned char ppph[2] = { 0xff, 0x03 };
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 2c0b83ce43bd..17a8dff06090 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -704,8 +704,8 @@ out:
  *	Copy received data to the socket user.
  *	Returns non-negative upon success, negative otherwise.
  */
-static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
-			  struct msghdr *msg, size_t len, int flags)
+static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+			  int flags)
 {
 	DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name);
 	const int nonblock = flags & MSG_DONTWAIT;
@@ -878,8 +878,7 @@ copy_uaddr:
  *	Transmit data provided by the socket user.
  *	Returns non-negative upon success, negative otherwise.
  */
-static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock,
-			  struct msghdr *msg, size_t len)
+static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct llc_sock *llc = llc_sk(sk);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2702673f0f23..a96025c0583f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2256,8 +2256,7 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 	put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
 }
 
-static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
-			   struct msghdr *msg, size_t len)
+static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
@@ -2346,8 +2345,7 @@ out:
 	return err;
 }
 
-static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
-			   struct msghdr *msg, size_t len,
+static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 			   int flags)
 {
 	struct scm_cookie scm;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 69f1d5e9959f..b987fd56c3c5 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1023,8 +1023,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
 	return 1;
 }
 
-static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
-		      struct msghdr *msg, size_t len)
+static int nr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct nr_sock *nr = nr_sk(sk);
@@ -1133,8 +1132,8 @@ out:
 	return err;
 }
 
-static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
-		      struct msghdr *msg, size_t size, int flags)
+static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		      int flags)
 {
 	struct sock *sk = sock->sk;
 	DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index e181e290427c..9578bd6a4f3e 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -750,8 +750,8 @@ error:
 	return ret;
 }
 
-static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-			     struct msghdr *msg, size_t len)
+static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+			     size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -793,8 +793,8 @@ static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	return nfc_llcp_send_i_frame(llcp_sock, msg, len);
 }
 
-static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-			     struct msghdr *msg, size_t len, int flags)
+static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+			     size_t len, int flags)
 {
 	int noblock = flags & MSG_DONTWAIT;
 	struct sock *sk = sock->sk;
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 373e138c0ab6..82b4e8024778 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -211,8 +211,7 @@ static void rawsock_tx_work(struct work_struct *work)
 	}
 }
 
-static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock,
-			   struct msghdr *msg, size_t len)
+static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct nfc_dev *dev = nfc_rawsock(sk)->dev;
@@ -248,8 +247,8 @@ static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	return len;
 }
 
-static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock,
-			   struct msghdr *msg, size_t len, int flags)
+static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+			   int flags)
 {
 	int noblock = flags & MSG_DONTWAIT;
 	struct sock *sk = sock->sk;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9db83693d736..404c9735aee9 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1606,8 +1606,8 @@ oom:
  *	protocol layers and you must therefore supply it with a complete frame
  */
 
-static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
-			       struct msghdr *msg, size_t len)
+static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
+			       size_t len)
 {
 	struct sock *sk = sock->sk;
 	DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
@@ -2601,8 +2601,7 @@ out:
 	return err;
 }
 
-static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
-		struct msghdr *msg, size_t len)
+static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct packet_sock *po = pkt_sk(sk);
@@ -2882,8 +2881,8 @@ out:
  *	If necessary we block.
  */
 
-static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
-			  struct msghdr *msg, size_t len, int flags)
+static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+			  int flags)
 {
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 26054b4b467c..5e710435ffa9 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -83,8 +83,7 @@ static int pn_init(struct sock *sk)
 	return 0;
 }
 
-static int pn_sendmsg(struct kiocb *iocb, struct sock *sk,
-			struct msghdr *msg, size_t len)
+static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name);
 	struct sk_buff *skb;
@@ -125,9 +124,8 @@ static int pn_sendmsg(struct kiocb *iocb, struct sock *sk,
 	return (err >= 0) ? len : err;
 }
 
-static int pn_recvmsg(struct kiocb *iocb, struct sock *sk,
-			struct msghdr *msg, size_t len, int noblock,
-			int flags, int *addr_len)
+static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		      int noblock, int flags, int *addr_len)
 {
 	struct sk_buff *skb = NULL;
 	struct sockaddr_pn sa;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 5d3f2b7507d4..6de2aeb98a1f 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1118,8 +1118,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
 
 }
 
-static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
-			struct msghdr *msg, size_t len)
+static int pep_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct pep_sock *pn = pep_sk(sk);
 	struct sk_buff *skb;
@@ -1246,9 +1245,8 @@ struct sk_buff *pep_read(struct sock *sk)
 	return skb;
 }
 
-static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
-			struct msghdr *msg, size_t len, int noblock,
-			int flags, int *addr_len)
+static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		       int noblock, int flags, int *addr_len)
 {
 	struct sk_buff *skb;
 	int err;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 008214a3d5eb..d575ef4e9aa6 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -425,15 +425,15 @@ out:
 	return err;
 }
 
-static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock,
-				struct msghdr *m, size_t total_len)
+static int pn_socket_sendmsg(struct socket *sock, struct msghdr *m,
+			     size_t total_len)
 {
 	struct sock *sk = sock->sk;
 
 	if (pn_socket_autobind(sock))
 		return -EAGAIN;
 
-	return sk->sk_prot->sendmsg(iocb, sk, m, total_len);
+	return sk->sk_prot->sendmsg(sk, m, total_len);
 }
 
 const struct proto_ops phonet_dgram_ops = {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c2a5eef41343..c3f2855c3d84 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -702,8 +702,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
 void rds_inc_put(struct rds_incoming *inc);
 void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
 		       struct rds_incoming *inc, gfp_t gfp);
-int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		size_t size, int msg_flags);
+int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		int msg_flags);
 void rds_clear_recv_queue(struct rds_sock *rs);
 int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
 void rds_inc_info_copy(struct rds_incoming *inc,
@@ -711,8 +711,7 @@ void rds_inc_info_copy(struct rds_incoming *inc,
 		       __be32 saddr, __be32 daddr, int flip);
 
 /* send.c */
-int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		size_t payload_len);
+int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
 void rds_send_reset(struct rds_connection *conn);
 int rds_send_xmit(struct rds_connection *conn);
 struct sockaddr_in;
diff --git a/net/rds/recv.c b/net/rds/recv.c
index f9ec1acd801c..a00462b0d01d 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -395,8 +395,8 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg)
 	return 0;
 }
 
-int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		size_t size, int msg_flags)
+int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		int msg_flags)
 {
 	struct sock *sk = sock->sk;
 	struct rds_sock *rs = rds_sk_to_rs(sk);
diff --git a/net/rds/send.c b/net/rds/send.c
index 42f65d4305c8..44672befc0ee 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -920,8 +920,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
 	return ret;
 }
 
-int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
-		size_t payload_len)
+int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 {
 	struct sock *sk = sock->sk;
 	struct rds_sock *rs = rds_sk_to_rs(sk);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 43bac7c4dd9e..8ae603069a1a 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1046,8 +1046,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
 	return 1;
 }
 
-static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
-			struct msghdr *msg, size_t len)
+static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct rose_sock *rose = rose_sk(sk);
@@ -1211,8 +1210,8 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
 }
 
 
-static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
-			struct msghdr *msg, size_t size, int flags)
+static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+			int flags)
 {
 	struct sock *sk = sock->sk;
 	struct rose_sock *rose = rose_sk(sk);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 7b1670489638..0095b9a0b779 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -441,8 +441,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
  *   - sends a call data packet
  *   - may send an abort (abort code in control data)
  */
-static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
-			 struct msghdr *m, size_t len)
+static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 {
 	struct rxrpc_transport *trans;
 	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
@@ -482,7 +481,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
 	switch (rx->sk.sk_state) {
 	case RXRPC_SERVER_LISTENING:
 		if (!m->msg_name) {
-			ret = rxrpc_server_sendmsg(iocb, rx, m, len);
+			ret = rxrpc_server_sendmsg(rx, m, len);
 			break;
 		}
 	case RXRPC_SERVER_BOUND:
@@ -492,7 +491,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
 			break;
 		}
 	case RXRPC_CLIENT_CONNECTED:
-		ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len);
+		ret = rxrpc_client_sendmsg(rx, trans, m, len);
 		break;
 	default:
 		ret = -ENOTCONN;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ba9fd36d3f15..2fc1e659e5c9 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -548,10 +548,9 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
 extern unsigned rxrpc_resend_timeout;
 
 int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
-int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
-			 struct rxrpc_transport *, struct msghdr *, size_t);
-int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, struct msghdr *,
-			 size_t);
+int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *,
+			 struct msghdr *, size_t);
+int rxrpc_server_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
 
 /*
  * ar-peer.c
@@ -572,8 +571,7 @@ extern const struct file_operations rxrpc_connection_seq_fops;
  * ar-recvmsg.c
  */
 void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
-int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t,
-		  int);
+int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
 
 /*
  * ar-security.c
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 8331c95e1522..09f584566e23 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -23,8 +23,7 @@
  */
 unsigned rxrpc_resend_timeout = 4 * HZ;
 
-static int rxrpc_send_data(struct kiocb *iocb,
-			   struct rxrpc_sock *rx,
+static int rxrpc_send_data(struct rxrpc_sock *rx,
 			   struct rxrpc_call *call,
 			   struct msghdr *msg, size_t len);
 
@@ -129,9 +128,8 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
  * - caller holds the socket locked
  * - the socket may be either a client socket or a server socket
  */
-int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
-			 struct rxrpc_transport *trans, struct msghdr *msg,
-			 size_t len)
+int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
+			 struct msghdr *msg, size_t len)
 {
 	struct rxrpc_conn_bundle *bundle;
 	enum rxrpc_command cmd;
@@ -191,7 +189,7 @@ int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
 		/* request phase complete for this client call */
 		ret = -EPROTO;
 	} else {
-		ret = rxrpc_send_data(iocb, rx, call, msg, len);
+		ret = rxrpc_send_data(rx, call, msg, len);
 	}
 
 	rxrpc_put_call(call);
@@ -232,7 +230,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
 		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
 		ret = -EPROTO; /* request phase complete for this client call */
 	} else {
-		ret = rxrpc_send_data(NULL, call->socket, call, msg, len);
+		ret = rxrpc_send_data(call->socket, call, msg, len);
 	}
 
 	release_sock(&call->socket->sk);
@@ -271,8 +269,7 @@ EXPORT_SYMBOL(rxrpc_kernel_abort_call);
  * send a message through a server socket
  * - caller holds the socket locked
  */
-int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
-			 struct msghdr *msg, size_t len)
+int rxrpc_server_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 {
 	enum rxrpc_command cmd;
 	struct rxrpc_call *call;
@@ -313,7 +310,7 @@ int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
 			break;
 		}
 
-		ret = rxrpc_send_data(iocb, rx, call, msg, len);
+		ret = rxrpc_send_data(rx, call, msg, len);
 		break;
 
 	case RXRPC_CMD_SEND_ABORT:
@@ -520,8 +517,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
  * - must be called in process context
  * - caller holds the socket locked
  */
-static int rxrpc_send_data(struct kiocb *iocb,
-			   struct rxrpc_sock *rx,
+static int rxrpc_send_data(struct rxrpc_sock *rx,
 			   struct rxrpc_call *call,
 			   struct msghdr *msg, size_t len)
 {
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index d58ba702bd2c..a4f883e2d66f 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -43,8 +43,8 @@ void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
  * - we need to be careful about two or more threads calling recvmsg
  *   simultaneously
  */
-int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
-		  struct msghdr *msg, size_t len, int flags)
+int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		  int flags)
 {
 	struct rxrpc_skb_priv *sp;
 	struct rxrpc_call *call = NULL, *continue_call = NULL;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index aafe94bf292e..f1a65398f311 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1586,8 +1586,7 @@ static int sctp_error(struct sock *sk, int flags, int err)
 
 static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
 
-static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
-			struct msghdr *msg, size_t msg_len)
+static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
 	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
@@ -2066,9 +2065,8 @@ static int sctp_skb_pull(struct sk_buff *skb, int len)
  *  flags   - flags sent or received with the user message, see Section
  *            5 for complete description of the flags.
  */
-static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
-			struct msghdr *msg, size_t len, int noblock,
-			int flags, int *addr_len)
+static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			int noblock, int flags, int *addr_len)
 {
 	struct sctp_ulpevent *event = NULL;
 	struct sctp_sock *sp = sctp_sk(sk);
diff --git a/net/socket.c b/net/socket.c
index b78cf601a021..95d3085cb477 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -610,45 +610,20 @@ void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
 }
 EXPORT_SYMBOL(__sock_tx_timestamp);
 
-static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
-				       struct msghdr *msg, size_t size)
+static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg,
+				     size_t size)
 {
-	return sock->ops->sendmsg(iocb, sock, msg, size);
+	return sock->ops->sendmsg(sock, msg, size);
 }
 
-static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-				 struct msghdr *msg, size_t size)
+int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	int err = security_socket_sendmsg(sock, msg, size);
 
-	return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
-}
-
-static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
-			   size_t size, bool nosec)
-{
-	struct kiocb iocb;
-	int ret;
-
-	init_sync_kiocb(&iocb, NULL);
-	ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
-		      __sock_sendmsg(&iocb, sock, msg, size);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
-	return ret;
-}
-
-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
-{
-	return do_sock_sendmsg(sock, msg, size, false);
+	return err ?: sock_sendmsg_nosec(sock, msg, size);
 }
 EXPORT_SYMBOL(sock_sendmsg);
 
-static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
-{
-	return do_sock_sendmsg(sock, msg, size, true);
-}
-
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size)
 {
@@ -744,47 +719,21 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
 
-static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
-				       struct msghdr *msg, size_t size, int flags)
+static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
+				     size_t size, int flags)
 {
-	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+	return sock->ops->recvmsg(sock, msg, size, flags);
 }
 
-static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-				 struct msghdr *msg, size_t size, int flags)
+int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		 int flags)
 {
 	int err = security_socket_recvmsg(sock, msg, size, flags);
 
-	return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
-}
-
-int sock_recvmsg(struct socket *sock, struct msghdr *msg,
-		 size_t size, int flags)
-{
-	struct kiocb iocb;
-	int ret;
-
-	init_sync_kiocb(&iocb, NULL);
-	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
-	return ret;
+	return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
 }
 EXPORT_SYMBOL(sock_recvmsg);
 
-static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
-			      size_t size, int flags)
-{
-	struct kiocb iocb;
-	int ret;
-
-	init_sync_kiocb(&iocb, NULL);
-	ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
-	return ret;
-}
-
 /**
  * kernel_recvmsg - Receive a message from a socket (kernel space)
  * @sock:       The socket to receive the message from
@@ -861,8 +810,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (iocb->ki_nbytes == 0)	/* Match SYS5 behaviour */
 		return 0;
 
-	res = __sock_recvmsg(iocb, sock, &msg,
-			     iocb->ki_nbytes, msg.msg_flags);
+	res = sock_recvmsg(sock, &msg, iocb->ki_nbytes, msg.msg_flags);
 	*to = msg.msg_iter;
 	return res;
 }
@@ -883,7 +831,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (sock->type == SOCK_SEQPACKET)
 		msg.msg_flags |= MSG_EOR;
 
-	res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
+	res = sock_sendmsg(sock, &msg, iocb->ki_nbytes);
 	*from = msg.msg_iter;
 	return res;
 }
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c245ec31fa4c..dcb797c60806 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -895,7 +895,6 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
 
 /**
  * tipc_sendmsg - send message in connectionless manner
- * @iocb: if NULL, indicates that socket lock is already held
  * @sock: socket structure
  * @m: message to send
  * @dsz: amount of user data to be sent
@@ -907,7 +906,7 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
  *
  * Returns the number of bytes sent on success, or errno otherwise
  */
-static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
+static int tipc_sendmsg(struct socket *sock,
 			struct msghdr *m, size_t dsz)
 {
 	struct sock *sk = sock->sk;
@@ -1052,7 +1051,6 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
 
 /**
  * tipc_send_stream - send stream-oriented data
- * @iocb: (unused)
  * @sock: socket structure
  * @m: data to send
  * @dsz: total length of data to be transmitted
@@ -1062,8 +1060,7 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
  * Returns the number of bytes sent on success (or partial success),
  * or errno if no data sent
  */
-static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *m, size_t dsz)
+static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 {
 	struct sock *sk = sock->sk;
 	int ret;
@@ -1147,7 +1144,6 @@ next:
 
 /**
  * tipc_send_packet - send a connection-oriented message
- * @iocb: if NULL, indicates that socket lock is already held
  * @sock: socket structure
  * @m: message to send
  * @dsz: length of data to be transmitted
@@ -1156,13 +1152,12 @@ next:
  *
  * Returns the number of bytes sent on success, or errno otherwise
  */
-static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *m, size_t dsz)
+static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
 {
 	if (dsz > TIPC_MAX_USER_MSG_SIZE)
 		return -EMSGSIZE;
 
-	return tipc_send_stream(iocb, sock, m, dsz);
+	return tipc_send_stream(sock, m, dsz);
 }
 
 /* tipc_sk_finish_conn - complete the setup of a connection
@@ -1337,7 +1332,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
 
 /**
  * tipc_recvmsg - receive packet-oriented message
- * @iocb: (unused)
  * @m: descriptor for message info
  * @buf_len: total size of user buffer area
  * @flags: receive flags
@@ -1347,8 +1341,8 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
  *
  * Returns size of returned message data, errno otherwise
  */
-static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,
-			struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
+			int flags)
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
@@ -1432,7 +1426,6 @@ exit:
 
 /**
  * tipc_recv_stream - receive stream-oriented data
- * @iocb: (unused)
  * @m: descriptor for message info
  * @buf_len: total size of user buffer area
  * @flags: receive flags
@@ -1442,8 +1435,8 @@ exit:
  *
  * Returns size of returned message data, errno otherwise
  */
-static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *m, size_t buf_len, int flags)
+static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
+			    size_t buf_len, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 526b6edab018..433f287ee548 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -516,20 +516,15 @@ static unsigned int unix_dgram_poll(struct file *, struct socket *,
 				    poll_table *);
 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 static int unix_shutdown(struct socket *, int);
-static int unix_stream_sendmsg(struct kiocb *, struct socket *,
-			       struct msghdr *, size_t);
-static int unix_stream_recvmsg(struct kiocb *, struct socket *,
-			       struct msghdr *, size_t, int);
-static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
-			      struct msghdr *, size_t);
-static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
-			      struct msghdr *, size_t, int);
+static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
+static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
+static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 			      int, int);
-static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
-				  struct msghdr *, size_t);
-static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
-				  struct msghdr *, size_t, int);
+static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
+static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
+				  int);
 
 static int unix_set_peek_off(struct sock *sk, int val)
 {
@@ -1442,8 +1437,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
  *	Send AF_UNIX data.
  */
 
-static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
-			      struct msghdr *msg, size_t len)
+static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
+			      size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
@@ -1622,8 +1617,8 @@ out:
  */
 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
 
-static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
-			       struct msghdr *msg, size_t len)
+static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+			       size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct sock *other = NULL;
@@ -1725,8 +1720,8 @@ out_err:
 	return sent ? : err;
 }
 
-static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
-				  struct msghdr *msg, size_t len)
+static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
+				  size_t len)
 {
 	int err;
 	struct sock *sk = sock->sk;
@@ -1741,19 +1736,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (msg->msg_namelen)
 		msg->msg_namelen = 0;
 
-	return unix_dgram_sendmsg(kiocb, sock, msg, len);
+	return unix_dgram_sendmsg(sock, msg, len);
 }
 
-static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
-			      struct msghdr *msg, size_t size,
-			      int flags)
+static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
+				  size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 
 	if (sk->sk_state != TCP_ESTABLISHED)
 		return -ENOTCONN;
 
-	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
+	return unix_dgram_recvmsg(sock, msg, size, flags);
 }
 
 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
@@ -1766,9 +1760,8 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
 	}
 }
 
-static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
-			      struct msghdr *msg, size_t size,
-			      int flags)
+static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+			      size_t size, int flags)
 {
 	struct scm_cookie scm;
 	struct sock *sk = sock->sk;
@@ -1900,9 +1893,8 @@ static unsigned int unix_skb_len(const struct sk_buff *skb)
 	return skb->len - UNIXCB(skb).consumed;
 }
 
-static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
-			       struct msghdr *msg, size_t size,
-			       int flags)
+static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
+			       size_t size, int flags)
 {
 	struct scm_cookie scm;
 	struct sock *sk = sock->sk;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 1d0e39c9a3e2..2ec86e652a19 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -949,8 +949,8 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
 	return mask;
 }
 
-static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
-			       struct msghdr *msg, size_t len)
+static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
+			       size_t len)
 {
 	int err;
 	struct sock *sk;
@@ -1062,11 +1062,10 @@ out:
 	return err;
 }
 
-static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock,
-			       struct msghdr *msg, size_t len, int flags)
+static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
+			       size_t len, int flags)
 {
-	return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len,
-					flags);
+	return transport->dgram_dequeue(vsock_sk(sock->sk), msg, len, flags);
 }
 
 static const struct proto_ops vsock_dgram_ops = {
@@ -1505,8 +1504,8 @@ static int vsock_stream_getsockopt(struct socket *sock,
 	return 0;
 }
 
-static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
-				struct msghdr *msg, size_t len)
+static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
+				size_t len)
 {
 	struct sock *sk;
 	struct vsock_sock *vsk;
@@ -1644,9 +1643,8 @@ out:
 
 
 static int
-vsock_stream_recvmsg(struct kiocb *kiocb,
-		     struct socket *sock,
-		     struct msghdr *msg, size_t len, int flags)
+vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		     int flags)
 {
 	struct sock *sk;
 	struct vsock_sock *vsk;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 7f3255084a6c..c294da095461 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1730,8 +1730,7 @@ static int vmci_transport_dgram_enqueue(
 	return err - sizeof(*dg);
 }
 
-static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
-					struct vsock_sock *vsk,
+static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
 					struct msghdr *msg, size_t len,
 					int flags)
 {
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d9149b68b9bc..c3ab230e4493 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1077,8 +1077,7 @@ out_clear_request:
 	goto out;
 }
 
-static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *msg, size_t len)
+static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
 	struct x25_sock *x25 = x25_sk(sk);
@@ -1252,8 +1251,7 @@ out_kfree_skb:
 }
 
 
-static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *msg, size_t size,
+static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		       int flags)
 {
 	struct sock *sk = sock->sk;
-- 
cgit v1.2.3


From 61e021f3b86cbbcc04cbe8ac7b7da2b8c94b5e8e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 2 Mar 2015 15:21:55 +0100
Subject: ebpf: move CONFIG_BPF_SYSCALL-only function declarations

Masami noted that it would be better to hide the remaining CONFIG_BPF_SYSCALL-only
function declarations within the BPF header ifdef, w/o else path dummy alternatives
since these functions are not supposed to have a user outside of CONFIG_BPF_SYSCALL.

Suggested-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Reference: http://article.gmane.org/gmane.linux.kernel.api/8658
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a1a7ff2df328..a884f5a2c503 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -42,10 +42,6 @@ struct bpf_map_type_list {
 	enum bpf_map_type type;
 };
 
-void bpf_register_map_type(struct bpf_map_type_list *tl);
-void bpf_map_put(struct bpf_map *map);
-struct bpf_map *bpf_map_get(struct fd f);
-
 /* function argument constraints */
 enum bpf_arg_type {
 	ARG_ANYTHING = 0,	/* any argument is ok */
@@ -126,9 +122,16 @@ struct bpf_prog_aux {
 
 #ifdef CONFIG_BPF_SYSCALL
 void bpf_register_prog_type(struct bpf_prog_type_list *tl);
+void bpf_register_map_type(struct bpf_map_type_list *tl);
 
-void bpf_prog_put(struct bpf_prog *prog);
 struct bpf_prog *bpf_prog_get(u32 ufd);
+void bpf_prog_put(struct bpf_prog *prog);
+
+struct bpf_map *bpf_map_get(struct fd f);
+void bpf_map_put(struct bpf_map *map);
+
+/* verify correctness of eBPF program */
+int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
 #else
 static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
 {
@@ -142,10 +145,7 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
-#endif
-
-/* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
+#endif /* CONFIG_BPF_SYSCALL */
 
 /* verifier prototypes for helper functions called from eBPF programs */
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
-- 
cgit v1.2.3


From 46d4e47abee04c24055114d50e408078a09c4d36 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 2 Mar 2015 00:04:31 -0600
Subject: ax25: Make ax25_header and ax25_rebuild_header static

The only user is in ax25_ip.c so stop exporting these functions.

Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-hams@vger.kernel.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ax25.h |  3 ---
 net/ax25/ax25_ip.c | 18 ++++++++----------
 2 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/ax25.h b/include/net/ax25.h
index bf0396e9a5d3..7385a64b61b8 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -366,9 +366,6 @@ int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *,
 		  struct net_device *);
 
 /* ax25_ip.c */
-int ax25_hard_header(struct sk_buff *, struct net_device *, unsigned short,
-		     const void *, const void *, unsigned int);
-int ax25_rebuild_header(struct sk_buff *);
 extern const struct header_ops ax25_header_ops;
 
 /* ax25_out.c */
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index db3c283821d1..d93103ba8cec 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -46,9 +46,9 @@
 
 #ifdef CONFIG_INET
 
-int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
-		     unsigned short type, const void *daddr,
-		     const void *saddr, unsigned int len)
+static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
+			    unsigned short type, const void *daddr,
+			    const void *saddr, unsigned int len)
 {
 	unsigned char *buff;
 
@@ -100,7 +100,7 @@ int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
 	return -AX25_HEADER_LEN;	/* Unfinished header */
 }
 
-int ax25_rebuild_header(struct sk_buff *skb)
+static int ax25_rebuild_header(struct sk_buff *skb)
 {
 	struct sk_buff *ourskb;
 	unsigned char *bp  = skb->data;
@@ -218,14 +218,14 @@ put:
 
 #else	/* INET */
 
-int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
-		     unsigned short type, const void *daddr,
-		     const void *saddr, unsigned int len)
+static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
+			    unsigned short type, const void *daddr,
+			    const void *saddr, unsigned int len)
 {
 	return -AX25_HEADER_LEN;
 }
 
-int ax25_rebuild_header(struct sk_buff *skb)
+static int ax25_rebuild_header(struct sk_buff *skb)
 {
 	return 1;
 }
@@ -237,7 +237,5 @@ const struct header_ops ax25_header_ops = {
 	.rebuild = ax25_rebuild_header,
 };
 
-EXPORT_SYMBOL(ax25_hard_header);
-EXPORT_SYMBOL(ax25_rebuild_header);
 EXPORT_SYMBOL(ax25_header_ops);
 
-- 
cgit v1.2.3


From 3b6a94bed0029a6b48055d89b8dea0567abca0ac Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 2 Mar 2015 00:05:28 -0600
Subject: ax25: Refactor to use private neighbour operations.

AX25 already has it's own private arp cache operations to isolate
it's abuse of dev_rebuild_header to transmit packets.  Add a function
ax25_neigh_construct that will allow all of the ax25 devices to
force using these operations, so that the generic arp code does
not need to.

Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-hams@vger.kernel.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/6pack.c      |  2 ++
 drivers/net/hamradio/baycom_epp.c |  2 ++
 drivers/net/hamradio/bpqether.c   |  2 ++
 drivers/net/hamradio/dmascc.c     |  2 ++
 drivers/net/hamradio/hdlcdrv.c    |  2 ++
 drivers/net/hamradio/mkiss.c      |  2 ++
 drivers/net/hamradio/scc.c        |  2 ++
 drivers/net/hamradio/yam.c        |  2 ++
 include/net/ax25.h                |  5 +++++
 net/ax25/ax25_ip.c                | 21 +++++++++++++++++++++
 10 files changed, 42 insertions(+)

(limited to 'include')

diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 2533933c79dc..0b8393ca8c80 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -302,6 +302,7 @@ static const struct net_device_ops sp_netdev_ops = {
 	.ndo_stop		= sp_close,
 	.ndo_start_xmit		= sp_xmit,
 	.ndo_set_mac_address    = sp_set_mac_address,
+	.ndo_neigh_construct	= ax25_neigh_construct,
 };
 
 static void sp_setup(struct net_device *dev)
@@ -315,6 +316,7 @@ static void sp_setup(struct net_device *dev)
 
 	dev->addr_len		= AX25_ADDR_LEN;
 	dev->type		= ARPHRD_AX25;
+	dev->neigh_priv_len	= sizeof(struct ax25_neigh_priv);
 	dev->tx_queue_len	= 10;
 
 	/* Only activated in AX.25 mode */
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index a98c153f371e..3539ab392f7d 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -1109,6 +1109,7 @@ static const struct net_device_ops baycom_netdev_ops = {
 	.ndo_do_ioctl	     = baycom_ioctl,
 	.ndo_start_xmit      = baycom_send_packet,
 	.ndo_set_mac_address = baycom_set_mac_address,
+	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 /*
@@ -1146,6 +1147,7 @@ static void baycom_probe(struct net_device *dev)
 	dev->header_ops = &ax25_header_ops;
 	
 	dev->type = ARPHRD_AX25;           /* AF_AX25 device */
+	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
 	dev->mtu = AX25_DEF_PACLEN;        /* eth_mtu is the default */
 	dev->addr_len = AX25_ADDR_LEN;     /* sizeof an ax.25 address */
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index c2894e43840e..bce105b16ed0 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -469,6 +469,7 @@ static const struct net_device_ops bpq_netdev_ops = {
 	.ndo_start_xmit	     = bpq_xmit,
 	.ndo_set_mac_address = bpq_set_mac_address,
 	.ndo_do_ioctl	     = bpq_ioctl,
+	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 static void bpq_setup(struct net_device *dev)
@@ -486,6 +487,7 @@ static void bpq_setup(struct net_device *dev)
 #endif
 
 	dev->type            = ARPHRD_AX25;
+	dev->neigh_priv_len  = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
 	dev->mtu             = AX25_DEF_PACLEN;
 	dev->addr_len        = AX25_ADDR_LEN;
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index 0fad408f24aa..abab7be77406 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -433,6 +433,7 @@ module_exit(dmascc_exit);
 static void __init dev_setup(struct net_device *dev)
 {
 	dev->type = ARPHRD_AX25;
+	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN;
 	dev->mtu = 1500;
 	dev->addr_len = AX25_ADDR_LEN;
@@ -447,6 +448,7 @@ static const struct net_device_ops scc_netdev_ops = {
 	.ndo_start_xmit = scc_send_packet,
 	.ndo_do_ioctl = scc_ioctl,
 	.ndo_set_mac_address = scc_set_mac_address,
+	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 static int __init setup_adapter(int card_base, int type, int n)
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index c67a27245072..435868a7b69c 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -626,6 +626,7 @@ static const struct net_device_ops hdlcdrv_netdev = {
 	.ndo_start_xmit = hdlcdrv_send_packet,
 	.ndo_do_ioctl	= hdlcdrv_ioctl,
 	.ndo_set_mac_address = hdlcdrv_set_mac_address,
+	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 /*
@@ -676,6 +677,7 @@ static void hdlcdrv_setup(struct net_device *dev)
 	dev->header_ops = &ax25_header_ops;
 	
 	dev->type = ARPHRD_AX25;           /* AF_AX25 device */
+	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
 	dev->mtu = AX25_DEF_PACLEN;        /* eth_mtu is the default */
 	dev->addr_len = AX25_ADDR_LEN;     /* sizeof an ax.25 address */
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index e37c8d515ce8..c12ec2c2b594 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -641,6 +641,7 @@ static const struct net_device_ops ax_netdev_ops = {
 	.ndo_stop            = ax_close,
 	.ndo_start_xmit	     = ax_xmit,
 	.ndo_set_mac_address = ax_set_mac_address,
+	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 static void ax_setup(struct net_device *dev)
@@ -650,6 +651,7 @@ static void ax_setup(struct net_device *dev)
 	dev->hard_header_len = 0;
 	dev->addr_len        = 0;
 	dev->type            = ARPHRD_AX25;
+	dev->neigh_priv_len  = sizeof(struct ax25_neigh_priv);
 	dev->tx_queue_len    = 10;
 	dev->header_ops      = &ax25_header_ops;
 	dev->netdev_ops	     = &ax_netdev_ops;
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 57be9e0e98a6..b305f51eb420 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -1550,6 +1550,7 @@ static const struct net_device_ops scc_netdev_ops = {
 	.ndo_set_mac_address = scc_net_set_mac_address,
 	.ndo_get_stats       = scc_net_get_stats,
 	.ndo_do_ioctl        = scc_net_ioctl,
+	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 /* ----> Initialize device <----- */
@@ -1567,6 +1568,7 @@ static void scc_net_setup(struct net_device *dev)
 	dev->flags      = 0;
 
 	dev->type = ARPHRD_AX25;
+	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
 	dev->mtu = AX25_DEF_PACLEN;
 	dev->addr_len = AX25_ADDR_LEN;
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 717433cfb81d..89d9da7a0c51 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -1100,6 +1100,7 @@ static const struct net_device_ops yam_netdev_ops = {
 	.ndo_start_xmit      = yam_send_packet,
 	.ndo_do_ioctl 	     = yam_ioctl,
 	.ndo_set_mac_address = yam_set_mac_address,
+	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 static void yam_setup(struct net_device *dev)
@@ -1128,6 +1129,7 @@ static void yam_setup(struct net_device *dev)
 	dev->header_ops = &ax25_header_ops;
 
 	dev->type = ARPHRD_AX25;
+	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN;
 	dev->mtu = AX25_MTU;
 	dev->addr_len = AX25_ADDR_LEN;
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 7385a64b61b8..45feeba7a325 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -12,6 +12,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/atomic.h>
+#include <net/neighbour.h>
 
 #define	AX25_T1CLAMPLO  		1
 #define	AX25_T1CLAMPHI 			(30 * HZ)
@@ -366,7 +367,11 @@ int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *,
 		  struct net_device *);
 
 /* ax25_ip.c */
+int ax25_neigh_construct(struct neighbour *neigh);
 extern const struct header_ops ax25_header_ops;
+struct ax25_neigh_priv {
+	struct neigh_ops ops;
+};
 
 /* ax25_out.c */
 ax25_cb *ax25_send_frame(struct sk_buff *, int, ax25_address *, ax25_address *,
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index d93103ba8cec..bff12e0c9090 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -216,6 +216,22 @@ put:
 	return 1;
 }
 
+int ax25_neigh_construct(struct neighbour *neigh)
+{
+	/* This trouble could be saved if ax25 would right a proper
+	 * dev_queue_xmit function.
+	 */
+	struct ax25_neigh_priv *priv = neighbour_priv(neigh);
+
+	if (neigh->tbl->family != AF_INET)
+		return -EINVAL;
+
+	priv->ops = *neigh->ops;
+	priv->ops.output = neigh_compat_output;
+	priv->ops.connected_output = neigh_compat_output;
+	return 0;
+}
+
 #else	/* INET */
 
 static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
@@ -230,6 +246,10 @@ static int ax25_rebuild_header(struct sk_buff *skb)
 	return 1;
 }
 
+int ax25_neigh_construct(struct neighbour *neigh)
+{
+	return 0;
+}
 #endif
 
 const struct header_ops ax25_header_ops = {
@@ -238,4 +258,5 @@ const struct header_ops ax25_header_ops = {
 };
 
 EXPORT_SYMBOL(ax25_header_ops);
+EXPORT_SYMBOL(ax25_neigh_construct);
 
-- 
cgit v1.2.3


From def6775369fab9d36817b9a6bc58cab67f53f1dc Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 2 Mar 2015 00:07:37 -0600
Subject: neigh: Move neigh_compat_output into ax25_ip.c

The only caller is now is ax25_neigh_construct so move
neigh_compat_output into ax25_ip.c make it static and rename it
ax25_neigh_output.

Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-hams@vger.kernel.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h |  1 -
 net/ax25/ax25_ip.c      | 18 ++++++++++++++++--
 net/core/neighbour.c    | 20 --------------------
 3 files changed, 16 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 76f708486aae..bc66babb5f27 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -268,7 +268,6 @@ void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb);
 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb);
-int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb);
 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb);
 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
 						u8 *lladdr, void *saddr,
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index bff12e0c9090..cc7415b33cfb 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -216,6 +216,20 @@ put:
 	return 1;
 }
 
+static int ax25_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+
+	__skb_pull(skb, skb_network_offset(skb));
+
+	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
+			    skb->len) < 0 &&
+	    dev_rebuild_header(skb))
+		return 0;
+
+	return dev_queue_xmit(skb);
+}
+
 int ax25_neigh_construct(struct neighbour *neigh)
 {
 	/* This trouble could be saved if ax25 would right a proper
@@ -227,8 +241,8 @@ int ax25_neigh_construct(struct neighbour *neigh)
 		return -EINVAL;
 
 	priv->ops = *neigh->ops;
-	priv->ops.output = neigh_compat_output;
-	priv->ops.connected_output = neigh_compat_output;
+	priv->ops.output = ax25_neigh_output;
+	priv->ops.connected_output = ax25_neigh_output;
 	return 0;
 }
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 70fe9e10ac86..8a319ff3e8d1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1280,26 +1280,6 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
 	write_unlock_bh(&n->lock);
 }
 
-/* This function can be used in contexts, where only old dev_queue_xmit
- * worked, f.e. if you want to override normal output path (eql, shaper),
- * but resolution is not made yet.
- */
-
-int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-
-	__skb_pull(skb, skb_network_offset(skb));
-
-	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
-			    skb->len) < 0 &&
-	    dev_rebuild_header(skb))
-		return 0;
-
-	return dev_queue_xmit(skb);
-}
-EXPORT_SYMBOL(neigh_compat_output);
-
 /* Slow and careful. */
 
 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
-- 
cgit v1.2.3


From d476059e77d1af48453a58f9de1e36f2eaff6450 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 2 Mar 2015 00:11:09 -0600
Subject: net: Kill dev_rebuild_header

Now that there are no more users kill dev_rebuild_header and all of it's
implementations.

This is long overdue.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/firewire/net.c                    | 13 --------
 drivers/isdn/i4l/isdn_net.c               | 33 -------------------
 drivers/media/dvb-core/dvb_net.c          |  1 -
 drivers/net/arcnet/arcnet.c               | 55 -------------------------------
 drivers/net/ipvlan/ipvlan_main.c          |  1 -
 drivers/net/macvlan.c                     |  1 -
 drivers/net/wireless/hostap/hostap_main.c |  1 -
 include/linux/etherdevice.h               |  1 -
 include/linux/netdevice.h                 | 12 +------
 net/802/fc.c                              | 21 ------------
 net/802/fddi.c                            | 26 ---------------
 net/802/hippi.c                           | 28 ----------------
 net/8021q/vlan_dev.c                      | 35 --------------------
 net/ethernet/eth.c                        | 34 -------------------
 net/netrom/nr_dev.c                       | 31 -----------------
 net/rose/rose_dev.c                       | 14 --------
 16 files changed, 1 insertion(+), 306 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 2c68da1ceeee..f4ea80d602f7 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -237,18 +237,6 @@ static int fwnet_header_create(struct sk_buff *skb, struct net_device *net,
 	return -net->hard_header_len;
 }
 
-static int fwnet_header_rebuild(struct sk_buff *skb)
-{
-	struct fwnet_header *h = (struct fwnet_header *)skb->data;
-
-	if (get_unaligned_be16(&h->h_proto) == ETH_P_IP)
-		return arp_find((unsigned char *)&h->h_dest, skb);
-
-	dev_notice(&skb->dev->dev, "unable to resolve type %04x addresses\n",
-		   be16_to_cpu(h->h_proto));
-	return 0;
-}
-
 static int fwnet_header_cache(const struct neighbour *neigh,
 			      struct hh_cache *hh, __be16 type)
 {
@@ -282,7 +270,6 @@ static int fwnet_header_parse(const struct sk_buff *skb, unsigned char *haddr)
 
 static const struct header_ops fwnet_header_ops = {
 	.create         = fwnet_header_create,
-	.rebuild        = fwnet_header_rebuild,
 	.cache		= fwnet_header_cache,
 	.cache_update	= fwnet_header_cache_update,
 	.parse          = fwnet_header_parse,
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 94affa5e6f28..546b7e81161d 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -1951,38 +1951,6 @@ static int isdn_net_header(struct sk_buff *skb, struct net_device *dev,
 	return len;
 }
 
-/* We don't need to send arp, because we have point-to-point connections. */
-static int
-isdn_net_rebuild_header(struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	isdn_net_local *lp = netdev_priv(dev);
-	int ret = 0;
-
-	if (lp->p_encap == ISDN_NET_ENCAP_ETHER) {
-		struct ethhdr *eth = (struct ethhdr *) skb->data;
-
-		/*
-		 *      Only ARP/IP is currently supported
-		 */
-
-		if (eth->h_proto != htons(ETH_P_IP)) {
-			printk(KERN_WARNING
-			       "isdn_net: %s don't know how to resolve type %d addresses?\n",
-			       dev->name, (int) eth->h_proto);
-			memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
-			return 0;
-		}
-		/*
-		 *      Try to get ARP to resolve the header.
-		 */
-#ifdef CONFIG_INET
-		ret = arp_find(eth->h_dest, skb);
-#endif
-	}
-	return ret;
-}
-
 static int isdn_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
 			     __be16 type)
 {
@@ -2005,7 +1973,6 @@ static void isdn_header_cache_update(struct hh_cache *hh,
 
 static const struct header_ops isdn_header_ops = {
 	.create = isdn_net_header,
-	.rebuild = isdn_net_rebuild_header,
 	.cache = isdn_header_cache,
 	.cache_update = isdn_header_cache_update,
 };
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index 686d3277dad1..4a77cb02dffc 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -1190,7 +1190,6 @@ static int dvb_net_stop(struct net_device *dev)
 static const struct header_ops dvb_header_ops = {
 	.create		= eth_header,
 	.parse		= eth_header_parse,
-	.rebuild	= eth_rebuild_header,
 };
 
 
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 09de683c167e..10f71c732b59 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -104,7 +104,6 @@ EXPORT_SYMBOL(arcnet_timeout);
 static int arcnet_header(struct sk_buff *skb, struct net_device *dev,
 			 unsigned short type, const void *daddr,
 			 const void *saddr, unsigned len);
-static int arcnet_rebuild_header(struct sk_buff *skb);
 static int go_tx(struct net_device *dev);
 
 static int debug = ARCNET_DEBUG;
@@ -312,7 +311,6 @@ static int choose_mtu(void)
 
 static const struct header_ops arcnet_header_ops = {
 	.create = arcnet_header,
-	.rebuild = arcnet_rebuild_header,
 };
 
 static const struct net_device_ops arcnet_netdev_ops = {
@@ -538,59 +536,6 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev,
 	return proto->build_header(skb, dev, type, _daddr);
 }
 
-
-/* 
- * Rebuild the ARCnet hard header. This is called after an ARP (or in the
- * future other address resolution) has completed on this sk_buff. We now
- * let ARP fill in the destination field.
- */
-static int arcnet_rebuild_header(struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	struct arcnet_local *lp = netdev_priv(dev);
-	int status = 0;		/* default is failure */
-	unsigned short type;
-	uint8_t daddr=0;
-	struct ArcProto *proto;
-	/*
-	 * XXX: Why not use skb->mac_len?
-	 */
-	if (skb->network_header - skb->mac_header != 2) {
-		BUGMSG(D_NORMAL,
-		       "rebuild_header: shouldn't be here! (hdrsize=%d)\n",
-		       (int)(skb->network_header - skb->mac_header));
-		return 0;
-	}
-	type = *(uint16_t *) skb_pull(skb, 2);
-	BUGMSG(D_DURING, "rebuild header for protocol %Xh\n", type);
-
-	if (type == ETH_P_IP) {
-#ifdef CONFIG_INET
-		BUGMSG(D_DURING, "rebuild header for ethernet protocol %Xh\n", type);
-		status = arp_find(&daddr, skb) ? 1 : 0;
-		BUGMSG(D_DURING, " rebuilt: dest is %d; protocol %Xh\n",
-		       daddr, type);
-#endif
-	} else {
-		BUGMSG(D_NORMAL,
-		       "I don't understand ethernet protocol %Xh addresses!\n", type);
-		dev->stats.tx_errors++;
-		dev->stats.tx_aborted_errors++;
-	}
-
-	/* if we couldn't resolve the address... give up. */
-	if (!status)
-		return 0;
-
-	/* add the _real_ header this time! */
-	proto = arc_proto_map[lp->default_proto[daddr]];
-	proto->build_header(skb, dev, type, daddr);
-
-	return 1;		/* success */
-}
-
-
-
 /* Called by the kernel in order to transmit a packet. */
 netdev_tx_t arcnet_send_packet(struct sk_buff *skb,
 				     struct net_device *dev)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 4f4099d5603d..2950c3780230 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -336,7 +336,6 @@ static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
 
 static const struct header_ops ipvlan_header_ops = {
 	.create  	= ipvlan_hard_header,
-	.rebuild	= eth_rebuild_header,
 	.parse		= eth_header_parse,
 	.cache		= eth_header_cache,
 	.cache_update	= eth_header_cache_update,
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 1df38bdae2ee..b5e3320ca506 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -550,7 +550,6 @@ static int macvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
 
 static const struct header_ops macvlan_hard_header_ops = {
 	.create  	= macvlan_hard_header,
-	.rebuild	= eth_rebuild_header,
 	.parse		= eth_header_parse,
 	.cache		= eth_header_cache,
 	.cache_update	= eth_header_cache_update,
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 52919ad42726..8f9f3e9fbfce 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -798,7 +798,6 @@ static void prism2_tx_timeout(struct net_device *dev)
 
 const struct header_ops hostap_80211_ops = {
 	.create		= eth_header,
-	.rebuild	= eth_rebuild_header,
 	.cache		= eth_header_cache,
 	.cache_update	= eth_header_cache_update,
 	.parse		= hostap_80211_header_parse,
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 1d869d185a0d..606563ef8a72 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -35,7 +35,6 @@ extern const struct header_ops eth_header_ops;
 
 int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
 	       const void *daddr, const void *saddr, unsigned len);
-int eth_rebuild_header(struct sk_buff *skb);
 int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
 int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
 		     __be16 type);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5897b4ea5a3f..2007f3b44d05 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -261,7 +261,6 @@ struct header_ops {
 			   unsigned short type, const void *daddr,
 			   const void *saddr, unsigned int len);
 	int	(*parse)(const struct sk_buff *skb, unsigned char *haddr);
-	int	(*rebuild)(struct sk_buff *skb);
 	int	(*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
 	void	(*cache_update)(struct hh_cache *hh,
 				const struct net_device *dev,
@@ -1346,7 +1345,7 @@ enum netdev_priv_flags {
  *			if one wants to override the ndo_*() functions
  *	@ethtool_ops:	Management operations
  *	@fwd_ops:	Management operations
- *	@header_ops:	Includes callbacks for creating,parsing,rebuilding,etc
+ *	@header_ops:	Includes callbacks for creating,parsing,caching,etc
  *			of Layer 2 headers.
  *
  *	@flags:		Interface flags (a la BSD)
@@ -2399,15 +2398,6 @@ static inline int dev_parse_header(const struct sk_buff *skb,
 	return dev->header_ops->parse(skb, haddr);
 }
 
-static inline int dev_rebuild_header(struct sk_buff *skb)
-{
-	const struct net_device *dev = skb->dev;
-
-	if (!dev->header_ops || !dev->header_ops->rebuild)
-		return 0;
-	return dev->header_ops->rebuild(skb);
-}
-
 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
 int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
 static inline int unregister_gifconf(unsigned int family)
diff --git a/net/802/fc.c b/net/802/fc.c
index 7c174b6750cd..7b9219022418 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -75,29 +75,8 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
 	return -hdr_len;
 }
 
-/*
- *	A neighbour discovery of some species (eg arp) has completed. We
- *	can now send the packet.
- */
-
-static int fc_rebuild_header(struct sk_buff *skb)
-{
-#ifdef CONFIG_INET
-	struct fch_hdr *fch=(struct fch_hdr *)skb->data;
-	struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr));
-	if(fcllc->ethertype != htons(ETH_P_IP)) {
-		printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype));
-		return 0;
-	}
-	return arp_find(fch->daddr, skb);
-#else
-	return 0;
-#endif
-}
-
 static const struct header_ops fc_header_ops = {
 	.create	 = fc_header,
-	.rebuild = fc_rebuild_header,
 };
 
 static void fc_setup(struct net_device *dev)
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 59e7346f1193..7d3a0af954e8 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -87,31 +87,6 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev,
 	return -hl;
 }
 
-
-/*
- * Rebuild the FDDI MAC header. This is called after an ARP
- * (or in future other address resolution) has completed on
- * this sk_buff.  We now let ARP fill in the other fields.
- */
-
-static int fddi_rebuild_header(struct sk_buff	*skb)
-{
-	struct fddihdr *fddi = (struct fddihdr *)skb->data;
-
-#ifdef CONFIG_INET
-	if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP))
-		/* Try to get ARP to resolve the header and fill destination address */
-		return arp_find(fddi->daddr, skb);
-	else
-#endif
-	{
-		printk("%s: Don't know how to resolve type %04X addresses.\n",
-		       skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype));
-		return 0;
-	}
-}
-
-
 /*
  * Determine the packet's protocol ID and fill in skb fields.
  * This routine is called before an incoming packet is passed
@@ -177,7 +152,6 @@ EXPORT_SYMBOL(fddi_change_mtu);
 
 static const struct header_ops fddi_header_ops = {
 	.create		= fddi_header,
-	.rebuild	= fddi_rebuild_header,
 };
 
 
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 2e03f8259dd5..ade1a52cdcff 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -90,33 +90,6 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
 }
 
 
-/*
- * Rebuild the HIPPI MAC header. This is called after an ARP has
- * completed on this sk_buff. We now let ARP fill in the other fields.
- */
-
-static int hippi_rebuild_header(struct sk_buff *skb)
-{
-	struct hippi_hdr *hip = (struct hippi_hdr *)skb->data;
-
-	/*
-	 * Only IP is currently supported
-	 */
-
-	if(hip->snap.ethertype != htons(ETH_P_IP))
-	{
-		printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype));
-		return 0;
-	}
-
-	/*
-	 * We don't support dynamic ARP on HIPPI, but we use the ARP
-	 * static ARP tables to hold the I-FIELDs.
-	 */
-	return arp_find(hip->le.daddr, skb);
-}
-
-
 /*
  *	Determine the packet's protocol ID.
  */
@@ -186,7 +159,6 @@ EXPORT_SYMBOL(hippi_neigh_setup_dev);
 
 static const struct header_ops hippi_header_ops = {
 	.create		= hippi_header,
-	.rebuild	= hippi_rebuild_header,
 };
 
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 118956448cf6..1dcfec8b49f3 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -36,39 +36,6 @@
 #include <linux/if_vlan.h>
 #include <linux/netpoll.h>
 
-/*
- *	Rebuild the Ethernet MAC header. This is called after an ARP
- *	(or in future other address resolution) has completed on this
- *	sk_buff. We now let ARP fill in the other fields.
- *
- *	This routine CANNOT use cached dst->neigh!
- *	Really, it is used only when dst->neigh is wrong.
- *
- * TODO:  This needs a checkup, I'm ignorant here. --BLG
- */
-static int vlan_dev_rebuild_header(struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
-
-	switch (veth->h_vlan_encapsulated_proto) {
-#ifdef CONFIG_INET
-	case htons(ETH_P_IP):
-
-		/* TODO:  Confirm this will work with VLAN headers... */
-		return arp_find(veth->h_dest, skb);
-#endif
-	default:
-		pr_debug("%s: unable to resolve type %X addresses\n",
-			 dev->name, ntohs(veth->h_vlan_encapsulated_proto));
-
-		ether_addr_copy(veth->h_source, dev->dev_addr);
-		break;
-	}
-
-	return 0;
-}
-
 /*
  *	Create the VLAN header for an arbitrary protocol layer
  *
@@ -534,7 +501,6 @@ static int vlan_dev_get_lock_subclass(struct net_device *dev)
 
 static const struct header_ops vlan_header_ops = {
 	.create	 = vlan_dev_hard_header,
-	.rebuild = vlan_dev_rebuild_header,
 	.parse	 = eth_header_parse,
 };
 
@@ -554,7 +520,6 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev
 
 static const struct header_ops vlan_passthru_header_ops = {
 	.create	 = vlan_passthru_hard_header,
-	.rebuild = dev_rebuild_header,
 	.parse	 = eth_header_parse,
 };
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 238f38d21641..8dbdf6c910b7 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -112,39 +112,6 @@ int eth_header(struct sk_buff *skb, struct net_device *dev,
 }
 EXPORT_SYMBOL(eth_header);
 
-/**
- * eth_rebuild_header- rebuild the Ethernet MAC header.
- * @skb: socket buffer to update
- *
- * This is called after an ARP or IPV6 ndisc it's resolution on this
- * sk_buff. We now let protocol (ARP) fill in the other fields.
- *
- * This routine CANNOT use cached dst->neigh!
- * Really, it is used only when dst->neigh is wrong.
- */
-int eth_rebuild_header(struct sk_buff *skb)
-{
-	struct ethhdr *eth = (struct ethhdr *)skb->data;
-	struct net_device *dev = skb->dev;
-
-	switch (eth->h_proto) {
-#ifdef CONFIG_INET
-	case htons(ETH_P_IP):
-		return arp_find(eth->h_dest, skb);
-#endif
-	default:
-		netdev_dbg(dev,
-		       "%s: unable to resolve type %X addresses.\n",
-		       dev->name, ntohs(eth->h_proto));
-
-		memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
-		break;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(eth_rebuild_header);
-
 /**
  * eth_get_headlen - determine the the length of header for an ethernet frame
  * @data: pointer to start of frame
@@ -369,7 +336,6 @@ EXPORT_SYMBOL(eth_validate_addr);
 const struct header_ops eth_header_ops ____cacheline_aligned = {
 	.create		= eth_header,
 	.parse		= eth_header_parse,
-	.rebuild	= eth_rebuild_header,
 	.cache		= eth_header_cache,
 	.cache_update	= eth_header_cache_update,
 };
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 6ae063cebf7d..988f542481a8 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -65,36 +65,6 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
 	return 1;
 }
 
-#ifdef CONFIG_INET
-
-static int nr_rebuild_header(struct sk_buff *skb)
-{
-	unsigned char *bp = skb->data;
-
-	if (arp_find(bp + 7, skb))
-		return 1;
-
-	bp[6] &= ~AX25_CBIT;
-	bp[6] &= ~AX25_EBIT;
-	bp[6] |= AX25_SSSID_SPARE;
-	bp    += AX25_ADDR_LEN;
-
-	bp[6] &= ~AX25_CBIT;
-	bp[6] |= AX25_EBIT;
-	bp[6] |= AX25_SSSID_SPARE;
-
-	return 0;
-}
-
-#else
-
-static int nr_rebuild_header(struct sk_buff *skb)
-{
-	return 1;
-}
-
-#endif
-
 static int nr_header(struct sk_buff *skb, struct net_device *dev,
 		     unsigned short type,
 		     const void *daddr, const void *saddr, unsigned int len)
@@ -188,7 +158,6 @@ static netdev_tx_t nr_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static const struct header_ops nr_header_ops = {
 	.create	= nr_header,
-	.rebuild= nr_rebuild_header,
 };
 
 static const struct net_device_ops nr_netdev_ops = {
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index 90209c1fa49b..369ca81a8c5d 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -56,19 +56,6 @@ static int rose_header(struct sk_buff *skb, struct net_device *dev,
 	return -37;
 }
 
-static int rose_rebuild_header(struct sk_buff *skb)
-{
-#ifdef CONFIG_INET
-	unsigned char *bp = (unsigned char *)skb->data;
-
-	if (arp_find(bp + 7, skb)) {
-		return 1;
-	}
-
-#endif
-	return 0;
-}
-
 static int rose_set_mac_address(struct net_device *dev, void *addr)
 {
 	struct sockaddr *sa = addr;
@@ -133,7 +120,6 @@ static netdev_tx_t rose_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static const struct header_ops rose_header_ops = {
 	.create	= rose_header,
-	.rebuild = rose_rebuild_header,
 };
 
 static const struct net_device_ops rose_netdev_ops = {
-- 
cgit v1.2.3


From 59b2af26b9f674749216fd3bfba19d842de3671c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 2 Mar 2015 00:12:05 -0600
Subject: arp: Kill arp_find

There are no more callers so kill this function.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/arp.h |  1 -
 net/ipv4/arp.c    | 65 -------------------------------------------------------
 2 files changed, 66 deletions(-)

(limited to 'include')

diff --git a/include/net/arp.h b/include/net/arp.h
index 73c49864076b..21ee1860abbc 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -47,7 +47,6 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32
 }
 
 void arp_init(void);
-int arp_find(unsigned char *haddr, struct sk_buff *skb);
 int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg);
 void arp_send(int type, int ptype, __be32 dest_ip,
 	      struct net_device *dev, __be32 src_ip,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 2557cf9a4648..bca5b9d9b442 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -396,71 +396,6 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 	return flag;
 }
 
-/* OBSOLETE FUNCTIONS */
-
-/*
- *	Find an arp mapping in the cache. If not found, post a request.
- *
- *	It is very UGLY routine: it DOES NOT use skb->dst->neighbour,
- *	even if it exists. It is supposed that skb->dev was mangled
- *	by a virtual device (eql, shaper). Nobody but broken devices
- *	is allowed to use this function, it is scheduled to be removed. --ANK
- */
-
-static int arp_set_predefined(int addr_hint, unsigned char *haddr,
-			      __be32 paddr, struct net_device *dev)
-{
-	switch (addr_hint) {
-	case RTN_LOCAL:
-		pr_debug("arp called for own IP address\n");
-		memcpy(haddr, dev->dev_addr, dev->addr_len);
-		return 1;
-	case RTN_MULTICAST:
-		arp_mc_map(paddr, haddr, dev, 1);
-		return 1;
-	case RTN_BROADCAST:
-		memcpy(haddr, dev->broadcast, dev->addr_len);
-		return 1;
-	}
-	return 0;
-}
-
-
-int arp_find(unsigned char *haddr, struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	__be32 paddr;
-	struct neighbour *n;
-
-	if (!skb_dst(skb)) {
-		pr_debug("arp_find is called with dst==NULL\n");
-		kfree_skb(skb);
-		return 1;
-	}
-
-	paddr = rt_nexthop(skb_rtable(skb), ip_hdr(skb)->daddr);
-	if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
-			       paddr, dev))
-		return 0;
-
-	n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
-
-	if (n) {
-		n->used = jiffies;
-		if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
-			neigh_ha_snapshot(haddr, n, dev);
-			neigh_release(n);
-			return 0;
-		}
-		neigh_release(n);
-	} else
-		kfree_skb(skb);
-	return 1;
-}
-EXPORT_SYMBOL(arp_find);
-
-/* END OF OBSOLETE FUNCTIONS */
-
 /*
  * Check if we can use proxy ARP for this path
  */
-- 
cgit v1.2.3


From bdf53c58490bb52e17636eca8ad18d2c38ec3cb8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 2 Mar 2015 00:13:22 -0600
Subject: neigh: Don't require dst in neigh_hh_init

- Add protocol to neigh_tbl so that dst->ops->protocol is not needed
- Acquire the device from neigh->dev

This results in a neigh_hh_init that will cache the samve values
regardless of the packets flowing through it.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 1 +
 net/core/neighbour.c    | 8 ++++----
 net/decnet/dn_neigh.c   | 1 +
 net/ipv4/arp.c          | 1 +
 net/ipv6/ndisc.c        | 1 +
 5 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index bc66babb5f27..9f912e4d4232 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -193,6 +193,7 @@ struct neigh_table {
 	int			family;
 	int			entry_size;
 	int			key_len;
+	__be16			protocol;
 	__u32			(*hash)(const void *pkey,
 					const struct net_device *dev,
 					__u32 *hash_rnd);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8a319ff3e8d1..af72b863e968 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1263,10 +1263,10 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
 EXPORT_SYMBOL(neigh_event_ns);
 
 /* called with read_lock_bh(&n->lock); */
-static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
+static void neigh_hh_init(struct neighbour *n)
 {
-	struct net_device *dev = dst->dev;
-	__be16 prot = dst->ops->protocol;
+	struct net_device *dev = n->dev;
+	__be16 prot = n->tbl->protocol;
 	struct hh_cache	*hh = &n->hh;
 
 	write_lock_bh(&n->lock);
@@ -1296,7 +1296,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
 		unsigned int seq;
 
 		if (dev->header_ops->cache && !neigh->hh.hh_len)
-			neigh_hh_init(neigh, dst);
+			neigh_hh_init(neigh);
 
 		do {
 			__skb_pull(skb, skb_network_offset(skb));
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 7ca7c3143da3..f123c6c6748c 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -97,6 +97,7 @@ struct neigh_table dn_neigh_table = {
 	.family =			PF_DECnet,
 	.entry_size =			NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)),
 	.key_len =			sizeof(__le16),
+	.protocol =			cpu_to_be16(ETH_P_DNA_RT),
 	.hash =				dn_neigh_hash,
 	.constructor =			dn_neigh_construct,
 	.id =				"dn_neigh_cache",
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index bca5b9d9b442..6b8aad6a0d7d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -152,6 +152,7 @@ static const struct neigh_ops arp_direct_ops = {
 struct neigh_table arp_tbl = {
 	.family		= AF_INET,
 	.key_len	= 4,
+	.protocol	= cpu_to_be16(ETH_P_IP),
 	.hash		= arp_hash,
 	.constructor	= arp_constructor,
 	.proxy_redo	= parp_redo,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 471ed24aabae..e363bbc2420d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -117,6 +117,7 @@ static const struct neigh_ops ndisc_direct_ops = {
 struct neigh_table nd_tbl = {
 	.family =	AF_INET6,
 	.key_len =	sizeof(struct in6_addr),
+	.protocol =	cpu_to_be16(ETH_P_IPV6),
 	.hash =		ndisc_hash,
 	.constructor =	ndisc_constructor,
 	.pconstructor =	pndisc_constructor,
-- 
cgit v1.2.3


From ee586bbc28fb7128133457cf711880d13a3b7ce4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 16 Feb 2015 18:54:04 +0100
Subject: netfilter: reject: don't send icmp error if csum is invalid

tcp resets are never emitted if the packet that triggers the
reject/reset has an invalid checksum.

For icmp error responses there was no such check.
It allows to distinguish icmp response generated via

iptables -I INPUT -p udp --dport 42 -j REJECT

and those emitted by network stack (won't respond if csum is invalid,
REJECT does).

Arguably its possible to avoid this by using conntrack and only
using REJECT with -m conntrack NEW/RELATED.

However, this doesn't work when connection tracking is not in use
or when using nf_conntrack_checksum=0.

Furthermore, sending errors in response to invalid csums doesn't make
much sense so just add similar test as in nf_send_reset.

Validate csum if needed and only send the response if it is ok.

Reference: http://bugzilla.redhat.com/show_bug.cgi?id=1169829
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_reject.h |  6 +-----
 include/net/netfilter/ipv6/nf_reject.h | 11 ++---------
 net/ipv4/netfilter/ipt_REJECT.c        | 17 +++++++++--------
 net/ipv4/netfilter/nf_reject_ipv4.c    | 23 ++++++++++++++++++++++
 net/ipv4/netfilter/nft_reject_ipv4.c   |  3 ++-
 net/ipv6/netfilter/nf_reject_ipv6.c    | 35 ++++++++++++++++++++++++++++++++++
 net/netfilter/nft_reject_inet.c        |  6 ++++--
 7 files changed, 76 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index 03e928a55229..864127573c32 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -5,11 +5,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 
-static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
-{
-	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
-}
-
+void nf_send_unreach(struct sk_buff *skb_in, int code, int hook);
 void nf_send_reset(struct sk_buff *oldskb, int hook);
 
 const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index 23216d48abf9..0ae445d3f217 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -3,15 +3,8 @@
 
 #include <linux/icmpv6.h>
 
-static inline void
-nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
-	     unsigned int hooknum)
-{
-	if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
-		skb_in->dev = net->loopback_dev;
-
-	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
-}
+void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
+		      unsigned int hooknum);
 
 void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook);
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 8f48f5517e33..87907d4bd259 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -34,31 +34,32 @@ static unsigned int
 reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_reject_info *reject = par->targinfo;
+	int hook = par->hooknum;
 
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
-		nf_send_unreach(skb, ICMP_NET_UNREACH);
+		nf_send_unreach(skb, ICMP_NET_UNREACH, hook);
 		break;
 	case IPT_ICMP_HOST_UNREACHABLE:
-		nf_send_unreach(skb, ICMP_HOST_UNREACH);
+		nf_send_unreach(skb, ICMP_HOST_UNREACH, hook);
 		break;
 	case IPT_ICMP_PROT_UNREACHABLE:
-		nf_send_unreach(skb, ICMP_PROT_UNREACH);
+		nf_send_unreach(skb, ICMP_PROT_UNREACH, hook);
 		break;
 	case IPT_ICMP_PORT_UNREACHABLE:
-		nf_send_unreach(skb, ICMP_PORT_UNREACH);
+		nf_send_unreach(skb, ICMP_PORT_UNREACH, hook);
 		break;
 	case IPT_ICMP_NET_PROHIBITED:
-		nf_send_unreach(skb, ICMP_NET_ANO);
+		nf_send_unreach(skb, ICMP_NET_ANO, hook);
 		break;
 	case IPT_ICMP_HOST_PROHIBITED:
-		nf_send_unreach(skb, ICMP_HOST_ANO);
+		nf_send_unreach(skb, ICMP_HOST_ANO, hook);
 		break;
 	case IPT_ICMP_ADMIN_PROHIBITED:
-		nf_send_unreach(skb, ICMP_PKT_FILTERED);
+		nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
 		break;
 	case IPT_TCP_RESET:
-		nf_send_reset(skb, par->hooknum);
+		nf_send_reset(skb, hook);
 	case IPT_ICMP_ECHOREPLY:
 		/* Doesn't happen. */
 		break;
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 536da7bc598a..b7405eb7f1ef 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -164,4 +164,27 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
 }
 EXPORT_SYMBOL_GPL(nf_send_reset);
 
+void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
+{
+	struct iphdr *iph = ip_hdr(skb_in);
+	u8 proto;
+
+	if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
+		return;
+
+	if (skb_csum_unnecessary(skb_in)) {
+		icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+		return;
+	}
+
+	if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
+		proto = iph->protocol;
+	else
+		proto = 0;
+
+	if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
+		icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+}
+EXPORT_SYMBOL_GPL(nf_send_unreach);
+
 MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index d729542bd1b7..16a5d4d73d75 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,7 +27,8 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
 
 	switch (priv->type) {
 	case NFT_REJECT_ICMP_UNREACH:
-		nf_send_unreach(pkt->skb, priv->icmp_code);
+		nf_send_unreach(pkt->skb, priv->icmp_code,
+				pkt->ops->hooknum);
 		break;
 	case NFT_REJECT_TCP_RST:
 		nf_send_reset(pkt->skb, pkt->ops->hooknum);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index d05b36440e8b..68e0bb4db1bf 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -208,4 +208,39 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
 }
 EXPORT_SYMBOL_GPL(nf_send_reset6);
 
+static bool reject6_csum_ok(struct sk_buff *skb, int hook)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	int thoff;
+	__be16 fo;
+	u8 proto;
+
+	if (skb->csum_bad)
+		return false;
+
+	if (skb_csum_unnecessary(skb))
+		return true;
+
+	proto = ip6h->nexthdr;
+	thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+
+	if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
+		return false;
+
+	return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
+}
+
+void nf_send_unreach6(struct net *net, struct sk_buff *skb_in,
+		      unsigned char code, unsigned int hooknum)
+{
+	if (!reject6_csum_ok(skb_in, hooknum))
+		return;
+
+	if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
+		skb_in->dev = net->loopback_dev;
+
+	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
+}
+EXPORT_SYMBOL_GPL(nf_send_unreach6);
+
 MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 7b5f9d58680a..92877114aff4 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -28,14 +28,16 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
 	case NFPROTO_IPV4:
 		switch (priv->type) {
 		case NFT_REJECT_ICMP_UNREACH:
-			nf_send_unreach(pkt->skb, priv->icmp_code);
+			nf_send_unreach(pkt->skb, priv->icmp_code,
+					pkt->ops->hooknum);
 			break;
 		case NFT_REJECT_TCP_RST:
 			nf_send_reset(pkt->skb, pkt->ops->hooknum);
 			break;
 		case NFT_REJECT_ICMPX_UNREACH:
 			nf_send_unreach(pkt->skb,
-					nft_reject_icmp_code(priv->icmp_code));
+					nft_reject_icmp_code(priv->icmp_code),
+					pkt->ops->hooknum);
 			break;
 		}
 		break;
-- 
cgit v1.2.3


From 6eb18137643fee5f182d85c818062b4feddfb76b Mon Sep 17 00:00:00 2001
From: Dedy Lansky <dlansky@codeaurora.org>
Date: Sun, 8 Feb 2015 15:52:03 +0200
Subject: cfg80211: add bss_type and privacy arguments in cfg80211_get_bss()

802.11ad adds new a network type (PBSS) and changes the capability
field interpretation for the DMG (60G) band.
The same 2 bits that were interpreted as "ESS" and "IBSS" before are
re-used as a 2-bit field with 3 valid values (and 1 reserved). Valid
values are: "IBSS", "PBSS" (new) and "AP".

In order to get the BSS struct for the new PBSS networks, change the
cfg80211_get_bss() function to take a new enum ieee80211_bss_type
argument with the valid network types, as "capa_mask" and "capa_val"
no longer work correctly (the search must be band-aware now.)

The remaining bits in "capa_mask" and "capa_val" are used only for
privacy matching so replace those two with a privacy enum as well.

Signed-off-by: Dedy Lansky <dlansky@codeaurora.org>
[rewrite commit log, tiny fixes]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/mac.c       |  3 +-
 drivers/net/wireless/ath/ath6kl/cfg80211.c  |  9 +--
 drivers/net/wireless/ath/wil6210/cfg80211.c |  2 +-
 drivers/net/wireless/cw1200/sta.c           |  4 +-
 drivers/net/wireless/libertas/cfg.c         |  6 +-
 drivers/net/wireless/mwifiex/cfg80211.c     |  8 +--
 include/net/cfg80211.h                      | 39 ++++++++++++-
 net/mac80211/ibss.c                         | 43 ++++-----------
 net/wireless/ibss.c                         |  2 +-
 net/wireless/mlme.c                         |  6 +-
 net/wireless/scan.c                         | 86 ++++++++++++++++++++++++++---
 net/wireless/sme.c                          | 16 ++----
 net/wireless/trace.h                        | 24 ++++----
 13 files changed, 168 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index d6d2f0f00caa..d372ebfd933d 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -1386,7 +1386,8 @@ static void ath10k_peer_assoc_h_crypto(struct ath10k *ar,
 	lockdep_assert_held(&ar->conf_mutex);
 
 	bss = cfg80211_get_bss(ar->hw->wiphy, ar->hw->conf.chandef.chan,
-			       info->bssid, NULL, 0, 0, 0);
+			       info->bssid, NULL, 0, IEEE80211_BSS_TYPE_ANY,
+			       IEEE80211_PRIVACY_ANY);
 	if (bss) {
 		const struct cfg80211_bss_ies *ies;
 
diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 85da63a67faf..ff7ba5c195c6 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -686,20 +686,21 @@ ath6kl_add_bss_if_needed(struct ath6kl_vif *vif,
 {
 	struct ath6kl *ar = vif->ar;
 	struct cfg80211_bss *bss;
-	u16 cap_mask, cap_val;
+	u16 cap_val;
+	enum ieee80211_bss_type bss_type;
 	u8 *ie;
 
 	if (nw_type & ADHOC_NETWORK) {
-		cap_mask = WLAN_CAPABILITY_IBSS;
 		cap_val = WLAN_CAPABILITY_IBSS;
+		bss_type = IEEE80211_BSS_TYPE_IBSS;
 	} else {
-		cap_mask = WLAN_CAPABILITY_ESS;
 		cap_val = WLAN_CAPABILITY_ESS;
+		bss_type = IEEE80211_BSS_TYPE_ESS;
 	}
 
 	bss = cfg80211_get_bss(ar->wiphy, chan, bssid,
 			       vif->ssid, vif->ssid_len,
-			       cap_mask, cap_val);
+			       bss_type, IEEE80211_PRIVACY_ANY);
 	if (bss == NULL) {
 		/*
 		 * Since cfg80211 may not yet know about the BSS,
diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 2d5ea21be47e..adfd815e3f7d 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -395,7 +395,7 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
 
 	bss = cfg80211_get_bss(wiphy, sme->channel, sme->bssid,
 			       sme->ssid, sme->ssid_len,
-			       WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
+			       IEEE80211_BSS_TYPE_ESS, IEEE80211_PRIVACY_ANY);
 	if (!bss) {
 		wil_err(wil, "Unable to find BSS\n");
 		return -ENOENT;
diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c
index 4a47c7f8a246..1b58b2e2a538 100644
--- a/drivers/net/wireless/cw1200/sta.c
+++ b/drivers/net/wireless/cw1200/sta.c
@@ -1240,8 +1240,8 @@ static void cw1200_do_join(struct cw1200_common *priv)
 
 	bssid = priv->vif->bss_conf.bssid;
 
-	bss = cfg80211_get_bss(priv->hw->wiphy, priv->channel,
-			bssid, NULL, 0, 0, 0);
+	bss = cfg80211_get_bss(priv->hw->wiphy, priv->channel, bssid, NULL, 0,
+			       IEEE80211_BSS_TYPE_ANY, IEEE80211_PRIVACY_ANY);
 
 	if (!bss && !conf->ibss_joined) {
 		wsm_unlock_tx(priv);
diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index a92985a6ea21..1a4d558022d8 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -1356,8 +1356,8 @@ static int lbs_cfg_connect(struct wiphy *wiphy, struct net_device *dev,
 
 	/* Find the BSS we want using available scan results */
 	bss = cfg80211_get_bss(wiphy, sme->channel, sme->bssid,
-		sme->ssid, sme->ssid_len,
-		WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
+		sme->ssid, sme->ssid_len, IEEE80211_BSS_TYPE_ESS,
+		IEEE80211_PRIVACY_ANY);
 	if (!bss) {
 		wiphy_err(wiphy, "assoc: bss %pM not in scan results\n",
 			  sme->bssid);
@@ -2000,7 +2000,7 @@ static int lbs_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 	 * bss list is populated already */
 	bss = cfg80211_get_bss(wiphy, params->chandef.chan, params->bssid,
 		params->ssid, params->ssid_len,
-		WLAN_CAPABILITY_IBSS, WLAN_CAPABILITY_IBSS);
+		IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY_ANY);
 
 	if (bss) {
 		ret = lbs_ibss_join_existing(priv, params, bss);
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index 41c8e25df954..a47eb55bb6da 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -1954,13 +1954,13 @@ done:
 		if (mode == NL80211_IFTYPE_ADHOC)
 			bss = cfg80211_get_bss(priv->wdev.wiphy, channel,
 					       bssid, ssid, ssid_len,
-					       WLAN_CAPABILITY_IBSS,
-					       WLAN_CAPABILITY_IBSS);
+					       IEEE80211_BSS_TYPE_IBSS,
+					       IEEE80211_PRIVACY_ANY);
 		else
 			bss = cfg80211_get_bss(priv->wdev.wiphy, channel,
 					       bssid, ssid, ssid_len,
-					       WLAN_CAPABILITY_ESS,
-					       WLAN_CAPABILITY_ESS);
+					       IEEE80211_BSS_TYPE_ESS,
+					       IEEE80211_PRIVACY_ANY);
 
 		if (!bss) {
 			if (is_scanning_required) {
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 64e09e1e8099..28fff56f5606 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -214,6 +214,39 @@ enum ieee80211_rate_flags {
 	IEEE80211_RATE_SUPPORTS_10MHZ	= 1<<6,
 };
 
+/**
+ * enum ieee80211_bss_type - BSS type filter
+ *
+ * @IEEE80211_BSS_TYPE_ESS: Infrastructure BSS
+ * @IEEE80211_BSS_TYPE_PBSS: Personal BSS
+ * @IEEE80211_BSS_TYPE_IBSS: Independent BSS
+ * @IEEE80211_BSS_TYPE_MBSS: Mesh BSS
+ * @IEEE80211_BSS_TYPE_ANY: Wildcard value for matching any BSS type
+ */
+enum ieee80211_bss_type {
+	IEEE80211_BSS_TYPE_ESS,
+	IEEE80211_BSS_TYPE_PBSS,
+	IEEE80211_BSS_TYPE_IBSS,
+	IEEE80211_BSS_TYPE_MBSS,
+	IEEE80211_BSS_TYPE_ANY
+};
+
+/**
+ * enum ieee80211_privacy - BSS privacy filter
+ *
+ * @IEEE80211_PRIVACY_ON: privacy bit set
+ * @IEEE80211_PRIVACY_OFF: privacy bit clear
+ * @IEEE80211_PRIVACY_ANY: Wildcard value for matching any privacy setting
+ */
+enum ieee80211_privacy {
+	IEEE80211_PRIVACY_ON,
+	IEEE80211_PRIVACY_OFF,
+	IEEE80211_PRIVACY_ANY
+};
+
+#define IEEE80211_PRIVACY(x)	\
+	((x) ? IEEE80211_PRIVACY_ON : IEEE80211_PRIVACY_OFF)
+
 /**
  * struct ieee80211_rate - bitrate definition
  *
@@ -4012,14 +4045,16 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
 				      struct ieee80211_channel *channel,
 				      const u8 *bssid,
 				      const u8 *ssid, size_t ssid_len,
-				      u16 capa_mask, u16 capa_val);
+				      enum ieee80211_bss_type bss_type,
+				      enum ieee80211_privacy);
 static inline struct cfg80211_bss *
 cfg80211_get_ibss(struct wiphy *wiphy,
 		  struct ieee80211_channel *channel,
 		  const u8 *ssid, size_t ssid_len)
 {
 	return cfg80211_get_bss(wiphy, channel, NULL, ssid, ssid_len,
-				WLAN_CAPABILITY_IBSS, WLAN_CAPABILITY_IBSS);
+				IEEE80211_BSS_TYPE_IBSS,
+				IEEE80211_PRIVACY_ANY);
 }
 
 /**
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index b606b53a49a7..ee93d7d9aa4b 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -470,22 +470,19 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
 	struct beacon_data *presp, *old_presp;
 	struct cfg80211_bss *cbss;
 	const struct cfg80211_bss_ies *ies;
-	u16 capability;
+	u16 capability = 0;
 	u64 tsf;
 	int ret = 0;
 
 	sdata_assert_lock(sdata);
 
-	capability = WLAN_CAPABILITY_IBSS;
-
 	if (ifibss->privacy)
-		capability |= WLAN_CAPABILITY_PRIVACY;
+		capability = WLAN_CAPABILITY_PRIVACY;
 
 	cbss = cfg80211_get_bss(sdata->local->hw.wiphy, ifibss->chandef.chan,
 				ifibss->bssid, ifibss->ssid,
-				ifibss->ssid_len, WLAN_CAPABILITY_IBSS |
-				WLAN_CAPABILITY_PRIVACY,
-				capability);
+				ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS,
+				IEEE80211_PRIVACY(ifibss->privacy));
 
 	if (WARN_ON(!cbss)) {
 		ret = -EINVAL;
@@ -525,23 +522,17 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 	struct cfg80211_bss *cbss;
 	int err, changed = 0;
-	u16 capability;
 
 	sdata_assert_lock(sdata);
 
 	/* update cfg80211 bss information with the new channel */
 	if (!is_zero_ether_addr(ifibss->bssid)) {
-		capability = WLAN_CAPABILITY_IBSS;
-
-		if (ifibss->privacy)
-			capability |= WLAN_CAPABILITY_PRIVACY;
-
 		cbss = cfg80211_get_bss(sdata->local->hw.wiphy,
 					ifibss->chandef.chan,
 					ifibss->bssid, ifibss->ssid,
-					ifibss->ssid_len, WLAN_CAPABILITY_IBSS |
-					WLAN_CAPABILITY_PRIVACY,
-					capability);
+					ifibss->ssid_len,
+					IEEE80211_BSS_TYPE_IBSS,
+					IEEE80211_PRIVACY(ifibss->privacy));
 		/* XXX: should not really modify cfg80211 data */
 		if (cbss) {
 			cbss->channel = sdata->csa_chandef.chan;
@@ -682,19 +673,13 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
 	struct cfg80211_bss *cbss;
 	struct beacon_data *presp;
 	struct sta_info *sta;
-	u16 capability;
 
 	if (!is_zero_ether_addr(ifibss->bssid)) {
-		capability = WLAN_CAPABILITY_IBSS;
-
-		if (ifibss->privacy)
-			capability |= WLAN_CAPABILITY_PRIVACY;
-
 		cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan,
 					ifibss->bssid, ifibss->ssid,
-					ifibss->ssid_len, WLAN_CAPABILITY_IBSS |
-					WLAN_CAPABILITY_PRIVACY,
-					capability);
+					ifibss->ssid_len,
+					IEEE80211_BSS_TYPE_IBSS,
+					IEEE80211_PRIVACY(ifibss->privacy));
 
 		if (cbss) {
 			cfg80211_unlink_bss(local->hw.wiphy, cbss);
@@ -1325,7 +1310,6 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
 	const u8 *bssid = NULL;
 	enum nl80211_bss_scan_width scan_width;
 	int active_ibss;
-	u16 capability;
 
 	sdata_assert_lock(sdata);
 
@@ -1335,9 +1319,6 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
 	if (active_ibss)
 		return;
 
-	capability = WLAN_CAPABILITY_IBSS;
-	if (ifibss->privacy)
-		capability |= WLAN_CAPABILITY_PRIVACY;
 	if (ifibss->fixed_bssid)
 		bssid = ifibss->bssid;
 	if (ifibss->fixed_channel)
@@ -1346,8 +1327,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
 		bssid = ifibss->bssid;
 	cbss = cfg80211_get_bss(local->hw.wiphy, chan, bssid,
 				ifibss->ssid, ifibss->ssid_len,
-				WLAN_CAPABILITY_IBSS | WLAN_CAPABILITY_PRIVACY,
-				capability);
+				IEEE80211_BSS_TYPE_IBSS,
+				IEEE80211_PRIVACY(ifibss->privacy));
 
 	if (cbss) {
 		struct ieee80211_bss *bss;
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index e24fc585c883..1a65662a5d73 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -30,7 +30,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
 		return;
 
 	bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, NULL, 0,
-			       WLAN_CAPABILITY_IBSS, WLAN_CAPABILITY_IBSS);
+			       IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY_ANY);
 
 	if (WARN_ON(!bss))
 		return;
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 2c52b59e43f3..7aae329e2b4e 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -229,7 +229,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 		return -EALREADY;
 
 	req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
-				   WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
+				   IEEE80211_BSS_TYPE_ESS,
+				   IEEE80211_PRIVACY_ANY);
 	if (!req.bss)
 		return -ENOENT;
 
@@ -296,7 +297,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 				   rdev->wiphy.vht_capa_mod_mask);
 
 	req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
-				    WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
+				    IEEE80211_BSS_TYPE_ESS,
+				    IEEE80211_PRIVACY_ANY);
 	if (!req->bss)
 		return -ENOENT;
 
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ceb8f0040dae..3a50aa2553bf 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -531,24 +531,78 @@ static int cmp_bss(struct cfg80211_bss *a,
 	}
 }
 
+static bool cfg80211_bss_type_match(u16 capability,
+				    enum ieee80211_band band,
+				    enum ieee80211_bss_type bss_type)
+{
+	bool ret = true;
+	u16 mask, val;
+
+	if (bss_type == IEEE80211_BSS_TYPE_ANY)
+		return ret;
+
+	if (band == IEEE80211_BAND_60GHZ) {
+		mask = WLAN_CAPABILITY_DMG_TYPE_MASK;
+		switch (bss_type) {
+		case IEEE80211_BSS_TYPE_ESS:
+			val = WLAN_CAPABILITY_DMG_TYPE_AP;
+			break;
+		case IEEE80211_BSS_TYPE_PBSS:
+			val = WLAN_CAPABILITY_DMG_TYPE_PBSS;
+			break;
+		case IEEE80211_BSS_TYPE_IBSS:
+			val = WLAN_CAPABILITY_DMG_TYPE_IBSS;
+			break;
+		default:
+			return false;
+		}
+	} else {
+		mask = WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS;
+		switch (bss_type) {
+		case IEEE80211_BSS_TYPE_ESS:
+			val = WLAN_CAPABILITY_ESS;
+			break;
+		case IEEE80211_BSS_TYPE_IBSS:
+			val = WLAN_CAPABILITY_IBSS;
+			break;
+		case IEEE80211_BSS_TYPE_MBSS:
+			val = 0;
+			break;
+		default:
+			return false;
+		}
+	}
+
+	ret = ((capability & mask) == val);
+	return ret;
+}
+
 /* Returned bss is reference counted and must be cleaned up appropriately. */
 struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
 				      struct ieee80211_channel *channel,
 				      const u8 *bssid,
 				      const u8 *ssid, size_t ssid_len,
-				      u16 capa_mask, u16 capa_val)
+				      enum ieee80211_bss_type bss_type,
+				      enum ieee80211_privacy privacy)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_internal_bss *bss, *res = NULL;
 	unsigned long now = jiffies;
+	int bss_privacy;
 
-	trace_cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, capa_mask,
-			       capa_val);
+	trace_cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, bss_type,
+			       privacy);
 
 	spin_lock_bh(&rdev->bss_lock);
 
 	list_for_each_entry(bss, &rdev->bss_list, list) {
-		if ((bss->pub.capability & capa_mask) != capa_val)
+		if (!cfg80211_bss_type_match(bss->pub.capability,
+					     bss->pub.channel->band, bss_type))
+			continue;
+
+		bss_privacy = (bss->pub.capability & WLAN_CAPABILITY_PRIVACY);
+		if ((privacy == IEEE80211_PRIVACY_ON && !bss_privacy) ||
+		    (privacy == IEEE80211_PRIVACY_OFF && bss_privacy))
 			continue;
 		if (channel && bss->pub.channel != channel)
 			continue;
@@ -896,6 +950,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
 	struct cfg80211_bss_ies *ies;
 	struct ieee80211_channel *channel;
 	struct cfg80211_internal_bss tmp = {}, *res;
+	int bss_type;
 	bool signal_valid;
 
 	if (WARN_ON(!wiphy))
@@ -950,8 +1005,15 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
 	if (!res)
 		return NULL;
 
-	if (res->pub.capability & WLAN_CAPABILITY_ESS)
-		regulatory_hint_found_beacon(wiphy, channel, gfp);
+	if (channel->band == IEEE80211_BAND_60GHZ) {
+		bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK;
+		if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP ||
+		    bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS)
+			regulatory_hint_found_beacon(wiphy, channel, gfp);
+	} else {
+		if (res->pub.capability & WLAN_CAPABILITY_ESS)
+			regulatory_hint_found_beacon(wiphy, channel, gfp);
+	}
 
 	trace_cfg80211_return_bss(&res->pub);
 	/* cfg80211_bss_update gives us a referenced result */
@@ -973,6 +1035,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
 	bool signal_valid;
 	size_t ielen = len - offsetof(struct ieee80211_mgmt,
 				      u.probe_resp.variable);
+	int bss_type;
 
 	BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
 			offsetof(struct ieee80211_mgmt, u.beacon.variable));
@@ -1025,8 +1088,15 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
 	if (!res)
 		return NULL;
 
-	if (res->pub.capability & WLAN_CAPABILITY_ESS)
-		regulatory_hint_found_beacon(wiphy, channel, gfp);
+	if (channel->band == IEEE80211_BAND_60GHZ) {
+		bss_type = res->pub.capability & WLAN_CAPABILITY_DMG_TYPE_MASK;
+		if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP ||
+		    bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS)
+			regulatory_hint_found_beacon(wiphy, channel, gfp);
+	} else {
+		if (res->pub.capability & WLAN_CAPABILITY_ESS)
+			regulatory_hint_found_beacon(wiphy, channel, gfp);
+	}
 
 	trace_cfg80211_return_bss(&res->pub);
 	/* cfg80211_bss_update gives us a referenced result */
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 0ab3711c79a0..ea1da6621ff0 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -257,19 +257,15 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct cfg80211_bss *bss;
-	u16 capa = WLAN_CAPABILITY_ESS;
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	if (wdev->conn->params.privacy)
-		capa |= WLAN_CAPABILITY_PRIVACY;
-
 	bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel,
 			       wdev->conn->params.bssid,
 			       wdev->conn->params.ssid,
 			       wdev->conn->params.ssid_len,
-			       WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_PRIVACY,
-			       capa);
+			       IEEE80211_BSS_TYPE_ESS,
+			       IEEE80211_PRIVACY(wdev->conn->params.privacy));
 	if (!bss)
 		return NULL;
 
@@ -637,8 +633,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
 		WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect);
 		bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
 				       wdev->ssid, wdev->ssid_len,
-				       WLAN_CAPABILITY_ESS,
-				       WLAN_CAPABILITY_ESS);
+				       IEEE80211_BSS_TYPE_ESS,
+				       IEEE80211_PRIVACY_ANY);
 		if (bss)
 			cfg80211_hold_bss(bss_from_pub(bss));
 	}
@@ -795,8 +791,8 @@ void cfg80211_roamed(struct net_device *dev,
 	struct cfg80211_bss *bss;
 
 	bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid,
-			       wdev->ssid_len, WLAN_CAPABILITY_ESS,
-			       WLAN_CAPABILITY_ESS);
+			       wdev->ssid_len,
+			       IEEE80211_BSS_TYPE_ESS, IEEE80211_PRIVACY_ANY);
 	if (WARN_ON(!bss))
 		return;
 
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index b17b3692f8c2..b19773c9c81b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2636,28 +2636,30 @@ DEFINE_EVENT(wiphy_only_evt, cfg80211_sched_scan_stopped,
 TRACE_EVENT(cfg80211_get_bss,
 	TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel,
 		 const u8 *bssid, const u8 *ssid, size_t ssid_len,
-		 u16 capa_mask, u16 capa_val),
-	TP_ARGS(wiphy, channel, bssid, ssid, ssid_len, capa_mask, capa_val),
+		 enum ieee80211_bss_type bss_type,
+		 enum ieee80211_privacy privacy),
+	TP_ARGS(wiphy, channel, bssid, ssid, ssid_len, bss_type, privacy),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		CHAN_ENTRY
 		MAC_ENTRY(bssid)
 		__dynamic_array(u8, ssid, ssid_len)
-		__field(u16, capa_mask)
-		__field(u16, capa_val)
+		__field(enum ieee80211_bss_type, bss_type)
+		__field(enum ieee80211_privacy, privacy)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
 		CHAN_ASSIGN(channel);
 		MAC_ASSIGN(bssid, bssid);
 		memcpy(__get_dynamic_array(ssid), ssid, ssid_len);
-		__entry->capa_mask = capa_mask;
-		__entry->capa_val = capa_val;
-	),
-	TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT ", " MAC_PR_FMT ", buf: %#.2x, "
-		  "capa_mask: %d, capa_val: %u", WIPHY_PR_ARG, CHAN_PR_ARG,
-		  MAC_PR_ARG(bssid), ((u8 *)__get_dynamic_array(ssid))[0],
-		  __entry->capa_mask, __entry->capa_val)
+		__entry->bss_type = bss_type;
+		__entry->privacy = privacy;
+	),
+	TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT ", " MAC_PR_FMT
+		  ", buf: %#.2x, bss_type: %d, privacy: %d",
+		  WIPHY_PR_ARG, CHAN_PR_ARG, MAC_PR_ARG(bssid),
+		  ((u8 *)__get_dynamic_array(ssid))[0], __entry->bss_type,
+		  __entry->privacy)
 );
 
 TRACE_EVENT(cfg80211_inform_bss_width_frame,
-- 
cgit v1.2.3


From ffc1199122d83d60ad99f9c55df32feb650b7bff Mon Sep 17 00:00:00 2001
From: "Janusz.Dziedzic@tieto.com" <Janusz.Dziedzic@tieto.com>
Date: Sat, 21 Feb 2015 16:52:39 +0100
Subject: cfg80211: add VHT support for IBSS

Add NL80211_EXT_FEATURE_VHT_IBSS flag and VHT
support for IBSS.

Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  2 ++
 net/wireless/nl80211.c       | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 2dcf9bba317c..8ee31f108407 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4328,11 +4328,13 @@ enum nl80211_feature_flags {
 
 /**
  * enum nl80211_ext_feature_index - bit index of extended features.
+ * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
 enum nl80211_ext_feature_index {
+	NL80211_EXT_FEATURE_VHT_IBSS,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9c6e23ede5b2..66666fdf1c8d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7265,8 +7265,18 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 		break;
 	case NL80211_CHAN_WIDTH_20:
 	case NL80211_CHAN_WIDTH_40:
-		if (rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)
-			break;
+		if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS))
+			return -EINVAL;
+		break;
+	case NL80211_CHAN_WIDTH_80:
+	case NL80211_CHAN_WIDTH_80P80:
+	case NL80211_CHAN_WIDTH_160:
+		if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS))
+			return -EINVAL;
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_VHT_IBSS))
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From 6c09e791b21309a1ad71f9702b766dae12a3cb0a Mon Sep 17 00:00:00 2001
From: Ahmad Kholaif <akholaif@qca.qualcomm.com>
Date: Thu, 26 Feb 2015 15:26:53 +0200
Subject: cfg80211: Allow NL80211_ATTR_IFINDEX to be added to vendor events

This modifies cfg80211_vendor_event_alloc() with an additional argument
struct wireless_dev *wdev. __cfg80211_alloc_event_skb() is modified to
take in *wdev argument, if wdev != NULL, both the NL80211_ATTR_IFINDEX
and wdev identifier are added to the vendor event.

These changes make it easier for drivers to add ifindex indication in
vendor events cleanly.

This also updates all existing users of cfg80211_vendor_event_alloc()
and __cfg80211_alloc_event_skb() in the kernel tree.

Signed-off-by: Ahmad Kholaif <akholaif@qca.qualcomm.com>
Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ti/wl18xx/event.c |  4 ++--
 include/net/cfg80211.h                 | 14 ++++++++++----
 net/wireless/nl80211.c                 | 19 +++++++++++++++----
 3 files changed, 27 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ti/wl18xx/event.c b/drivers/net/wireless/ti/wl18xx/event.c
index c28f06854195..548bb9e7e91e 100644
--- a/drivers/net/wireless/ti/wl18xx/event.c
+++ b/drivers/net/wireless/ti/wl18xx/event.c
@@ -77,7 +77,7 @@ static int wlcore_smart_config_sync_event(struct wl1271 *wl, u8 sync_channel,
 	wl1271_debug(DEBUG_EVENT,
 		     "SMART_CONFIG_SYNC_EVENT_ID, freq: %d (chan: %d band %d)",
 		     freq, sync_channel, sync_band);
-	skb = cfg80211_vendor_event_alloc(wl->hw->wiphy, 20,
+	skb = cfg80211_vendor_event_alloc(wl->hw->wiphy, NULL, 20,
 					  WLCORE_VENDOR_EVENT_SC_SYNC,
 					  GFP_KERNEL);
 
@@ -98,7 +98,7 @@ static int wlcore_smart_config_decode_event(struct wl1271 *wl,
 	wl1271_debug(DEBUG_EVENT, "SMART_CONFIG_DECODE_EVENT_ID");
 	wl1271_dump_ascii(DEBUG_EVENT, "SSID:", ssid, ssid_len);
 
-	skb = cfg80211_vendor_event_alloc(wl->hw->wiphy,
+	skb = cfg80211_vendor_event_alloc(wl->hw->wiphy, NULL,
 					  ssid_len + pwd_len + 20,
 					  WLCORE_VENDOR_EVENT_SC_DECODE,
 					  GFP_KERNEL);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 28fff56f5606..12a6121ea76e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4295,6 +4295,7 @@ struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy,
 					   int approxlen);
 
 struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
+					   struct wireless_dev *wdev,
 					   enum nl80211_commands cmd,
 					   enum nl80211_attrs attr,
 					   int vendor_event_idx,
@@ -4349,6 +4350,7 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb);
 /**
  * cfg80211_vendor_event_alloc - allocate vendor-specific event skb
  * @wiphy: the wiphy
+ * @wdev: the wireless device
  * @event_idx: index of the vendor event in the wiphy's vendor_events
  * @approxlen: an upper bound of the length of the data that will
  *	be put into the skb
@@ -4357,16 +4359,20 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb);
  * This function allocates and pre-fills an skb for an event on the
  * vendor-specific multicast group.
  *
+ * If wdev != NULL, both the ifindex and identifier of the specified
+ * wireless device are added to the event message before the vendor data
+ * attribute.
+ *
  * When done filling the skb, call cfg80211_vendor_event() with the
  * skb to send the event.
  *
  * Return: An allocated and pre-filled skb. %NULL if any errors happen.
  */
 static inline struct sk_buff *
-cfg80211_vendor_event_alloc(struct wiphy *wiphy, int approxlen,
-			    int event_idx, gfp_t gfp)
+cfg80211_vendor_event_alloc(struct wiphy *wiphy, struct wireless_dev *wdev,
+			     int approxlen, int event_idx, gfp_t gfp)
 {
-	return __cfg80211_alloc_event_skb(wiphy, NL80211_CMD_VENDOR,
+	return __cfg80211_alloc_event_skb(wiphy, wdev, NL80211_CMD_VENDOR,
 					  NL80211_ATTR_VENDOR_DATA,
 					  event_idx, approxlen, gfp);
 }
@@ -4467,7 +4473,7 @@ static inline int cfg80211_testmode_reply(struct sk_buff *skb)
 static inline struct sk_buff *
 cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, int approxlen, gfp_t gfp)
 {
-	return __cfg80211_alloc_event_skb(wiphy, NL80211_CMD_TESTMODE,
+	return __cfg80211_alloc_event_skb(wiphy, NULL, NL80211_CMD_TESTMODE,
 					  NL80211_ATTR_TESTDATA, -1,
 					  approxlen, gfp);
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 66666fdf1c8d..01874628ae00 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7389,8 +7389,8 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
 
 static struct sk_buff *
 __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev,
-			    int approxlen, u32 portid, u32 seq,
-			    enum nl80211_commands cmd,
+			    struct wireless_dev *wdev, int approxlen,
+			    u32 portid, u32 seq, enum nl80211_commands cmd,
 			    enum nl80211_attrs attr,
 			    const struct nl80211_vendor_cmd_info *info,
 			    gfp_t gfp)
@@ -7421,6 +7421,16 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev,
 			goto nla_put_failure;
 	}
 
+	if (wdev) {
+		if (nla_put_u64(skb, NL80211_ATTR_WDEV,
+				wdev_id(wdev)))
+			goto nla_put_failure;
+		if (wdev->netdev &&
+		    nla_put_u32(skb, NL80211_ATTR_IFINDEX,
+				wdev->netdev->ifindex))
+			goto nla_put_failure;
+	}
+
 	data = nla_nest_start(skb, attr);
 
 	((void **)skb->cb)[0] = rdev;
@@ -7435,6 +7445,7 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev,
 }
 
 struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
+					   struct wireless_dev *wdev,
 					   enum nl80211_commands cmd,
 					   enum nl80211_attrs attr,
 					   int vendor_event_idx,
@@ -7460,7 +7471,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
 		return NULL;
 	}
 
-	return __cfg80211_alloc_vendor_skb(rdev, approxlen, 0, 0,
+	return __cfg80211_alloc_vendor_skb(rdev, wdev, approxlen, 0, 0,
 					   cmd, attr, info, gfp);
 }
 EXPORT_SYMBOL(__cfg80211_alloc_event_skb);
@@ -9906,7 +9917,7 @@ struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy,
 	if (WARN_ON(!rdev->cur_cmd_info))
 		return NULL;
 
-	return __cfg80211_alloc_vendor_skb(rdev, approxlen,
+	return __cfg80211_alloc_vendor_skb(rdev, NULL, approxlen,
 					   rdev->cur_cmd_info->snd_portid,
 					   rdev->cur_cmd_info->snd_seq,
 					   cmd, attr, NULL, GFP_KERNEL);
-- 
cgit v1.2.3


From 5fc7432991a86678b38a2d700edbe8bcd29cc579 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 27 Feb 2015 15:32:43 +0100
Subject: nl80211: add notes about userspace API/ABI modifications

Add notes about userspace ABI/API modifications, including the
fact that we decided that API submissions should come with a
driver implementation.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 8ee31f108407..90c5aeb3cca7 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -25,6 +25,19 @@
  *
  */
 
+/*
+ * This header file defines the userspace API to the wireless stack. Please
+ * be careful not to break things - i.e. don't move anything around or so
+ * unless you can demonstrate that it breaks neither API nor ABI.
+ *
+ * Additions to the API should be accompanied by actual implementations in
+ * an upstream driver, so that example implementations exist in case there
+ * are ever concerns about the precise semantics of the API or changes are
+ * needed, and to ensure that code for dead (no longer implemented) API
+ * can actually be identified and removed.
+ * Nonetheless, semantics should also be documented carefully in this file.
+ */
+
 #include <linux/types.h>
 
 #define NL80211_GENL_NAME "nl80211"
-- 
cgit v1.2.3


From 2ecc3905e6c51f545a44cc621216b5dfd7f94c50 Mon Sep 17 00:00:00 2001
From: Alexander Bondar <alexander.bondar@intel.com>
Date: Sun, 1 Mar 2015 09:10:00 +0200
Subject: mac80211: Update beacon's timing and DTIM count on every beacon

Beacon's timestamp, device system time associated with this beacon and
DTIM count parameters are not updated in the associated vif context
if the latest beacon's content is identical to the previously received.
It make sense to update these changing parameters on every beacon so the
driver can get most updated values. This may be necessary, for example,
to avoid either beacons' drift effect or device time stamp overrun.
IMPORTANT: Three sync_* parameters - sync_ts, sync_device_ts and
sync_dtim_count would possibly be out of sync by the time the driver will
use them. The synchronized view is currently guaranteed only in certain
callbacks.

Signed-off-by: Alexander Bondar <alexander.bondar@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  7 +++++--
 net/mac80211/mlme.c    | 32 ++++++++++++++++++++------------
 2 files changed, 25 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d52914b75331..3a029f0e303d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -337,12 +337,15 @@ enum ieee80211_rssi_event {
  *	HW flag %IEEE80211_HW_TIMING_BEACON_ONLY is set, then this can
  *	only come from a beacon, but might not become valid until after
  *	association when a beacon is received (which is notified with the
- *	%BSS_CHANGED_DTIM flag.)
+ *	%BSS_CHANGED_DTIM flag.). See also sync_dtim_count important notice.
  * @sync_device_ts: the device timestamp corresponding to the sync_tsf,
  *	the driver/device can use this to calculate synchronisation
- *	(see @sync_tsf)
+ *	(see @sync_tsf). See also sync_dtim_count important notice.
  * @sync_dtim_count: Only valid when %IEEE80211_HW_TIMING_BEACON_ONLY
  *	is requested, see @sync_tsf/@sync_device_ts.
+ *	IMPORTANT: These three sync_* parameters would possibly be out of sync
+ *	by the time the driver will use them. The synchronized view is currently
+ *	guaranteed only in certain callbacks.
  * @beacon_int: beacon interval
  * @assoc_capability: capabilities taken from assoc resp
  * @basic_rates: bitmap of basic rates, each bit stands for an
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 10ac6324c1d0..cf3ae9348a9d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3419,6 +3419,26 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	if (ifmgd->csa_waiting_bcn)
 		ieee80211_chswitch_post_beacon(sdata);
 
+	/*
+	 * Update beacon timing and dtim count on every beacon appearance. This
+	 * will allow the driver to use the most updated values. Do it before
+	 * comparing this one with last received beacon.
+	 * IMPORTANT: These parameters would possibly be out of sync by the time
+	 * the driver will use them. The synchronized view is currently
+	 * guaranteed only in certain callbacks.
+	 */
+	if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) {
+		sdata->vif.bss_conf.sync_tsf =
+			le64_to_cpu(mgmt->u.beacon.timestamp);
+		sdata->vif.bss_conf.sync_device_ts =
+			rx_status->device_timestamp;
+		if (elems.tim)
+			sdata->vif.bss_conf.sync_dtim_count =
+				elems.tim->dtim_count;
+		else
+			sdata->vif.bss_conf.sync_dtim_count = 0;
+	}
+
 	if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid)
 		return;
 	ifmgd->beacon_crc = ncrc;
@@ -3446,18 +3466,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		else
 			bss_conf->dtim_period = 1;
 
-		if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) {
-			sdata->vif.bss_conf.sync_tsf =
-				le64_to_cpu(mgmt->u.beacon.timestamp);
-			sdata->vif.bss_conf.sync_device_ts =
-				rx_status->device_timestamp;
-			if (elems.tim)
-				sdata->vif.bss_conf.sync_dtim_count =
-					elems.tim->dtim_count;
-			else
-				sdata->vif.bss_conf.sync_dtim_count = 0;
-		}
-
 		changed |= BSS_CHANGED_BEACON_INFO;
 		ifmgd->have_beacon = true;
 
-- 
cgit v1.2.3


From 1d5da757da860a6916adbf68b09e868062b4b3b8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Mar 2015 09:41:47 -0600
Subject: ax25: Stop using magic neighbour cache operations.

Before the ax25 stack calls dev_queue_xmit it always calls
ax25_type_trans which sets skb->protocol to ETH_P_AX25.

Which means that by looking at the protocol type it is possible to
detect IP packets that have not been munged by the ax25 stack in
ndo_start_xmit and call a function to munge them.

Rename ax25_neigh_xmit to ax25_ip_xmit and tweak the return type and
value to be appropriate for an ndo_start_xmit function.

Update all of the ax25 devices to test the protocol type for ETH_P_IP
and return ax25_ip_xmit as the first thing they do.  This preserves
the existing semantics of IP packet processing, but the timing will be
a little different as the IP packets now pass through the qdisc layer
before reaching the ax25 ip packet processing.

Remove the now unnecessary ax25 neighbour table operations.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/6pack.c      |  5 ++--
 drivers/net/hamradio/baycom_epp.c |  5 ++--
 drivers/net/hamradio/bpqether.c   |  5 ++--
 drivers/net/hamradio/dmascc.c     |  5 ++--
 drivers/net/hamradio/hdlcdrv.c    |  5 ++--
 drivers/net/hamradio/mkiss.c      |  5 ++--
 drivers/net/hamradio/scc.c        |  5 ++--
 drivers/net/hamradio/yam.c        |  5 ++--
 include/net/ax25.h                |  5 +---
 net/ax25/ax25_ip.c                | 60 ++++-----------------------------------
 10 files changed, 31 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 0b8393ca8c80..7c4a4151ef0f 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -247,6 +247,9 @@ static netdev_tx_t sp_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct sixpack *sp = netdev_priv(dev);
 
+	if (skb->protocol == htons(ETH_P_IP))
+		return ax25_ip_xmit(skb);
+
 	spin_lock_bh(&sp->lock);
 	/* We were not busy, so we are now... :-) */
 	netif_stop_queue(dev);
@@ -302,7 +305,6 @@ static const struct net_device_ops sp_netdev_ops = {
 	.ndo_stop		= sp_close,
 	.ndo_start_xmit		= sp_xmit,
 	.ndo_set_mac_address    = sp_set_mac_address,
-	.ndo_neigh_construct	= ax25_neigh_construct,
 };
 
 static void sp_setup(struct net_device *dev)
@@ -316,7 +318,6 @@ static void sp_setup(struct net_device *dev)
 
 	dev->addr_len		= AX25_ADDR_LEN;
 	dev->type		= ARPHRD_AX25;
-	dev->neigh_priv_len	= sizeof(struct ax25_neigh_priv);
 	dev->tx_queue_len	= 10;
 
 	/* Only activated in AX.25 mode */
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 3539ab392f7d..83c7cce0d172 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -772,6 +772,9 @@ static int baycom_send_packet(struct sk_buff *skb, struct net_device *dev)
 {
 	struct baycom_state *bc = netdev_priv(dev);
 
+	if (skb->protocol == htons(ETH_P_IP))
+		return ax25_ip_xmit(skb);
+
 	if (skb->data[0] != 0) {
 		do_kiss_params(bc, skb->data, skb->len);
 		dev_kfree_skb(skb);
@@ -1109,7 +1112,6 @@ static const struct net_device_ops baycom_netdev_ops = {
 	.ndo_do_ioctl	     = baycom_ioctl,
 	.ndo_start_xmit      = baycom_send_packet,
 	.ndo_set_mac_address = baycom_set_mac_address,
-	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 /*
@@ -1147,7 +1149,6 @@ static void baycom_probe(struct net_device *dev)
 	dev->header_ops = &ax25_header_ops;
 	
 	dev->type = ARPHRD_AX25;           /* AF_AX25 device */
-	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
 	dev->mtu = AX25_DEF_PACLEN;        /* eth_mtu is the default */
 	dev->addr_len = AX25_ADDR_LEN;     /* sizeof an ax.25 address */
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index bce105b16ed0..63ff08a26da8 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -251,6 +251,9 @@ static netdev_tx_t bpq_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct net_device *orig_dev;
 	int size;
 
+	if (skb->protocol == htons(ETH_P_IP))
+		return ax25_ip_xmit(skb);
+
 	/*
 	 * Just to be *really* sure not to send anything if the interface
 	 * is down, the ethernet device may have gone.
@@ -469,7 +472,6 @@ static const struct net_device_ops bpq_netdev_ops = {
 	.ndo_start_xmit	     = bpq_xmit,
 	.ndo_set_mac_address = bpq_set_mac_address,
 	.ndo_do_ioctl	     = bpq_ioctl,
-	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 static void bpq_setup(struct net_device *dev)
@@ -487,7 +489,6 @@ static void bpq_setup(struct net_device *dev)
 #endif
 
 	dev->type            = ARPHRD_AX25;
-	dev->neigh_priv_len  = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
 	dev->mtu             = AX25_DEF_PACLEN;
 	dev->addr_len        = AX25_ADDR_LEN;
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index abab7be77406..c3d377770616 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -433,7 +433,6 @@ module_exit(dmascc_exit);
 static void __init dev_setup(struct net_device *dev)
 {
 	dev->type = ARPHRD_AX25;
-	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN;
 	dev->mtu = 1500;
 	dev->addr_len = AX25_ADDR_LEN;
@@ -448,7 +447,6 @@ static const struct net_device_ops scc_netdev_ops = {
 	.ndo_start_xmit = scc_send_packet,
 	.ndo_do_ioctl = scc_ioctl,
 	.ndo_set_mac_address = scc_set_mac_address,
-	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 static int __init setup_adapter(int card_base, int type, int n)
@@ -922,6 +920,9 @@ static int scc_send_packet(struct sk_buff *skb, struct net_device *dev)
 	unsigned long flags;
 	int i;
 
+	if (skb->protocol == htons(ETH_P_IP))
+		return ax25_ip_xmit(skb);
+
 	/* Temporarily stop the scheduler feeding us packets */
 	netif_stop_queue(dev);
 
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index 435868a7b69c..49fe59b180a8 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -404,6 +404,9 @@ static netdev_tx_t hdlcdrv_send_packet(struct sk_buff *skb,
 {
 	struct hdlcdrv_state *sm = netdev_priv(dev);
 
+	if (skb->protocol == htons(ETH_P_IP))
+		return ax25_ip_xmit(skb);
+
 	if (skb->data[0] != 0) {
 		do_kiss_params(sm, skb->data, skb->len);
 		dev_kfree_skb(skb);
@@ -626,7 +629,6 @@ static const struct net_device_ops hdlcdrv_netdev = {
 	.ndo_start_xmit = hdlcdrv_send_packet,
 	.ndo_do_ioctl	= hdlcdrv_ioctl,
 	.ndo_set_mac_address = hdlcdrv_set_mac_address,
-	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 /*
@@ -677,7 +679,6 @@ static void hdlcdrv_setup(struct net_device *dev)
 	dev->header_ops = &ax25_header_ops;
 	
 	dev->type = ARPHRD_AX25;           /* AF_AX25 device */
-	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
 	dev->mtu = AX25_DEF_PACLEN;        /* eth_mtu is the default */
 	dev->addr_len = AX25_ADDR_LEN;     /* sizeof an ax.25 address */
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index c12ec2c2b594..17058c490b79 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -529,6 +529,9 @@ static netdev_tx_t ax_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mkiss *ax = netdev_priv(dev);
 
+	if (skb->protocol == htons(ETH_P_IP))
+		return ax25_ip_xmit(skb);
+
 	if (!netif_running(dev))  {
 		printk(KERN_ERR "mkiss: %s: xmit call when iface is down\n", dev->name);
 		return NETDEV_TX_BUSY;
@@ -641,7 +644,6 @@ static const struct net_device_ops ax_netdev_ops = {
 	.ndo_stop            = ax_close,
 	.ndo_start_xmit	     = ax_xmit,
 	.ndo_set_mac_address = ax_set_mac_address,
-	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 static void ax_setup(struct net_device *dev)
@@ -651,7 +653,6 @@ static void ax_setup(struct net_device *dev)
 	dev->hard_header_len = 0;
 	dev->addr_len        = 0;
 	dev->type            = ARPHRD_AX25;
-	dev->neigh_priv_len  = sizeof(struct ax25_neigh_priv);
 	dev->tx_queue_len    = 10;
 	dev->header_ops      = &ax25_header_ops;
 	dev->netdev_ops	     = &ax_netdev_ops;
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index b305f51eb420..ce88df33fe17 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -1550,7 +1550,6 @@ static const struct net_device_ops scc_netdev_ops = {
 	.ndo_set_mac_address = scc_net_set_mac_address,
 	.ndo_get_stats       = scc_net_get_stats,
 	.ndo_do_ioctl        = scc_net_ioctl,
-	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 /* ----> Initialize device <----- */
@@ -1568,7 +1567,6 @@ static void scc_net_setup(struct net_device *dev)
 	dev->flags      = 0;
 
 	dev->type = ARPHRD_AX25;
-	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN;
 	dev->mtu = AX25_DEF_PACLEN;
 	dev->addr_len = AX25_ADDR_LEN;
@@ -1641,6 +1639,9 @@ static netdev_tx_t scc_net_tx(struct sk_buff *skb, struct net_device *dev)
 	unsigned long flags;
 	char kisscmd;
 
+	if (skb->protocol == htons(ETH_P_IP))
+		return ax25_ip_xmit(skb);
+
 	if (skb->len > scc->stat.bufsize || skb->len < 2) {
 		scc->dev_stat.tx_dropped++;	/* bogus frame */
 		dev_kfree_skb(skb);
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 89d9da7a0c51..1a4729c36aa4 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -597,6 +597,9 @@ static netdev_tx_t yam_send_packet(struct sk_buff *skb,
 {
 	struct yam_port *yp = netdev_priv(dev);
 
+	if (skb->protocol == htons(ETH_P_IP))
+		return ax25_ip_xmit(skb);
+
 	skb_queue_tail(&yp->send_queue, skb);
 	dev->trans_start = jiffies;
 	return NETDEV_TX_OK;
@@ -1100,7 +1103,6 @@ static const struct net_device_ops yam_netdev_ops = {
 	.ndo_start_xmit      = yam_send_packet,
 	.ndo_do_ioctl 	     = yam_ioctl,
 	.ndo_set_mac_address = yam_set_mac_address,
-	.ndo_neigh_construct = ax25_neigh_construct,
 };
 
 static void yam_setup(struct net_device *dev)
@@ -1129,7 +1131,6 @@ static void yam_setup(struct net_device *dev)
 	dev->header_ops = &ax25_header_ops;
 
 	dev->type = ARPHRD_AX25;
-	dev->neigh_priv_len = sizeof(struct ax25_neigh_priv);
 	dev->hard_header_len = AX25_MAX_HEADER_LEN;
 	dev->mtu = AX25_MTU;
 	dev->addr_len = AX25_ADDR_LEN;
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 45feeba7a325..16a923a3a43a 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -367,11 +367,8 @@ int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *,
 		  struct net_device *);
 
 /* ax25_ip.c */
-int ax25_neigh_construct(struct neighbour *neigh);
+netdev_tx_t ax25_ip_xmit(struct sk_buff *skb);
 extern const struct header_ops ax25_header_ops;
-struct ax25_neigh_priv {
-	struct neigh_ops ops;
-};
 
 /* ax25_out.c */
 ax25_cb *ax25_send_frame(struct sk_buff *, int, ax25_address *, ax25_address *,
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index e030c64ebfb7..8b35af4ef93e 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -100,7 +100,7 @@ static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
 	return -AX25_HEADER_LEN;	/* Unfinished header */
 }
 
-static int ax25_neigh_xmit(struct sk_buff *skb)
+netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
 {
 	struct sk_buff *ourskb;
 	unsigned char *bp  = skb->data;
@@ -210,56 +210,7 @@ put:
 	if (route)
 		ax25_put_route(route);
 
-	return 1;
-}
-
-static int ax25_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
-{
-	/* Except for calling ax25_neigh_xmit instead of
-	 * dev_queue_xmit this is neigh_resolve_output.
-	 */
-	int rc = 0;
-
-	if (!neigh_event_send(neigh, skb)) {
-		int err;
-		struct net_device *dev = neigh->dev;
-		unsigned int seq;
-
-		do {
-			__skb_pull(skb, skb_network_offset(skb));
-			seq = read_seqbegin(&neigh->ha_lock);
-			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-					      neigh->ha, NULL, skb->len);
-		} while (read_seqretry(&neigh->ha_lock, seq));
-
-		if (err >= 0) {
-			ax25_neigh_xmit(skb);
-		} else
-			goto out_kfree_skb;
-	}
-out:
-	return rc;
-
-out_kfree_skb:
-	rc = -EINVAL;
-	kfree_skb(skb);
-	goto out;
-}
-
-int ax25_neigh_construct(struct neighbour *neigh)
-{
-	/* This trouble could be saved if ax25 would right a proper
-	 * dev_queue_xmit function.
-	 */
-	struct ax25_neigh_priv *priv = neighbour_priv(neigh);
-
-	if (neigh->tbl->family != AF_INET)
-		return -EINVAL;
-
-	priv->ops = *neigh->ops;
-	priv->ops.output = ax25_neigh_output;
-	priv->ops.connected_output = ax25_neigh_output;
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 #else	/* INET */
@@ -271,9 +222,10 @@ static int ax25_hard_header(struct sk_buff *skb, struct net_device *dev,
 	return -AX25_HEADER_LEN;
 }
 
-int ax25_neigh_construct(struct neighbour *neigh)
+netdev_tx_t ax25_ip_xmit(sturct sk_buff *skb)
 {
-	return 0;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
 }
 #endif
 
@@ -282,5 +234,5 @@ const struct header_ops ax25_header_ops = {
 };
 
 EXPORT_SYMBOL(ax25_header_ops);
-EXPORT_SYMBOL(ax25_neigh_construct);
+EXPORT_SYMBOL(ax25_ip_xmit);
 
-- 
cgit v1.2.3


From 60395a20ffd74166ea373ea91418d6f98fa7fdfb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Mar 2015 17:10:44 -0600
Subject: neigh: Factor out ___neigh_lookup_noref

While looking at the mpls code I found myself writing yet another
version of neigh_lookup_noref.  We currently have __ipv4_lookup_noref
and __ipv6_lookup_noref.

So to make my work a little easier and to make it a smidge easier to
verify/maintain the mpls code in the future I stopped and wrote
___neigh_lookup_noref.  Then I rewote __ipv4_lookup_noref and
__ipv6_lookup_noref in terms of this new function.  I tested my new
version by verifying that the same code is generated in
ip_finish_output2 and ip6_finish_output2 where these functions are
inlined.

To get to ___neigh_lookup_noref I added a new neighbour cache table
function key_eq.  So that the static size of the key would be
available.

I also added __neigh_lookup_noref for people who want to to lookup
a neighbour table entry quickly but don't know which neibhgour table
they are going to look up.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/arp.h       | 19 ++++--------------
 include/net/ndisc.h     | 19 +-----------------
 include/net/neighbour.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/neighbour.c    | 20 +++++--------------
 net/decnet/dn_neigh.c   |  6 ++++++
 net/ipv4/arp.c          |  9 ++++++++-
 net/ipv6/ndisc.c        |  7 +++++++
 7 files changed, 83 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/net/arp.h b/include/net/arp.h
index 21ee1860abbc..5e0f891d476c 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -9,28 +9,17 @@
 
 extern struct neigh_table arp_tbl;
 
-static inline u32 arp_hashfn(u32 key, const struct net_device *dev, u32 hash_rnd)
+static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 *hash_rnd)
 {
+	u32 key = *(const u32 *)pkey;
 	u32 val = key ^ hash32_ptr(dev);
 
-	return val * hash_rnd;
+	return val * hash_rnd[0];
 }
 
 static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
 {
-	struct neigh_hash_table *nht = rcu_dereference_bh(arp_tbl.nht);
-	struct neighbour *n;
-	u32 hash_val;
-
-	hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift);
-	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
-	     n != NULL;
-	     n = rcu_dereference_bh(n->next)) {
-		if (n->dev == dev && *(u32 *)n->primary_key == key)
-			return n;
-	}
-
-	return NULL;
+	return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev);
 }
 
 static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key)
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 6bbda34d5e59..b3a7751251b4 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -156,24 +156,7 @@ static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, _
 
 static inline struct neighbour *__ipv6_neigh_lookup_noref(struct net_device *dev, const void *pkey)
 {
-	struct neigh_hash_table *nht;
-	const u32 *p32 = pkey;
-	struct neighbour *n;
-	u32 hash_val;
-
-	nht = rcu_dereference_bh(nd_tbl.nht);
-	hash_val = ndisc_hashfn(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
-	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
-	     n != NULL;
-	     n = rcu_dereference_bh(n->next)) {
-		u32 *n32 = (u32 *) n->primary_key;
-		if (n->dev == dev &&
-		    ((n32[0] ^ p32[0]) | (n32[1] ^ p32[1]) |
-		     (n32[2] ^ p32[2]) | (n32[3] ^ p32[3])) == 0)
-			return n;
-	}
-
-	return NULL;
+	return ___neigh_lookup_noref(&nd_tbl, neigh_key_eq128, ndisc_hashfn, pkey, dev);
 }
 
 static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, const void *pkey)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 9f912e4d4232..14e3f017966b 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -197,6 +197,7 @@ struct neigh_table {
 	__u32			(*hash)(const void *pkey,
 					const struct net_device *dev,
 					__u32 *hash_rnd);
+	bool			(*key_eq)(const struct neighbour *, const void *pkey);
 	int			(*constructor)(struct neighbour *);
 	int			(*pconstructor)(struct pneigh_entry *);
 	void			(*pdestructor)(struct pneigh_entry *);
@@ -247,6 +248,57 @@ static inline void *neighbour_priv(const struct neighbour *n)
 #define NEIGH_UPDATE_F_ISROUTER			0x40000000
 #define NEIGH_UPDATE_F_ADMIN			0x80000000
 
+
+static inline bool neigh_key_eq16(const struct neighbour *n, const void *pkey)
+{
+	return *(const u16 *)n->primary_key == *(const u16 *)pkey;
+}
+
+static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
+{
+	return *(const u32 *)n->primary_key == *(const u32 *)pkey;
+}
+
+static inline bool neigh_key_eq128(const struct neighbour *n, const void *pkey)
+{
+	const u32 *n32 = (const u32 *)n->primary_key;
+	const u32 *p32 = pkey;
+
+	return ((n32[0] ^ p32[0]) | (n32[1] ^ p32[1]) |
+		(n32[2] ^ p32[2]) | (n32[3] ^ p32[3])) == 0;
+}
+
+static inline struct neighbour *___neigh_lookup_noref(
+	struct neigh_table *tbl,
+	bool (*key_eq)(const struct neighbour *n, const void *pkey),
+	__u32 (*hash)(const void *pkey,
+		      const struct net_device *dev,
+		      __u32 *hash_rnd),
+	const void *pkey,
+	struct net_device *dev)
+{
+	struct neigh_hash_table *nht = rcu_dereference_bh(tbl->nht);
+	struct neighbour *n;
+	u32 hash_val;
+
+	hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+	     n != NULL;
+	     n = rcu_dereference_bh(n->next)) {
+		if (n->dev == dev && key_eq(n, pkey))
+			return n;
+	}
+
+	return NULL;
+}
+
+static inline struct neighbour *__neigh_lookup_noref(struct neigh_table *tbl,
+						     const void *pkey,
+						     struct net_device *dev)
+{
+	return ___neigh_lookup_noref(tbl, tbl->key_eq, tbl->hash, pkey, dev);
+}
+
 void neigh_table_init(int index, struct neigh_table *tbl);
 int neigh_table_clear(int index, struct neigh_table *tbl);
 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 0f48ea3affed..fe3c6eac5805 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -397,25 +397,15 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
 			       struct net_device *dev)
 {
 	struct neighbour *n;
-	int key_len = tbl->key_len;
-	u32 hash_val;
-	struct neigh_hash_table *nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, lookups);
 
 	rcu_read_lock_bh();
-	nht = rcu_dereference_bh(tbl->nht);
-	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
-
-	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
-	     n != NULL;
-	     n = rcu_dereference_bh(n->next)) {
-		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
-			if (!atomic_inc_not_zero(&n->refcnt))
-				n = NULL;
-			NEIGH_CACHE_STAT_INC(tbl, hits);
-			break;
-		}
+	n = __neigh_lookup_noref(tbl, pkey, dev);
+	if (n) {
+		if (!atomic_inc_not_zero(&n->refcnt))
+			n = NULL;
+		NEIGH_CACHE_STAT_INC(tbl, hits);
 	}
 
 	rcu_read_unlock_bh();
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index f123c6c6748c..ee7d1cef0027 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -93,12 +93,18 @@ static u32 dn_neigh_hash(const void *pkey,
 	return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]);
 }
 
+static bool dn_key_eq(const struct neighbour *neigh, const void *pkey)
+{
+	return neigh_key_eq16(neigh, pkey);
+}
+
 struct neigh_table dn_neigh_table = {
 	.family =			PF_DECnet,
 	.entry_size =			NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)),
 	.key_len =			sizeof(__le16),
 	.protocol =			cpu_to_be16(ETH_P_DNA_RT),
 	.hash =				dn_neigh_hash,
+	.key_eq =			dn_key_eq,
 	.constructor =			dn_neigh_construct,
 	.id =				"dn_neigh_cache",
 	.parms ={
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 6b8aad6a0d7d..5f5c674e130a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -122,6 +122,7 @@
  *	Interface to generic neighbour cache.
  */
 static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd);
+static bool arp_key_eq(const struct neighbour *n, const void *pkey);
 static int arp_constructor(struct neighbour *neigh);
 static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
 static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -154,6 +155,7 @@ struct neigh_table arp_tbl = {
 	.key_len	= 4,
 	.protocol	= cpu_to_be16(ETH_P_IP),
 	.hash		= arp_hash,
+	.key_eq		= arp_key_eq,
 	.constructor	= arp_constructor,
 	.proxy_redo	= parp_redo,
 	.id		= "arp_cache",
@@ -209,7 +211,12 @@ static u32 arp_hash(const void *pkey,
 		    const struct net_device *dev,
 		    __u32 *hash_rnd)
 {
-	return arp_hashfn(*(u32 *)pkey, dev, *hash_rnd);
+	return arp_hashfn(pkey, dev, hash_rnd);
+}
+
+static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
+{
+	return neigh_key_eq32(neigh, pkey);
 }
 
 static int arp_constructor(struct neighbour *neigh)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e363bbc2420d..247ad7c298f7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -84,6 +84,7 @@ do {								\
 static u32 ndisc_hash(const void *pkey,
 		      const struct net_device *dev,
 		      __u32 *hash_rnd);
+static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey);
 static int ndisc_constructor(struct neighbour *neigh);
 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -119,6 +120,7 @@ struct neigh_table nd_tbl = {
 	.key_len =	sizeof(struct in6_addr),
 	.protocol =	cpu_to_be16(ETH_P_IPV6),
 	.hash =		ndisc_hash,
+	.key_eq =	ndisc_key_eq,
 	.constructor =	ndisc_constructor,
 	.pconstructor =	pndisc_constructor,
 	.pdestructor =	pndisc_destructor,
@@ -295,6 +297,11 @@ static u32 ndisc_hash(const void *pkey,
 	return ndisc_hashfn(pkey, dev, hash_rnd);
 }
 
+static bool ndisc_key_eq(const struct neighbour *n, const void *pkey)
+{
+	return neigh_key_eq128(n, pkey);
+}
+
 static int ndisc_constructor(struct neighbour *neigh)
 {
 	struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key;
-- 
cgit v1.2.3


From 4fd3d7d9e868ffbdb0e7a67c5c8e9dfdcd846a62 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Mar 2015 17:11:16 -0600
Subject: neigh: Add helper function neigh_xmit

For MPLS I am building the code so that either the neighbour mac
address can be specified or we can have a next hop in ipv4 or ipv6.

The kind of next hop we have is indicated by the neighbour table
pointer.  A neighbour table pointer of NULL is a link layer address.
A non-NULL neighbour table pointer indicates which neighbour table and
thus which address family the next hop address is in that we need to
look up.

The code either sends a packet directly or looks up the appropriate
neighbour table entry and sends the packet.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h |  3 +++
 net/core/neighbour.c    | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 14e3f017966b..afb8237b0a8c 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -358,6 +358,7 @@ void neigh_for_each(struct neigh_table *tbl,
 		    void (*cb)(struct neighbour *, void *), void *cookie);
 void __neigh_for_each_release(struct neigh_table *tbl,
 			      int (*cb)(struct neighbour *));
+int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *);
 void pneigh_for_each(struct neigh_table *tbl,
 		     void (*cb)(struct pneigh_entry *));
 
@@ -511,4 +512,6 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
 		memcpy(dst, n->ha, dev->addr_len);
 	} while (read_seqretry(&n->ha_lock, seq));
 }
+
+
 #endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index fe3c6eac5805..cffaf00561e7 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2391,6 +2391,40 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 }
 EXPORT_SYMBOL(__neigh_for_each_release);
 
+int neigh_xmit(int family, struct net_device *dev,
+	       const void *addr, struct sk_buff *skb)
+{
+	int err;
+	if (family == AF_PACKET) {
+		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+				      addr, NULL, skb->len);
+		if (err < 0)
+			goto out_kfree_skb;
+		err = dev_queue_xmit(skb);
+	} else {
+		struct neigh_table *tbl;
+		struct neighbour *neigh;
+
+		err = -ENETDOWN;
+		tbl = neigh_find_table(family);
+		if (!tbl)
+			goto out;
+		neigh = __neigh_lookup_noref(tbl, addr, dev);
+		if (!neigh)
+			neigh = __neigh_create(tbl, addr, dev, false);
+		err = PTR_ERR(neigh);
+		if (IS_ERR(neigh))
+			goto out_kfree_skb;
+		err = neigh->output(neigh, skb);
+	}
+out:
+	return err;
+out_kfree_skb:
+	kfree_skb(skb);
+	goto out;
+}
+EXPORT_SYMBOL(neigh_xmit);
+
 #ifdef CONFIG_PROC_FS
 
 static struct neighbour *neigh_get_first(struct seq_file *seq)
-- 
cgit v1.2.3


From 0189197f441602acdca3f97750d392a895b778fd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Mar 2015 19:10:47 -0600
Subject: mpls: Basic routing support

This change adds a new Kconfig option MPLS_ROUTING.

The core of this change is the code to look at an mpls packet received
from another machine.  Look that packet up in a routing table and
forward the packet on.

Support of MPLS over ATM is not considered or attempted here.  This
implemntation follows RFC3032 and implements the MPLS shim header that
can pass over essentially any network.

What RFC3021 refers to as the as the Incoming Label Map (ILM) I call
net->mpls.platform_label[].  What RFC3031 refers to as the Next Label
Hop Forwarding Entry (NHLFE) I call mpls_route.  Though calling it the
label fordwarding information base (lfib) might also be valid.

Further the implemntation forwards packets as described in RFC3032.
There is no need and given the original motivation for MPLS a strong
discincentive to have a flexible label forwarding path.  In essence
the logic is the topmost label is read, looked up, removed, and
replaced by 0 or more new lables and the sent out the specified
interface to it's next hop.

Quite a few optional features are not implemented here.  Among them
are generation of ICMP errors when the TTL is exceeded or the packet
is larger than the next hop MTU (those conditions are detected and the
packets are dropped instead of generating an icmp error).  The traffic
class field is always set to 0.  The implementation focuses on IP over
MPLS and does not handle egress of other kinds of protocols.

Instead of implementing coordination with the neighbour table and
sorting out how to input next hops in a different address family (for
which there is value).  I was lazy and implemented a next hop mac
address instead.  The code is simpler and there are flavor of MPLS
such as MPLS-TP where neither an IPv4 nor an IPv6 next hop is
appropriate so a next hop by mac address would need to be implemented
at some point.

Two new definitions AF_MPLS and PF_MPLS are exposed to userspace.

Decoding the mpls header must be done by first byeswapping a 32bit bit
endian word into the local cpu endian and then bit shifting to extract
the pieces.  There is no C bit-field that can represent a wire format
mpls header on a little endian machine as the low bits of the 20bit
label wind up in the wrong half of third byte.  Therefore internally
everything is deal with in cpu native byte order except when writing
to and reading from a packet.

For management simplicity if a label is configured to forward out
an interface that is down the packet is dropped early.  Similarly
if an network interface is removed rt_dev is updated to NULL
(so no reference is preserved) and any packets for that label
are dropped.  Keeping the label entries in the kernel allows
the kernel label table to function as the definitive source
of which labels are allocated and which are not.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h      |   2 +
 include/net/net_namespace.h |   4 +
 include/net/netns/mpls.h    |  15 ++
 net/mpls/Kconfig            |   5 +
 net/mpls/Makefile           |   1 +
 net/mpls/af_mpls.c          | 349 ++++++++++++++++++++++++++++++++++++++++++++
 net/mpls/internal.h         |  56 +++++++
 7 files changed, 432 insertions(+)
 create mode 100644 include/net/netns/mpls.h
 create mode 100644 net/mpls/af_mpls.c
 create mode 100644 net/mpls/internal.h

(limited to 'include')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 5c19cba34dce..fab4d0ddf4ed 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -181,6 +181,7 @@ struct ucred {
 #define AF_WANPIPE	25	/* Wanpipe API Sockets */
 #define AF_LLC		26	/* Linux LLC			*/
 #define AF_IB		27	/* Native InfiniBand address	*/
+#define AF_MPLS		28	/* MPLS */
 #define AF_CAN		29	/* Controller Area Network      */
 #define AF_TIPC		30	/* TIPC sockets			*/
 #define AF_BLUETOOTH	31	/* Bluetooth sockets 		*/
@@ -226,6 +227,7 @@ struct ucred {
 #define PF_WANPIPE	AF_WANPIPE
 #define PF_LLC		AF_LLC
 #define PF_IB		AF_IB
+#define PF_MPLS		AF_MPLS
 #define PF_CAN		AF_CAN
 #define PF_TIPC		AF_TIPC
 #define PF_BLUETOOTH	AF_BLUETOOTH
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 36faf4990c4b..2cb9acb618e9 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -26,6 +26,7 @@
 #endif
 #include <net/netns/nftables.h>
 #include <net/netns/xfrm.h>
+#include <net/netns/mpls.h>
 #include <linux/ns_common.h>
 
 struct user_namespace;
@@ -129,6 +130,9 @@ struct net {
 #endif
 #if IS_ENABLED(CONFIG_IP_VS)
 	struct netns_ipvs	*ipvs;
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+	struct netns_mpls	mpls;
 #endif
 	struct sock		*diag_nlsk;
 	atomic_t		fnhe_genid;
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
new file mode 100644
index 000000000000..f90aaf8d4f89
--- /dev/null
+++ b/include/net/netns/mpls.h
@@ -0,0 +1,15 @@
+/*
+ * mpls in net namespaces
+ */
+
+#ifndef __NETNS_MPLS_H__
+#define __NETNS_MPLS_H__
+
+struct mpls_route;
+
+struct netns_mpls {
+	size_t platform_labels;
+	struct mpls_route __rcu * __rcu *platform_label;
+};
+
+#endif /* __NETNS_MPLS_H__ */
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index a77fbcdd04ee..f4286ee7e2b0 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -22,4 +22,9 @@ config NET_MPLS_GSO
 	 that have had MPLS stack entries pushed onto them and thus
 	 become MPLS GSO packets.
 
+config MPLS_ROUTING
+	bool "MPLS: routing support"
+	help
+	 Add support for forwarding of mpls packets.
+
 endif # MPLS
diff --git a/net/mpls/Makefile b/net/mpls/Makefile
index 6dec088c2d0f..60af15f1960e 100644
--- a/net/mpls/Makefile
+++ b/net/mpls/Makefile
@@ -2,3 +2,4 @@
 # Makefile for MPLS.
 #
 obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o
+obj-$(CONFIG_MPLS_ROUTING) += af_mpls.o
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
new file mode 100644
index 000000000000..924377736b2a
--- /dev/null
+++ b/net/mpls/af_mpls.c
@@ -0,0 +1,349 @@
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/ipv6.h>
+#include <linux/mpls.h>
+#include <net/ip.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/arp.h>
+#include <net/ip_fib.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include "internal.h"
+
+#define MAX_NEW_LABELS 2
+
+/* This maximum ha length copied from the definition of struct neighbour */
+#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
+
+struct mpls_route { /* next hop label forwarding entry */
+	struct net_device 	*rt_dev;
+	struct rcu_head		rt_rcu;
+	u32			rt_label[MAX_NEW_LABELS];
+	u8			rt_protocol; /* routing protocol that set this entry */
+	u8			rt_labels:2,
+				rt_via_alen:6;
+	unsigned short		rt_via_family;
+	u8			rt_via[0];
+};
+
+static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
+{
+	struct mpls_route *rt = NULL;
+
+	if (index < net->mpls.platform_labels) {
+		struct mpls_route __rcu **platform_label =
+			rcu_dereference(net->mpls.platform_label);
+		rt = rcu_dereference(platform_label[index]);
+	}
+	return rt;
+}
+
+static bool mpls_output_possible(const struct net_device *dev)
+{
+	return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
+}
+
+static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
+{
+	/* The size of the layer 2.5 labels to be added for this route */
+	return rt->rt_labels * sizeof(struct mpls_shim_hdr);
+}
+
+static unsigned int mpls_dev_mtu(const struct net_device *dev)
+{
+	/* The amount of data the layer 2 frame can hold */
+	return dev->mtu;
+}
+
+static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu)
+		return false;
+
+	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+		return false;
+
+	return true;
+}
+
+static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
+			struct mpls_entry_decoded dec)
+{
+	/* RFC4385 and RFC5586 encode other packets in mpls such that
+	 * they don't conflict with the ip version number, making
+	 * decoding by examining the ip version correct in everything
+	 * except for the strangest cases.
+	 *
+	 * The strange cases if we choose to support them will require
+	 * manual configuration.
+	 */
+	struct iphdr *hdr4 = ip_hdr(skb);
+	bool success = true;
+
+	if (hdr4->version == 4) {
+		skb->protocol = htons(ETH_P_IP);
+		csum_replace2(&hdr4->check,
+			      htons(hdr4->ttl << 8),
+			      htons(dec.ttl << 8));
+		hdr4->ttl = dec.ttl;
+	}
+	else if (hdr4->version == 6) {
+		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
+		skb->protocol = htons(ETH_P_IPV6);
+		hdr6->hop_limit = dec.ttl;
+	}
+	else
+		/* version 0 and version 1 are used by pseudo wires */
+		success = false;
+	return success;
+}
+
+static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
+			struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct net *net = dev_net(dev);
+	struct mpls_shim_hdr *hdr;
+	struct mpls_route *rt;
+	struct mpls_entry_decoded dec;
+	struct net_device *out_dev;
+	unsigned int hh_len;
+	unsigned int new_header_size;
+	unsigned int mtu;
+	int err;
+
+	/* Careful this entire function runs inside of an rcu critical section */
+
+	if (skb->pkt_type != PACKET_HOST)
+		goto drop;
+
+	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+		goto drop;
+
+	if (!pskb_may_pull(skb, sizeof(*hdr)))
+		goto drop;
+
+	/* Read and decode the label */
+	hdr = mpls_hdr(skb);
+	dec = mpls_entry_decode(hdr);
+
+	/* Pop the label */
+	skb_pull(skb, sizeof(*hdr));
+	skb_reset_network_header(skb);
+
+	skb_orphan(skb);
+
+	rt = mpls_route_input_rcu(net, dec.label);
+	if (!rt)
+		goto drop;
+
+	/* Find the output device */
+	out_dev = rt->rt_dev;
+	if (!mpls_output_possible(out_dev))
+		goto drop;
+
+	if (skb_warn_if_lro(skb))
+		goto drop;
+
+	skb_forward_csum(skb);
+
+	/* Verify ttl is valid */
+	if (dec.ttl <= 2)
+		goto drop;
+	dec.ttl -= 1;
+
+	/* Verify the destination can hold the packet */
+	new_header_size = mpls_rt_header_size(rt);
+	mtu = mpls_dev_mtu(out_dev);
+	if (mpls_pkt_too_big(skb, mtu - new_header_size))
+		goto drop;
+
+	hh_len = LL_RESERVED_SPACE(out_dev);
+	if (!out_dev->header_ops)
+		hh_len = 0;
+
+	/* Ensure there is enough space for the headers in the skb */
+	if (skb_cow(skb, hh_len + new_header_size))
+		goto drop;
+
+	skb->dev = out_dev;
+	skb->protocol = htons(ETH_P_MPLS_UC);
+
+	if (unlikely(!new_header_size && dec.bos)) {
+		/* Penultimate hop popping */
+		if (!mpls_egress(rt, skb, dec))
+			goto drop;
+	} else {
+		bool bos;
+		int i;
+		skb_push(skb, new_header_size);
+		skb_reset_network_header(skb);
+		/* Push the new labels */
+		hdr = mpls_hdr(skb);
+		bos = dec.bos;
+		for (i = rt->rt_labels - 1; i >= 0; i--) {
+			hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos);
+			bos = false;
+		}
+	}
+
+	err = neigh_xmit(rt->rt_via_family, out_dev, rt->rt_via, skb);
+	if (err)
+		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
+				    __func__, err);
+	return 0;
+
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+static struct packet_type mpls_packet_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_MPLS_UC),
+	.func = mpls_forward,
+};
+
+static struct mpls_route *mpls_rt_alloc(size_t alen)
+{
+	struct mpls_route *rt;
+
+	rt = kzalloc(GFP_KERNEL, sizeof(*rt) + alen);
+	if (rt)
+		rt->rt_via_alen = alen;
+	return rt;
+}
+
+static void mpls_rt_free(struct mpls_route *rt)
+{
+	if (rt)
+		kfree_rcu(rt, rt_rcu);
+}
+
+static void mpls_route_update(struct net *net, unsigned index,
+			      struct net_device *dev, struct mpls_route *new,
+			      const struct nl_info *info)
+{
+	struct mpls_route *rt, *old = NULL;
+
+	ASSERT_RTNL();
+
+	rt = net->mpls.platform_label[index];
+	if (!dev || (rt && (rt->rt_dev == dev))) {
+		rcu_assign_pointer(net->mpls.platform_label[index], new);
+		old = rt;
+	}
+
+	/* If we removed a route free it now */
+	mpls_rt_free(old);
+}
+
+static void mpls_ifdown(struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+	unsigned index;
+
+	for (index = 0; index < net->mpls.platform_labels; index++) {
+		struct mpls_route *rt = net->mpls.platform_label[index];
+		if (!rt)
+			continue;
+		if (rt->rt_dev != dev)
+			continue;
+		rt->rt_dev = NULL;
+	}
+}
+
+static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
+			   void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	switch(event) {
+	case NETDEV_UNREGISTER:
+		mpls_ifdown(dev);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mpls_dev_notifier = {
+	.notifier_call = mpls_dev_notify,
+};
+
+static int mpls_net_init(struct net *net)
+{
+	net->mpls.platform_labels = 0;
+	net->mpls.platform_label = NULL;
+
+	return 0;
+}
+
+static void mpls_net_exit(struct net *net)
+{
+	unsigned int index;
+
+	/* An rcu grace period haselapsed since there was a device in
+	 * the network namespace (and thus the last in fqlight packet)
+	 * left this network namespace.  This is because
+	 * unregister_netdevice_many and netdev_run_todo has completed
+	 * for each network device that was in this network namespace.
+	 *
+	 * As such no additional rcu synchronization is necessary when
+	 * freeing the platform_label table.
+	 */
+	rtnl_lock();
+	for (index = 0; index < net->mpls.platform_labels; index++) {
+		struct mpls_route *rt = net->mpls.platform_label[index];
+		rcu_assign_pointer(net->mpls.platform_label[index], NULL);
+		mpls_rt_free(rt);
+	}
+	rtnl_unlock();
+
+	kvfree(net->mpls.platform_label);
+}
+
+static struct pernet_operations mpls_net_ops = {
+	.init = mpls_net_init,
+	.exit = mpls_net_exit,
+};
+
+static int __init mpls_init(void)
+{
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);
+
+	err = register_pernet_subsys(&mpls_net_ops);
+	if (err)
+		goto out;
+
+	err = register_netdevice_notifier(&mpls_dev_notifier);
+	if (err)
+		goto out_unregister_pernet;
+
+	dev_add_pack(&mpls_packet_type);
+
+	err = 0;
+out:
+	return err;
+
+out_unregister_pernet:
+	unregister_pernet_subsys(&mpls_net_ops);
+	goto out;
+}
+module_init(mpls_init);
+
+static void __exit mpls_exit(void)
+{
+	dev_remove_pack(&mpls_packet_type);
+	unregister_netdevice_notifier(&mpls_dev_notifier);
+	unregister_pernet_subsys(&mpls_net_ops);
+}
+module_exit(mpls_exit);
+
+MODULE_DESCRIPTION("MultiProtocol Label Switching");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_NETPROTO(PF_MPLS);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
new file mode 100644
index 000000000000..c2944cb84d48
--- /dev/null
+++ b/net/mpls/internal.h
@@ -0,0 +1,56 @@
+#ifndef MPLS_INTERNAL_H
+#define MPLS_INTERNAL_H
+
+#define LABEL_IPV4_EXPLICIT_NULL	0 /* RFC3032 */
+#define LABEL_ROUTER_ALERT_LABEL	1 /* RFC3032 */
+#define LABEL_IPV6_EXPLICIT_NULL	2 /* RFC3032 */
+#define LABEL_IMPLICIT_NULL		3 /* RFC3032 */
+#define LABEL_ENTROPY_INDICATOR		7 /* RFC6790 */
+#define LABEL_GAL			13 /* RFC5586 */
+#define LABEL_OAM_ALERT			14 /* RFC3429 */
+#define LABEL_EXTENSION			15 /* RFC7274 */
+
+
+struct mpls_shim_hdr {
+	__be32 label_stack_entry;
+};
+
+struct mpls_entry_decoded {
+	u32 label;
+	u8 ttl;
+	u8 tc;
+	u8 bos;
+};
+
+struct sk_buff;
+
+static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
+{
+	return (struct mpls_shim_hdr *)skb_network_header(skb);
+}
+
+static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos)
+{
+	struct mpls_shim_hdr result;
+	result.label_stack_entry =
+		cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
+			    (tc << MPLS_LS_TC_SHIFT) |
+			    (bos ? (1 << MPLS_LS_S_SHIFT) : 0) |
+			    (ttl << MPLS_LS_TTL_SHIFT));
+	return result;
+}
+
+static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
+{
+	struct mpls_entry_decoded result;
+	unsigned entry = be32_to_cpu(hdr->label_stack_entry);
+
+	result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+	result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+	result.tc =  (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+	result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
+
+	return result;
+}
+
+#endif /* MPLS_INTERNAL_H */
-- 
cgit v1.2.3


From 7720c01f3f590116882e251f13c7e1d5602f8643 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Mar 2015 19:11:20 -0600
Subject: mpls: Add a sysctl to control the size of the mpls label table

This sysctl gives two benefits.  By defaulting the table size to 0
mpls even when compiled in and enabled defaults to not forwarding
any packets.  This prevents unpleasant surprises for users.

The other benefit is that as mpls labels are allocated locally a dense
table a small dense label table may be used which saves memory and
is extremely simple and efficient to implement.

This sysctl allows userspace to choose the restrictions on the label
table size userspace applications need to cope with.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/mpls-sysctl.txt |  20 +++++
 include/net/netns/mpls.h                 |   2 +
 net/mpls/af_mpls.c                       | 146 +++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+)
 create mode 100644 Documentation/networking/mpls-sysctl.txt

(limited to 'include')

diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
new file mode 100644
index 000000000000..639ddf0ece9b
--- /dev/null
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -0,0 +1,20 @@
+/proc/sys/net/mpls/* Variables:
+
+platform_labels - INTEGER
+	Number of entries in the platform label table.  It is not
+	possible to configure forwarding for label values equal to or
+	greater than the number of platform labels.
+
+	A dense utliziation of the entries in the platform label table
+	is possible and expected aas the platform labels are locally
+	allocated.
+
+	If the number of platform label table entries is set to 0 no
+	label will be recognized by the kernel and mpls forwarding
+	will be disabled.
+
+	Reducing this value will remove all label routing entries that
+	no longer fit in the table.
+
+	Possible values: 0 - 1048575
+	Default: 0
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index f90aaf8d4f89..d29203651c01 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -6,10 +6,12 @@
 #define __NETNS_MPLS_H__
 
 struct mpls_route;
+struct ctl_table_header;
 
 struct netns_mpls {
 	size_t platform_labels;
 	struct mpls_route __rcu * __rcu *platform_label;
+	struct ctl_table_header *ctl;
 };
 
 #endif /* __NETNS_MPLS_H__ */
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 924377736b2a..b097125dfa33 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1,6 +1,7 @@
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/socket.h>
+#include <linux/sysctl.h>
 #include <linux/net.h>
 #include <linux/module.h>
 #include <linux/if_arp.h>
@@ -31,6 +32,9 @@ struct mpls_route { /* next hop label forwarding entry */
 	u8			rt_via[0];
 };
 
+static int zero = 0;
+static int label_limit = (1 << 20) - 1;
+
 static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
 {
 	struct mpls_route *rt = NULL;
@@ -273,18 +277,160 @@ static struct notifier_block mpls_dev_notifier = {
 	.notifier_call = mpls_dev_notify,
 };
 
+static int resize_platform_label_table(struct net *net, size_t limit)
+{
+	size_t size = sizeof(struct mpls_route *) * limit;
+	size_t old_limit;
+	size_t cp_size;
+	struct mpls_route __rcu **labels = NULL, **old;
+	struct mpls_route *rt0 = NULL, *rt2 = NULL;
+	unsigned index;
+
+	if (size) {
+		labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+		if (!labels)
+			labels = vzalloc(size);
+
+		if (!labels)
+			goto nolabels;
+	}
+
+	/* In case the predefined labels need to be populated */
+	if (limit > LABEL_IPV4_EXPLICIT_NULL) {
+		struct net_device *lo = net->loopback_dev;
+		rt0 = mpls_rt_alloc(lo->addr_len);
+		if (!rt0)
+			goto nort0;
+		rt0->rt_dev = lo;
+		rt0->rt_protocol = RTPROT_KERNEL;
+		rt0->rt_via_family = AF_PACKET;
+		memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
+	}
+	if (limit > LABEL_IPV6_EXPLICIT_NULL) {
+		struct net_device *lo = net->loopback_dev;
+		rt2 = mpls_rt_alloc(lo->addr_len);
+		if (!rt2)
+			goto nort2;
+		rt2->rt_dev = lo;
+		rt2->rt_protocol = RTPROT_KERNEL;
+		rt2->rt_via_family = AF_PACKET;
+		memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
+	}
+
+	rtnl_lock();
+	/* Remember the original table */
+	old = net->mpls.platform_label;
+	old_limit = net->mpls.platform_labels;
+
+	/* Free any labels beyond the new table */
+	for (index = limit; index < old_limit; index++)
+		mpls_route_update(net, index, NULL, NULL, NULL);
+
+	/* Copy over the old labels */
+	cp_size = size;
+	if (old_limit < limit)
+		cp_size = old_limit * sizeof(struct mpls_route *);
+
+	memcpy(labels, old, cp_size);
+
+	/* If needed set the predefined labels */
+	if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) &&
+	    (limit > LABEL_IPV6_EXPLICIT_NULL)) {
+		labels[LABEL_IPV6_EXPLICIT_NULL] = rt2;
+		rt2 = NULL;
+	}
+
+	if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) &&
+	    (limit > LABEL_IPV4_EXPLICIT_NULL)) {
+		labels[LABEL_IPV4_EXPLICIT_NULL] = rt0;
+		rt0 = NULL;
+	}
+
+	/* Update the global pointers */
+	net->mpls.platform_labels = limit;
+	net->mpls.platform_label = labels;
+
+	rtnl_unlock();
+
+	mpls_rt_free(rt2);
+	mpls_rt_free(rt0);
+
+	if (old) {
+		synchronize_rcu();
+		kvfree(old);
+	}
+	return 0;
+
+nort2:
+	mpls_rt_free(rt0);
+nort0:
+	kvfree(labels);
+nolabels:
+	return -ENOMEM;
+}
+
+static int mpls_platform_labels(struct ctl_table *table, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct net *net = table->data;
+	int platform_labels = net->mpls.platform_labels;
+	int ret;
+	struct ctl_table tmp = {
+		.procname	= table->procname,
+		.data		= &platform_labels,
+		.maxlen		= sizeof(int),
+		.mode		= table->mode,
+		.extra1		= &zero,
+		.extra2		= &label_limit,
+	};
+
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+	if (write && ret == 0)
+		ret = resize_platform_label_table(net, platform_labels);
+
+	return ret;
+}
+
+static struct ctl_table mpls_table[] = {
+	{
+		.procname	= "platform_labels",
+		.data		= NULL,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= mpls_platform_labels,
+	},
+	{ }
+};
+
 static int mpls_net_init(struct net *net)
 {
+	struct ctl_table *table;
+
 	net->mpls.platform_labels = 0;
 	net->mpls.platform_label = NULL;
 
+	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
+	if (table == NULL)
+		return -ENOMEM;
+
+	table[0].data = net;
+	net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
+	if (net->mpls.ctl == NULL)
+		return -ENOMEM;
+
 	return 0;
 }
 
 static void mpls_net_exit(struct net *net)
 {
+	struct ctl_table *table;
 	unsigned int index;
 
+	table = net->mpls.ctl->ctl_table_arg;
+	unregister_net_sysctl_table(net->mpls.ctl);
+	kfree(table);
+
 	/* An rcu grace period haselapsed since there was a device in
 	 * the network namespace (and thus the last in fqlight packet)
 	 * left this network namespace.  This is because
-- 
cgit v1.2.3


From 03c0566542f4c7a45ce3193f27cbf5700b506c18 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Mar 2015 19:13:56 -0600
Subject: mpls: Netlink commands to add, remove, and dump routes

This change adds two new netlink routing attributes:
RTA_VIA and RTA_NEWDST.

RTA_VIA specifies the specifies the next machine to send a packet to
like RTA_GATEWAY.  RTA_VIA differs from RTA_GATEWAY in that it
includes the address family of the address of the next machine to send
a packet to.  Currently the MPLS code supports addresses in AF_INET,
AF_INET6 and AF_PACKET.  For AF_INET and AF_INET6 the destination mac
address is acquired from the neighbour table.  For AF_PACKET the
destination mac_address is specified in the netlink configuration.

I think raw destination mac address support with the family AF_PACKET
will prove useful.  There is MPLS-TP which is defined to operate
on machines that do not support internet packets of any flavor.  Further
seem to be corner cases where it can be useful.  At this point
I don't care much either way.

RTA_NEWDST specifies the destination address to forward the packet
with.  MPLS typically changes it's destination address at every hop.
For a swap operation RTA_NEWDST is specified with a length of one label.
For a push operation RTA_NEWDST is specified with two or more labels.
For a pop operation RTA_NEWDST is not specified or equivalently an emtpy
RTAN_NEWDST is specified.

Those new netlink attributes are used to implement handling of rt-netlink
RTM_NEWROUTE, RTM_DELROUTE, and RTM_GETROUTE messages, to maintain the
MPLS label table.

rtm_to_route_config parses a netlink RTM_NEWROUTE or RTM_DELROUTE message,
verify no unhandled attributes or unhandled values are present and sets
up the data structures for mpls_route_add and mpls_route_del.

I did my best to match up with the existing conventions with the caveats
that MPLS addresses are all destination-specific-addresses, and so
don't properly have a scope.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h |   8 ++
 net/mpls/af_mpls.c             | 229 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 237 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 5cc5d66bf519..bad65550ae3e 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -303,6 +303,8 @@ enum rtattr_type_t {
 	RTA_TABLE,
 	RTA_MARK,
 	RTA_MFC_STATS,
+	RTA_VIA,
+	RTA_NEWDST,
 	__RTA_MAX
 };
 
@@ -344,6 +346,12 @@ struct rtnexthop {
 #define RTNH_SPACE(len)	RTNH_ALIGN(RTNH_LENGTH(len))
 #define RTNH_DATA(rtnh)   ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
 
+/* RTA_VIA */
+struct rtvia {
+	__kernel_sa_family_t	rtvia_family;
+	__u8			rtvia_addr[0];
+};
+
 /* RTM_CACHEINFO */
 
 struct rta_cacheinfo {
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 2d6612a10e30..b4d7cec398d2 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -212,6 +212,11 @@ static struct packet_type mpls_packet_type __read_mostly = {
 	.func = mpls_forward,
 };
 
+const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
+	[RTA_DST]		= { .type = NLA_U32 },
+	[RTA_OIF]		= { .type = NLA_U32 },
+};
+
 struct mpls_route_config {
 	u32		rc_protocol;
 	u32		rc_ifindex;
@@ -410,6 +415,22 @@ static struct notifier_block mpls_dev_notifier = {
 	.notifier_call = mpls_dev_notify,
 };
 
+static int nla_put_via(struct sk_buff *skb,
+		       u16 family, const void *addr, int alen)
+{
+	struct nlattr *nla;
+	struct rtvia *via;
+
+	nla = nla_reserve(skb, RTA_VIA, alen + 2);
+	if (!nla)
+		return -EMSGSIZE;
+
+	via = nla_data(nla);
+	via->rtvia_family = family;
+	memcpy(via->rtvia_addr, addr, alen);
+	return 0;
+}
+
 int nla_put_labels(struct sk_buff *skb, int attrtype,
 		   u8 labels, const u32 label[])
 {
@@ -467,6 +488,210 @@ int nla_get_labels(const struct nlattr *nla,
 	return 0;
 }
 
+static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
+			       struct mpls_route_config *cfg)
+{
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
+	int index;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy);
+	if (err < 0)
+		goto errout;
+
+	err = -EINVAL;
+	rtm = nlmsg_data(nlh);
+	memset(cfg, 0, sizeof(*cfg));
+
+	if (rtm->rtm_family != AF_MPLS)
+		goto errout;
+	if (rtm->rtm_dst_len != 20)
+		goto errout;
+	if (rtm->rtm_src_len != 0)
+		goto errout;
+	if (rtm->rtm_tos != 0)
+		goto errout;
+	if (rtm->rtm_table != RT_TABLE_MAIN)
+		goto errout;
+	/* Any value is acceptable for rtm_protocol */
+
+	/* As mpls uses destination specific addresses
+	 * (or source specific address in the case of multicast)
+	 * all addresses have universal scope.
+	 */
+	if (rtm->rtm_scope != RT_SCOPE_UNIVERSE)
+		goto errout;
+	if (rtm->rtm_type != RTN_UNICAST)
+		goto errout;
+	if (rtm->rtm_flags != 0)
+		goto errout;
+
+	cfg->rc_label		= LABEL_NOT_SPECIFIED;
+	cfg->rc_protocol	= rtm->rtm_protocol;
+	cfg->rc_nlflags		= nlh->nlmsg_flags;
+	cfg->rc_nlinfo.portid	= NETLINK_CB(skb).portid;
+	cfg->rc_nlinfo.nlh	= nlh;
+	cfg->rc_nlinfo.nl_net	= sock_net(skb->sk);
+
+	for (index = 0; index <= RTA_MAX; index++) {
+		struct nlattr *nla = tb[index];
+		if (!nla)
+			continue;
+
+		switch(index) {
+		case RTA_OIF:
+			cfg->rc_ifindex = nla_get_u32(nla);
+			break;
+		case RTA_NEWDST:
+			if (nla_get_labels(nla, MAX_NEW_LABELS,
+					   &cfg->rc_output_labels,
+					   cfg->rc_output_label))
+				goto errout;
+			break;
+		case RTA_DST:
+		{
+			u32 label_count;
+			if (nla_get_labels(nla, 1, &label_count,
+					   &cfg->rc_label))
+				goto errout;
+
+			/* The first 16 labels are reserved, and may not be set */
+			if (cfg->rc_label < 16)
+				goto errout;
+
+			break;
+		}
+		case RTA_VIA:
+		{
+			struct rtvia *via = nla_data(nla);
+			cfg->rc_via_family = via->rtvia_family;
+			cfg->rc_via_alen   = nla_len(nla) - 2;
+			if (cfg->rc_via_alen > MAX_VIA_ALEN)
+				goto errout;
+
+			/* Validate the address family */
+			switch(cfg->rc_via_family) {
+			case AF_PACKET:
+				break;
+			case AF_INET:
+				if (cfg->rc_via_alen != 4)
+					goto errout;
+				break;
+			case AF_INET6:
+				if (cfg->rc_via_alen != 16)
+					goto errout;
+				break;
+			default:
+				/* Unsupported address family */
+				goto errout;
+			}
+
+			memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen);
+			break;
+		}
+		default:
+			/* Unsupported attribute */
+			goto errout;
+		}
+	}
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct mpls_route_config cfg;
+	int err;
+
+	err = rtm_to_route_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
+
+	return mpls_route_del(&cfg);
+}
+
+
+static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct mpls_route_config cfg;
+	int err;
+
+	err = rtm_to_route_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
+
+	return mpls_route_add(&cfg);
+}
+
+static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
+			   u32 label, struct mpls_route *rt, int flags)
+{
+	struct nlmsghdr *nlh;
+	struct rtmsg *rtm;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	rtm = nlmsg_data(nlh);
+	rtm->rtm_family = AF_MPLS;
+	rtm->rtm_dst_len = 20;
+	rtm->rtm_src_len = 0;
+	rtm->rtm_tos = 0;
+	rtm->rtm_table = RT_TABLE_MAIN;
+	rtm->rtm_protocol = rt->rt_protocol;
+	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+	rtm->rtm_type = RTN_UNICAST;
+	rtm->rtm_flags = 0;
+
+	if (rt->rt_labels &&
+	    nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label))
+		goto nla_put_failure;
+	if (nla_put_via(skb, rt->rt_via_family, rt->rt_via, rt->rt_via_alen))
+		goto nla_put_failure;
+	if (rt->rt_dev && nla_put_u32(skb, RTA_OIF, rt->rt_dev->ifindex))
+		goto nla_put_failure;
+	if (nla_put_labels(skb, RTA_DST, 1, &label))
+		goto nla_put_failure;
+
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	unsigned int index;
+
+	ASSERT_RTNL();
+
+	index = cb->args[0];
+	if (index < 16)
+		index = 16;
+
+	for (; index < net->mpls.platform_labels; index++) {
+		struct mpls_route *rt;
+		rt = net->mpls.platform_label[index];
+		if (!rt)
+			continue;
+
+		if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
+				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+				    index, rt, NLM_F_MULTI) < 0)
+			break;
+	}
+	cb->args[0] = index;
+
+	return skb->len;
+}
+
 static int resize_platform_label_table(struct net *net, size_t limit)
 {
 	size_t size = sizeof(struct mpls_route *) * limit;
@@ -662,6 +887,9 @@ static int __init mpls_init(void)
 
 	dev_add_pack(&mpls_packet_type);
 
+	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
+	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL);
 	err = 0;
 out:
 	return err;
@@ -674,6 +902,7 @@ module_init(mpls_init);
 
 static void __exit mpls_exit(void)
 {
+	rtnl_unregister_all(PF_MPLS);
 	dev_remove_pack(&mpls_packet_type);
 	unregister_netdevice_notifier(&mpls_dev_notifier);
 	unregister_pernet_subsys(&mpls_net_ops);
-- 
cgit v1.2.3


From 8de147dc8e2adea82b8a1a2a08fcc983330f6770 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Mar 2015 19:14:31 -0600
Subject: mpls: Multicast route table change notifications

Unlike IPv4 this code notifies on all cases where mpls routes
are added or removed and it never automatically removes routes.
Avoiding both the userspace confusion that is caused by omitting
route updates and the possibility of a flood of netlink traffic
when an interface goes doew.

For now reserved labels are handled automatically and userspace
is not notified.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h |  2 ++
 net/mpls/af_mpls.c             | 60 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index bad65550ae3e..06f75a407f74 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -631,6 +631,8 @@ enum rtnetlink_groups {
 #define RTNLGRP_IPV6_NETCONF	RTNLGRP_IPV6_NETCONF
 	RTNLGRP_MDB,
 #define RTNLGRP_MDB		RTNLGRP_MDB
+	RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE	RTNLGRP_MPLS_ROUTE
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index b4d7cec398d2..75a994a50381 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -36,6 +36,10 @@ struct mpls_route { /* next hop label forwarding entry */
 static int zero = 0;
 static int label_limit = (1 << 20) - 1;
 
+static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
+		       struct nlmsghdr *nlh, struct net *net, u32 portid,
+		       unsigned int nlm_flags);
+
 static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
 {
 	struct mpls_route *rt = NULL;
@@ -246,6 +250,20 @@ static void mpls_rt_free(struct mpls_route *rt)
 		kfree_rcu(rt, rt_rcu);
 }
 
+static void mpls_notify_route(struct net *net, unsigned index,
+			      struct mpls_route *old, struct mpls_route *new,
+			      const struct nl_info *info)
+{
+	struct nlmsghdr *nlh = info ? info->nlh : NULL;
+	unsigned portid = info ? info->portid : 0;
+	int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
+	struct mpls_route *rt = new ? new : old;
+	unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
+	/* Ignore reserved labels for now */
+	if (rt && (index >= 16))
+		rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
+}
+
 static void mpls_route_update(struct net *net, unsigned index,
 			      struct net_device *dev, struct mpls_route *new,
 			      const struct nl_info *info)
@@ -260,6 +278,8 @@ static void mpls_route_update(struct net *net, unsigned index,
 		old = rt;
 	}
 
+	mpls_notify_route(net, index, old, new, info);
+
 	/* If we removed a route free it now */
 	mpls_rt_free(old);
 }
@@ -692,6 +712,46 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
+{
+	size_t payload =
+		NLMSG_ALIGN(sizeof(struct rtmsg))
+		+ nla_total_size(2 + rt->rt_via_alen)	/* RTA_VIA */
+		+ nla_total_size(4);			/* RTA_DST */
+	if (rt->rt_labels)				/* RTA_NEWDST */
+		payload += nla_total_size(rt->rt_labels * 4);
+	if (rt->rt_dev)					/* RTA_OIF */
+		payload += nla_total_size(4);
+	return payload;
+}
+
+static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
+		       struct nlmsghdr *nlh, struct net *net, u32 portid,
+		       unsigned int nlm_flags)
+{
+	struct sk_buff *skb;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);
+
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
+}
+
 static int resize_platform_label_table(struct net *net, size_t limit)
 {
 	size_t size = sizeof(struct mpls_route *) * limit;
-- 
cgit v1.2.3


From 98fc43864af9e74116eec81c290db048cded15d8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 1 Mar 2015 09:10:13 +0200
Subject: nl80211: prohibit mixing 'any' and regular wowlan triggers

If the device supports waking up on 'any' signal - i.e. it continues
operating as usual and wakes up the host on pretty much anything that
happens, then it makes no sense to also configure the more restricted
WoWLAN mode where the device operates more autonomously but also in a
more restricted fashion.

Currently only cw2100 supports both 'any' and other triggers, but it
seems to be broken as it doesn't configure anything to the device, so
we can't currently get into a situation where both even can correctly
be configured. This is about to change (Intel devices are going to
support both and have different behaviour depending on configuration)
so make sure the conflicting modes cannot be configured.

(It seems that cw2100 advertises 'any' and 'disconnect' as a means of
saying that's what it will always do, but that isn't really the way
this API was meant to be used nor does it actually mean anything as
'any' always implies 'disconnect' already, and the driver doesn't
change device configuration in any way depending on the settings.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  2 ++
 net/wireless/nl80211.c       | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 90c5aeb3cca7..37e7f39441e5 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3708,6 +3708,8 @@ struct nl80211_pattern_support {
  * @NL80211_WOWLAN_TRIG_ANY: wake up on any activity, do not really put
  *	the chip into a special state -- works best with chips that have
  *	support for low-power operation already (flag)
+ *	Note that this mode is incompatible with all of the others, if
+ *	any others are even supported by the device.
  * @NL80211_WOWLAN_TRIG_DISCONNECT: wake up on disconnect, the way disconnect
  *	is detected is implementation-specific (flag)
  * @NL80211_WOWLAN_TRIG_MAGIC_PKT: wake up on magic packet (6x 0xff, followed
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 01874628ae00..07cef3d7653e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9105,6 +9105,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 	const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan;
 	int err, i;
 	bool prev_enabled = rdev->wiphy.wowlan_config;
+	bool regular = false;
 
 	if (!wowlan)
 		return -EOPNOTSUPP;
@@ -9132,12 +9133,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 		if (!(wowlan->flags & WIPHY_WOWLAN_DISCONNECT))
 			return -EINVAL;
 		new_triggers.disconnect = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_MAGIC_PKT]) {
 		if (!(wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT))
 			return -EINVAL;
 		new_triggers.magic_pkt = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED])
@@ -9147,24 +9150,28 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 		if (!(wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE))
 			return -EINVAL;
 		new_triggers.gtk_rekey_failure = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST]) {
 		if (!(wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ))
 			return -EINVAL;
 		new_triggers.eap_identity_req = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE]) {
 		if (!(wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE))
 			return -EINVAL;
 		new_triggers.four_way_handshake = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_RFKILL_RELEASE]) {
 		if (!(wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE))
 			return -EINVAL;
 		new_triggers.rfkill_release = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) {
@@ -9173,6 +9180,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 		int rem, pat_len, mask_len, pkt_offset;
 		struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
 
+		regular = true;
+
 		nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
 				    rem)
 			n_patterns++;
@@ -9234,6 +9243,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) {
+		regular = true;
 		err = nl80211_parse_wowlan_tcp(
 			rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION],
 			&new_triggers);
@@ -9242,6 +9252,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_NET_DETECT]) {
+		regular = true;
 		err = nl80211_parse_wowlan_nd(
 			rdev, wowlan, tb[NL80211_WOWLAN_TRIG_NET_DETECT],
 			&new_triggers);
@@ -9249,6 +9260,17 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 			goto error;
 	}
 
+	/* The 'any' trigger means the device continues operating more or less
+	 * as in its normal operation mode and wakes up the host on most of the
+	 * normal interrupts (like packet RX, ...)
+	 * It therefore makes little sense to combine with the more constrained
+	 * wakeup trigger modes.
+	 */
+	if (new_triggers.any && regular) {
+		err = -EINVAL;
+		goto error;
+	}
+
 	ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL);
 	if (!ntrig) {
 		err = -ENOMEM;
-- 
cgit v1.2.3


From 3384d757d41521a3dee274ed2802bcd285ed8e62 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Sun, 1 Mar 2015 09:10:15 +0200
Subject: mac80211: allow iterating inactive interfaces

Sometimes the driver might want to modify private data in interfaces
that are down. One possible use-case is cleaning up interface state
after HW recovery. Some interfaces that were up before the recovery took
place might be down now, but they might still be "dirty".

Introduce a new iterate_interfaces() API and a new ACTIVE iterator flag.
This way the internal implementation of the both active and inactive
APIs remains the same.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 35 ++++++++++++++++++++++++++++++-----
 net/mac80211/util.c    | 29 ++++++++++++++++-------------
 2 files changed, 46 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3a029f0e303d..d1d6fbc13b1c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4346,12 +4346,32 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw);
  *	haven't been re-added to the driver yet.
  * @IEEE80211_IFACE_ITER_RESUME_ALL: During resume, iterate over all
  *	interfaces, even if they haven't been re-added to the driver yet.
+ * @IEEE80211_IFACE_ITER_ACTIVE: Iterate only active interfaces (netdev is up).
  */
 enum ieee80211_interface_iteration_flags {
 	IEEE80211_IFACE_ITER_NORMAL	= 0,
 	IEEE80211_IFACE_ITER_RESUME_ALL	= BIT(0),
+	IEEE80211_IFACE_ITER_ACTIVE	= BIT(1),
 };
 
+/**
+ * ieee80211_iterate_interfaces - iterate interfaces
+ *
+ * This function iterates over the interfaces associated with a given
+ * hardware and calls the callback for them. This includes active as well as
+ * inactive interfaces. This function allows the iterator function to sleep.
+ * Will iterate over a new interface during add_interface().
+ *
+ * @hw: the hardware struct of which the interfaces should be iterated over
+ * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags
+ * @iterator: the iterator function to call
+ * @data: first argument of the iterator function
+ */
+void ieee80211_iterate_interfaces(struct ieee80211_hw *hw, u32 iter_flags,
+				  void (*iterator)(void *data, u8 *mac,
+						   struct ieee80211_vif *vif),
+				  void *data);
+
 /**
  * ieee80211_iterate_active_interfaces - iterate active interfaces
  *
@@ -4367,11 +4387,16 @@ enum ieee80211_interface_iteration_flags {
  * @iterator: the iterator function to call
  * @data: first argument of the iterator function
  */
-void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw,
-					 u32 iter_flags,
-					 void (*iterator)(void *data, u8 *mac,
-						struct ieee80211_vif *vif),
-					 void *data);
+static inline void
+ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw, u32 iter_flags,
+				    void (*iterator)(void *data, u8 *mac,
+						     struct ieee80211_vif *vif),
+				    void *data)
+{
+	ieee80211_iterate_interfaces(hw,
+				     iter_flags | IEEE80211_IFACE_ITER_ACTIVE,
+				     iterator, data);
+}
 
 /**
  * ieee80211_iterate_active_interfaces_atomic - iterate active interfaces
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 327886748a1d..37d85d36dd2c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -625,13 +625,14 @@ void ieee80211_wake_vif_queues(struct ieee80211_local *local,
 					reason, true);
 }
 
-static void __iterate_active_interfaces(struct ieee80211_local *local,
-					u32 iter_flags,
-					void (*iterator)(void *data, u8 *mac,
-						struct ieee80211_vif *vif),
-					void *data)
+static void __iterate_interfaces(struct ieee80211_local *local,
+				 u32 iter_flags,
+				 void (*iterator)(void *data, u8 *mac,
+						  struct ieee80211_vif *vif),
+				 void *data)
 {
 	struct ieee80211_sub_if_data *sdata;
+	bool active_only = iter_flags & IEEE80211_IFACE_ITER_ACTIVE;
 
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		switch (sdata->vif.type) {
@@ -645,9 +646,9 @@ static void __iterate_active_interfaces(struct ieee80211_local *local,
 			break;
 		}
 		if (!(iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL) &&
-		    !(sdata->flags & IEEE80211_SDATA_IN_DRIVER))
+		    active_only && !(sdata->flags & IEEE80211_SDATA_IN_DRIVER))
 			continue;
-		if (ieee80211_sdata_running(sdata))
+		if (ieee80211_sdata_running(sdata) || !active_only)
 			iterator(data, sdata->vif.addr,
 				 &sdata->vif);
 	}
@@ -656,12 +657,12 @@ static void __iterate_active_interfaces(struct ieee80211_local *local,
 				      lockdep_is_held(&local->iflist_mtx) ||
 				      lockdep_rtnl_is_held());
 	if (sdata &&
-	    (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL ||
+	    (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only ||
 	     sdata->flags & IEEE80211_SDATA_IN_DRIVER))
 		iterator(data, sdata->vif.addr, &sdata->vif);
 }
 
-void ieee80211_iterate_active_interfaces(
+void ieee80211_iterate_interfaces(
 	struct ieee80211_hw *hw, u32 iter_flags,
 	void (*iterator)(void *data, u8 *mac,
 			 struct ieee80211_vif *vif),
@@ -670,10 +671,10 @@ void ieee80211_iterate_active_interfaces(
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	mutex_lock(&local->iflist_mtx);
-	__iterate_active_interfaces(local, iter_flags, iterator, data);
+	__iterate_interfaces(local, iter_flags, iterator, data);
 	mutex_unlock(&local->iflist_mtx);
 }
-EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
+EXPORT_SYMBOL_GPL(ieee80211_iterate_interfaces);
 
 void ieee80211_iterate_active_interfaces_atomic(
 	struct ieee80211_hw *hw, u32 iter_flags,
@@ -684,7 +685,8 @@ void ieee80211_iterate_active_interfaces_atomic(
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	rcu_read_lock();
-	__iterate_active_interfaces(local, iter_flags, iterator, data);
+	__iterate_interfaces(local, iter_flags | IEEE80211_IFACE_ITER_ACTIVE,
+			     iterator, data);
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
@@ -699,7 +701,8 @@ void ieee80211_iterate_active_interfaces_rtnl(
 
 	ASSERT_RTNL();
 
-	__iterate_active_interfaces(local, iter_flags, iterator, data);
+	__iterate_interfaces(local, iter_flags | IEEE80211_IFACE_ITER_ACTIVE,
+			     iterator, data);
 }
 EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_rtnl);
 
-- 
cgit v1.2.3


From 64a8cef41a8ce694b59ec75ae52688f58925693c Mon Sep 17 00:00:00 2001
From: SenthilKumar Jegadeesan <sjegadee@qti.qualcomm.com>
Date: Mon, 2 Mar 2015 13:29:40 +0530
Subject: mac80211: provide station PMF configuration to driver

Some device drivers offload part of aggregation including AddBA/DelBA
negotiations to firmware. In such scenario, the PMF configuration of
the station needs to be provided to driver to enable encryption of
AddBA/DelBA action frames.

Signed-off-by: SenthilKumar Jegadeesan <sjegadee@qti.qualcomm.com>
[fix commit log, documentation]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 net/mac80211/cfg.c     | 1 +
 net/mac80211/mlme.c    | 6 +++++-
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d1d6fbc13b1c..a7756e45465e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1491,6 +1491,7 @@ struct ieee80211_sta_rates {
  * @tdls: indicates whether the STA is a TDLS peer
  * @tdls_initiator: indicates the STA is an initiator of the TDLS link. Only
  *	valid if the STA is a TDLS peer in the first place.
+ * @mfp: indicates whether the STA uses management frame protection or not.
  */
 struct ieee80211_sta {
 	u32 supp_rates[IEEE80211_NUM_BANDS];
@@ -1507,6 +1508,7 @@ struct ieee80211_sta {
 	struct ieee80211_sta_rates __rcu *rates;
 	bool tdls;
 	bool tdls_initiator;
+	bool mfp;
 
 	/* must be last */
 	u8 drv_priv[0] __aligned(sizeof(void *));
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 06557e4f9588..94889def2ef5 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1068,6 +1068,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 		sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME);
 
 	if (mask & BIT(NL80211_STA_FLAG_MFP)) {
+		sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP));
 		if (set & BIT(NL80211_STA_FLAG_MFP))
 			set_sta_flag(sta, WLAN_STA_MFP);
 		else
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index bc2975e91272..539d6a976cbf 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2966,8 +2966,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 
 	rate_control_rate_init(sta);
 
-	if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED)
+	if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) {
 		set_sta_flag(sta, WLAN_STA_MFP);
+		sta->sta.mfp = true;
+	} else {
+		sta->sta.mfp = false;
+	}
 
 	sta->sta.wme = elems.wmm_param;
 
-- 
cgit v1.2.3


From a7e53531234dc206bb75abb5305a72665dd4d75d Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Wed, 4 Mar 2015 15:02:44 -0800
Subject: fib_trie: Make fib_table rcu safe

The fib_table was wrapped in several places with an
rcu_read_lock/rcu_read_unlock however after looking over the code I found
several spots where the tables were being accessed as just standard
pointers without any protections.  This change fixes that so that all of
the proper protections are in place when accessing the table to take RCU
replacement or removal of the table into account.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     | 70 +++++++++++++++++++++++++++++-------------------
 include/net/netns/ipv4.h |  7 ++---
 net/ipv4/fib_frontend.c  | 52 ++++++++++++++++++++++++-----------
 net/ipv4/fib_trie.c      | 21 ++++++++++-----
 4 files changed, 98 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index cba4b7c32935..825cb2800908 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -185,6 +185,7 @@ struct fib_table {
 	u32			tb_id;
 	int			tb_default;
 	int			tb_num_default;
+	struct rcu_head		rcu;
 	unsigned long		tb_data[0];
 };
 
@@ -206,12 +207,16 @@ void fib_free_table(struct fib_table *tb);
 
 static inline struct fib_table *fib_get_table(struct net *net, u32 id)
 {
+	struct hlist_node *tb_hlist;
 	struct hlist_head *ptr;
 
 	ptr = id == RT_TABLE_LOCAL ?
 		&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] :
 		&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX];
-	return hlist_entry(ptr->first, struct fib_table, tb_hlist);
+
+	tb_hlist = rcu_dereference_rtnl(hlist_first_rcu(ptr));
+
+	return hlist_entry(tb_hlist, struct fib_table, tb_hlist);
 }
 
 static inline struct fib_table *fib_new_table(struct net *net, u32 id)
@@ -222,15 +227,19 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id)
 static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
 			     struct fib_result *res)
 {
-	int err = -ENETUNREACH;
+	struct fib_table *tb;
+	int err;
 
 	rcu_read_lock();
 
-	if (!fib_table_lookup(fib_get_table(net, RT_TABLE_LOCAL), flp, res,
-			      FIB_LOOKUP_NOREF) ||
-	    !fib_table_lookup(fib_get_table(net, RT_TABLE_MAIN), flp, res,
-			      FIB_LOOKUP_NOREF))
-		err = 0;
+	for (err = 0; !err; err = -ENETUNREACH) {
+		tb = fib_get_table(net, RT_TABLE_LOCAL);
+		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+			break;
+		tb = fib_get_table(net, RT_TABLE_MAIN);
+		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+			break;
+	}
 
 	rcu_read_unlock();
 
@@ -249,28 +258,33 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res);
 static inline int fib_lookup(struct net *net, struct flowi4 *flp,
 			     struct fib_result *res)
 {
-	if (!net->ipv4.fib_has_custom_rules) {
-		int err = -ENETUNREACH;
-
-		rcu_read_lock();
-
-		res->tclassid = 0;
-		if ((net->ipv4.fib_local &&
-		     !fib_table_lookup(net->ipv4.fib_local, flp, res,
-				       FIB_LOOKUP_NOREF)) ||
-		    (net->ipv4.fib_main &&
-		     !fib_table_lookup(net->ipv4.fib_main, flp, res,
-				       FIB_LOOKUP_NOREF)) ||
-		    (net->ipv4.fib_default &&
-		     !fib_table_lookup(net->ipv4.fib_default, flp, res,
-				       FIB_LOOKUP_NOREF)))
-			err = 0;
-
-		rcu_read_unlock();
-
-		return err;
+	struct fib_table *tb;
+	int err;
+
+	if (net->ipv4.fib_has_custom_rules)
+		return __fib_lookup(net, flp, res);
+
+	rcu_read_lock();
+
+	res->tclassid = 0;
+
+	for (err = 0; !err; err = -ENETUNREACH) {
+		tb = rcu_dereference_rtnl(net->ipv4.fib_local);
+		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+			break;
+
+		tb = rcu_dereference_rtnl(net->ipv4.fib_main);
+		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+			break;
+
+		tb = rcu_dereference_rtnl(net->ipv4.fib_default);
+		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+			break;
 	}
-	return __fib_lookup(net, flp, res);
+
+	rcu_read_unlock();
+
+	return err;
 }
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1b26c6c3fd7c..db1db158a00e 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -7,6 +7,7 @@
 
 #include <linux/uidgid.h>
 #include <net/inet_frag.h>
+#include <linux/rcupdate.h>
 
 struct tcpm_hash_bucket;
 struct ctl_table_header;
@@ -38,9 +39,9 @@ struct netns_ipv4 {
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_rules_ops	*rules_ops;
 	bool			fib_has_custom_rules;
-	struct fib_table	*fib_local;
-	struct fib_table	*fib_main;
-	struct fib_table	*fib_default;
+	struct fib_table __rcu	*fib_local;
+	struct fib_table __rcu	*fib_main;
+	struct fib_table __rcu	*fib_default;
 #endif
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	int			fib_num_tclassid_users;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 57be71dd6a9e..220c4b4af4cf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -89,17 +89,14 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
 
 	switch (id) {
 	case RT_TABLE_LOCAL:
-		net->ipv4.fib_local = tb;
+		rcu_assign_pointer(net->ipv4.fib_local, tb);
 		break;
-
 	case RT_TABLE_MAIN:
-		net->ipv4.fib_main = tb;
+		rcu_assign_pointer(net->ipv4.fib_main, tb);
 		break;
-
 	case RT_TABLE_DEFAULT:
-		net->ipv4.fib_default = tb;
+		rcu_assign_pointer(net->ipv4.fib_default, tb);
 		break;
-
 	default:
 		break;
 	}
@@ -132,13 +129,14 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 static void fib_flush(struct net *net)
 {
 	int flushed = 0;
-	struct fib_table *tb;
-	struct hlist_head *head;
 	unsigned int h;
 
 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
-		head = &net->ipv4.fib_table_hash[h];
-		hlist_for_each_entry(tb, head, tb_hlist)
+		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+		struct hlist_node *tmp;
+		struct fib_table *tb;
+
+		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
 			flushed += fib_table_flush(tb);
 	}
 
@@ -665,10 +663,12 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	s_h = cb->args[0];
 	s_e = cb->args[1];
 
+	rcu_read_lock();
+
 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
 		e = 0;
 		head = &net->ipv4.fib_table_hash[h];
-		hlist_for_each_entry(tb, head, tb_hlist) {
+		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
 			if (e < s_e)
 				goto next;
 			if (dumped)
@@ -682,6 +682,8 @@ next:
 		}
 	}
 out:
+	rcu_read_unlock();
+
 	cb->args[1] = e;
 	cb->args[0] = h;
 
@@ -1117,14 +1119,34 @@ static void ip_fib_net_exit(struct net *net)
 
 	rtnl_lock();
 	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
-		struct fib_table *tb;
-		struct hlist_head *head;
+		struct hlist_head *head = &net->ipv4.fib_table_hash[i];
 		struct hlist_node *tmp;
+		struct fib_table *tb;
+
+		/* this is done in two passes as flushing the table could
+		 * cause it to be reallocated in order to accommodate new
+		 * tnodes at the root as the table shrinks.
+		 */
+		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
+			fib_table_flush(tb);
 
-		head = &net->ipv4.fib_table_hash[i];
 		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+			switch (tb->tb_id) {
+			case RT_TABLE_LOCAL:
+				RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
+				break;
+			case RT_TABLE_MAIN:
+				RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
+				break;
+			case RT_TABLE_DEFAULT:
+				RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
+				break;
+			default:
+				break;
+			}
+#endif
 			hlist_del(&tb->tb_hlist);
-			fib_table_flush(tb);
 			fib_free_table(tb);
 		}
 	}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2233ebf2aae8..3642b17c8726 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -193,6 +193,13 @@ static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn,
 	return rcu_dereference_rtnl(tn->tnode[i]);
 }
 
+static inline struct fib_table *trie_get_table(struct trie *t)
+{
+	unsigned long *tb_data = (unsigned long *)t;
+
+	return container_of(tb_data, struct fib_table, tb_data[0]);
+}
+
 /* To understand this stuff, an understanding of keys and all their bits is
  * necessary. Every node in the trie has a key associated with it, but not
  * all of the bits in that key are significant.
@@ -1593,8 +1600,9 @@ flush_complete:
 	return found;
 }
 
-void fib_free_table(struct fib_table *tb)
+static void __trie_free_rcu(struct rcu_head *head)
 {
+	struct fib_table *tb = container_of(head, struct fib_table, rcu);
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	struct trie *t = (struct trie *)tb->tb_data;
 
@@ -1603,6 +1611,11 @@ void fib_free_table(struct fib_table *tb)
 	kfree(tb);
 }
 
+void fib_free_table(struct fib_table *tb)
+{
+	call_rcu(&tb->rcu, __trie_free_rcu);
+}
+
 static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
 			     struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -1639,6 +1652,7 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
 	return skb->len;
 }
 
+/* rcu_read_lock needs to be hold by caller from readside */
 int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 		   struct netlink_callback *cb)
 {
@@ -1650,15 +1664,12 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 	int count = cb->args[2];
 	t_key key = cb->args[3];
 
-	rcu_read_lock();
-
 	tp = rcu_dereference_rtnl(t->trie);
 
 	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
 		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
 			cb->args[3] = key;
 			cb->args[2] = count;
-			rcu_read_unlock();
 			return -1;
 		}
 
@@ -1673,8 +1684,6 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 			break;
 	}
 
-	rcu_read_unlock();
-
 	cb->args[3] = key;
 	cb->args[2] = count;
 
-- 
cgit v1.2.3


From 82f8b651a94d5c7090563fe55cfdb286c461a16c Mon Sep 17 00:00:00 2001
From: Jakub Pawlowski <jpawlowski@google.com>
Date: Wed, 4 Mar 2015 16:24:26 -0800
Subject: Bluetooth: fix service discovery behaviour for empty uuids filter

This patch fixes service discovery behaviour, when provided uuid filter
is empty and HCI_QUIRK_STRICT_DUPLICATE_FILTER is set. Before this
patch, empty uuid filter was unable to trigger scan restart, and that
caused inconsistent behaviour in applications.

Example: two DBus clients call BlueZ, one to find all devices with
service abcd, second to find all devices with rssi smaller than -90.
Sum of those filters, that is passed to mgmt_service_scan is empty
filter, with no rssi or uuids set.
That caused kernel not to restart scan when quirk was set.
That was inconsistent with what happen when there's only one of those
two filters set (scan is restarted and reports devices).

To fix that, new variable hdev->discovery.result_filtering was
introduced. It can indicate that filtered scan is running, no matter
what uuid or rssi filter is set.

Signed-off-by: Jakub Pawlowski <jpawlowski@google.com>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 2 ++
 net/bluetooth/mgmt.c             | 7 +++----
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index acec9140c3f9..15c761c1f82a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -76,6 +76,7 @@ struct discovery_state {
 	u8			last_adv_data[HCI_MAX_AD_LENGTH];
 	u8			last_adv_data_len;
 	bool			report_invalid_rssi;
+	bool			result_filtering;
 	s8			rssi;
 	u16			uuid_count;
 	u8			(*uuids)[16];
@@ -525,6 +526,7 @@ static inline void discovery_init(struct hci_dev *hdev)
 
 static inline void hci_discovery_filter_clear(struct hci_dev *hdev)
 {
+	hdev->discovery.result_filtering = false;
 	hdev->discovery.report_invalid_rssi = true;
 	hdev->discovery.rssi = HCI_RSSI_INVALID;
 	hdev->discovery.uuid_count = 0;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index bc09c5a37032..967f07fdbbbe 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3933,8 +3933,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status,
 		 */
 		if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER,
 			     &hdev->quirks) &&
-		    (hdev->discovery.uuid_count > 0 ||
-		     hdev->discovery.rssi != HCI_RSSI_INVALID)) {
+		    hdev->discovery.result_filtering) {
 			hdev->discovery.scan_start = jiffies;
 			hdev->discovery.scan_duration = timeout;
 		}
@@ -4087,6 +4086,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
 	 */
 	hci_discovery_filter_clear(hdev);
 
+	hdev->discovery.result_filtering = true;
 	hdev->discovery.type = cp->type;
 	hdev->discovery.rssi = cp->rssi;
 	hdev->discovery.uuid_count = uuid_count;
@@ -7344,8 +7344,7 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 			return;
 	}
 
-	if (hdev->discovery.rssi != HCI_RSSI_INVALID ||
-	    hdev->discovery.uuid_count > 0) {
+	if (hdev->discovery.result_filtering) {
 		/* We are using service discovery */
 		if (!is_filter_match(hdev, rssi, eir, eir_len, scan_rsp,
 				     scan_rsp_len))
-- 
cgit v1.2.3


From c32ec2a11321978c34296d9a6bd5b0c31a2eb182 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Wed, 4 Mar 2015 12:14:41 +0100
Subject: bcma: make bcma_host_pci_(up|down) calls safe for every config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We were providing declarations but actual code was compiled only with
CONFIG_BCMA_HOST_PCI set. This could result in:
ERROR: "bcma_host_pci_down" [drivers/net/wireless/brcm80211/brcmsmac/brcmsmac.ko] undefined!
ERROR: "bcma_host_pci_up" [drivers/net/wireless/brcm80211/brcmsmac/brcmsmac.ko] undefined!
ERROR: "bcma_host_pci_down" [drivers/net/wireless/b43/b43.ko] undefined!
ERROR: "bcma_host_pci_up" [drivers/net/wireless/b43/b43.ko] undefined!

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/bcma/bcma.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 037620b3f113..44057b45ed32 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -434,8 +434,17 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus,
 	return bcma_find_core_unit(bus, coreid, 0);
 }
 
+#ifdef CONFIG_BCMA_HOST_PCI
 extern void bcma_host_pci_up(struct bcma_bus *bus);
 extern void bcma_host_pci_down(struct bcma_bus *bus);
+#else
+static inline void bcma_host_pci_up(struct bcma_bus *bus)
+{
+}
+static inline void bcma_host_pci_down(struct bcma_bus *bus)
+{
+}
+#endif
 
 extern bool bcma_core_is_enabled(struct bcma_device *core);
 extern void bcma_core_disable(struct bcma_device *core, u32 flags);
-- 
cgit v1.2.3


From 0a4e699a41f767dff76ca7dc1019b9ca6de3eb42 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Wed, 4 Mar 2015 14:24:52 +0100
Subject: bcma: move internal function declarations to private header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These functions are not exported nor used anywhere, so there is no
reason to put them in public headers.
Also drop unused bcma_chipco_(suspend|resume).

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/bcma_private.h                 | 41 +++++++++++++++++++++++++++++
 include/linux/bcma/bcma_driver_chipcommon.h | 11 --------
 include/linux/bcma/bcma_driver_gmac_cmn.h   |  6 -----
 include/linux/bcma/bcma_driver_mips.h       | 15 -----------
 include/linux/bcma/bcma_driver_pci.h        |  2 --
 include/linux/bcma/bcma_driver_pcie2.h      |  2 --
 6 files changed, 41 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 29565e30700a..5a1d22489afc 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -43,6 +43,9 @@ int bcma_bus_scan(struct bcma_bus *bus);
 int bcma_sprom_get(struct bcma_bus *bus);
 
 /* driver_chipcommon.c */
+void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc);
+void bcma_core_chipcommon_init(struct bcma_drv_cc *cc);
+void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable);
 #ifdef CONFIG_BCMA_DRIVER_MIPS
 void bcma_chipco_serial_init(struct bcma_drv_cc *cc);
 extern struct platform_device bcma_pflash_dev;
@@ -53,6 +56,8 @@ int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb);
 void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb);
 
 /* driver_chipcommon_pmu.c */
+void bcma_pmu_early_init(struct bcma_drv_cc *cc);
+void bcma_pmu_init(struct bcma_drv_cc *cc);
 u32 bcma_pmu_get_alp_clock(struct bcma_drv_cc *cc);
 u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc);
 
@@ -102,10 +107,13 @@ static inline void __exit bcma_host_soc_unregister_driver(void)
 
 /* driver_pci.c */
 u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address);
+void bcma_core_pci_early_init(struct bcma_drv_pci *pc);
+void bcma_core_pci_init(struct bcma_drv_pci *pc);
 void bcma_core_pci_up(struct bcma_drv_pci *pc);
 void bcma_core_pci_down(struct bcma_drv_pci *pc);
 
 /* driver_pcie2.c */
+void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2);
 void bcma_core_pcie2_up(struct bcma_drv_pcie2 *pcie2);
 
 extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc);
@@ -123,6 +131,39 @@ static inline void bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 }
 #endif /* CONFIG_BCMA_DRIVER_PCI_HOSTMODE */
 
+/**************************************************
+ * driver_mips.c
+ **************************************************/
+
+#ifdef CONFIG_BCMA_DRIVER_MIPS
+unsigned int bcma_core_mips_irq(struct bcma_device *dev);
+void bcma_core_mips_early_init(struct bcma_drv_mips *mcore);
+void bcma_core_mips_init(struct bcma_drv_mips *mcore);
+#else
+static inline unsigned int bcma_core_mips_irq(struct bcma_device *dev)
+{
+	return 0;
+}
+static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore)
+{
+}
+static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore)
+{
+}
+#endif
+
+/**************************************************
+ * driver_gmac_cmn.c
+ **************************************************/
+
+#ifdef CONFIG_BCMA_DRIVER_GMAC_CMN
+void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc);
+#else
+static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc)
+{
+}
+#endif
+
 #ifdef CONFIG_BCMA_DRIVER_GPIO
 /* driver_gpio.c */
 int bcma_gpio_init(struct bcma_drv_cc *cc);
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index db6fa217f98b..6cceedf65ca2 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -663,14 +663,6 @@ struct bcma_drv_cc_b {
 #define bcma_cc_maskset32(cc, offset, mask, set) \
 	bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set))
 
-extern void bcma_core_chipcommon_init(struct bcma_drv_cc *cc);
-extern void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc);
-
-extern void bcma_chipco_suspend(struct bcma_drv_cc *cc);
-extern void bcma_chipco_resume(struct bcma_drv_cc *cc);
-
-void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable);
-
 extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks);
 
 extern u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc);
@@ -690,9 +682,6 @@ u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value);
 u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value);
 
 /* PMU support */
-extern void bcma_pmu_init(struct bcma_drv_cc *cc);
-extern void bcma_pmu_early_init(struct bcma_drv_cc *cc);
-
 extern void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset,
 				  u32 value);
 extern void bcma_chipco_pll_maskset(struct bcma_drv_cc *cc, u32 offset,
diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h
index 4dd1f33e36a2..4354d4ea6713 100644
--- a/include/linux/bcma/bcma_driver_gmac_cmn.h
+++ b/include/linux/bcma/bcma_driver_gmac_cmn.h
@@ -91,10 +91,4 @@ struct bcma_drv_gmac_cmn {
 #define gmac_cmn_write16(gc, offset, val)	bcma_write16((gc)->core, offset, val)
 #define gmac_cmn_write32(gc, offset, val)	bcma_write32((gc)->core, offset, val)
 
-#ifdef CONFIG_BCMA_DRIVER_GMAC_CMN
-extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc);
-#else
-static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { }
-#endif
-
 #endif /* LINUX_BCMA_DRIVER_GMAC_CMN_H_ */
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index 0b3b32aeeb8a..8eea7f9e33b4 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -39,21 +39,6 @@ struct bcma_drv_mips {
 	u8 early_setup_done:1;
 };
 
-#ifdef CONFIG_BCMA_DRIVER_MIPS
-extern void bcma_core_mips_init(struct bcma_drv_mips *mcore);
-extern void bcma_core_mips_early_init(struct bcma_drv_mips *mcore);
-
-extern unsigned int bcma_core_mips_irq(struct bcma_device *dev);
-#else
-static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) { }
-static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { }
-
-static inline unsigned int bcma_core_mips_irq(struct bcma_device *dev)
-{
-	return 0;
-}
-#endif
-
 extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore);
 
 #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 6b8bca67851f..8e90004fdfd7 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -238,8 +238,6 @@ struct bcma_drv_pci {
 #define pcicore_write16(pc, offset, val)	bcma_write16((pc)->core, offset, val)
 #define pcicore_write32(pc, offset, val)	bcma_write32((pc)->core, offset, val)
 
-extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc);
-extern void bcma_core_pci_init(struct bcma_drv_pci *pc);
 extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus,
 				 struct bcma_device *core, bool enable);
 extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up);
diff --git a/include/linux/bcma/bcma_driver_pcie2.h b/include/linux/bcma/bcma_driver_pcie2.h
index d8c43294c527..31e6d17ab798 100644
--- a/include/linux/bcma/bcma_driver_pcie2.h
+++ b/include/linux/bcma/bcma_driver_pcie2.h
@@ -155,6 +155,4 @@ struct bcma_drv_pcie2 {
 #define pcie2_set32(pcie2, offset, set)		bcma_set32((pcie2)->core, offset, set)
 #define pcie2_mask32(pcie2, offset, mask)	bcma_mask32((pcie2)->core, offset, mask)
 
-void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2);
-
 #endif /* LINUX_BCMA_DRIVER_PCIE2_H_ */
-- 
cgit v1.2.3


From 842a9ae08a25671db3d4f689eed68b4d64be15b5 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Wed, 4 Mar 2015 12:54:21 +0200
Subject: bridge: Extend Proxy ARP design to allow optional rules for Wi-Fi

This extends the design in commit 958501163ddd ("bridge: Add support for
IEEE 802.11 Proxy ARP") with optional set of rules that are needed to
meet the IEEE 802.11 and Hotspot 2.0 requirements for ProxyARP. The
previously added BR_PROXYARP behavior is left as-is and a new
BR_PROXYARP_WIFI alternative is added so that this behavior can be
configured from user space when required.

In addition, this enables proxyarp functionality for unicast ARP
requests for both BR_PROXYARP and BR_PROXYARP_WIFI since it is possible
to use unicast as well as broadcast for these frames.

The key differences in functionality:

BR_PROXYARP:
- uses the flag on the bridge port on which the request frame was
  received to determine whether to reply
- block bridge port flooding completely on ports that enable proxy ARP

BR_PROXYARP_WIFI:
- uses the flag on the bridge port to which the target device of the
  request belongs
- block bridge port flooding selectively based on whether the proxyarp
  functionality replied

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h    |  1 +
 include/uapi/linux/if_link.h |  1 +
 net/bridge/br_forward.c      |  3 +++
 net/bridge/br_input.c        | 17 ++++++++++-------
 net/bridge/br_netlink.c      |  5 ++++-
 net/bridge/br_private.h      |  1 +
 net/bridge/br_sysfs_if.c     |  2 ++
 7 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index a57bca2ea97e..dad8b00beed2 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -44,6 +44,7 @@ struct br_ip_list {
 #define BR_PROMISC		BIT(7)
 #define BR_PROXYARP		BIT(8)
 #define BR_LEARNING_SYNC	BIT(9)
+#define BR_PROXYARP_WIFI	BIT(10)
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index dfd0bb22e554..756436e1ce89 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -247,6 +247,7 @@ enum {
 	IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
 	IFLA_BRPORT_PROXYARP,	/* proxy ARP */
 	IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+	IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index f96933a823e3..1238fabff874 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -188,6 +188,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
 		/* Do not flood to ports that enable proxy ARP */
 		if (p->flags & BR_PROXYARP)
 			continue;
+		if ((p->flags & BR_PROXYARP_WIFI) &&
+		    BR_INPUT_SKB_CB(skb)->proxyarp_replied)
+			continue;
 
 		prev = maybe_deliver(prev, p, skb, __packet_hook);
 		if (IS_ERR(prev))
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e2aa7be3a847..052c5ebbc947 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -60,7 +60,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
 }
 
 static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
-			    u16 vid)
+			    u16 vid, struct net_bridge_port *p)
 {
 	struct net_device *dev = br->dev;
 	struct neighbour *n;
@@ -68,6 +68,8 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
 	u8 *arpptr, *sha;
 	__be32 sip, tip;
 
+	BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
 	if (dev->flags & IFF_NOARP)
 		return;
 
@@ -105,9 +107,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
 		}
 
 		f = __br_fdb_get(br, n->ha, vid);
-		if (f)
+		if (f && ((p->flags & BR_PROXYARP) ||
+			  (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
 			arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
 				 sha, n->ha, sha);
+			BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		}
 
 		neigh_release(n);
 	}
@@ -153,12 +158,10 @@ int br_handle_frame_finish(struct sk_buff *skb)
 
 	dst = NULL;
 
-	if (is_broadcast_ether_addr(dest)) {
-		if (IS_ENABLED(CONFIG_INET) &&
-		    p->flags & BR_PROXYARP &&
-		    skb->protocol == htons(ETH_P_ARP))
-			br_do_proxy_arp(skb, br, vid);
+	if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
+		br_do_proxy_arp(skb, br, vid, p);
 
+	if (is_broadcast_ether_addr(dest)) {
 		skb2 = skb;
 		unicast = false;
 	} else if (is_multicast_ether_addr(dest)) {
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index c72083968768..8bc6b67457dc 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -143,7 +143,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 	    nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)))
+	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI,
+		       !!(p->flags & BR_PROXYARP_WIFI)))
 		return -EMSGSIZE;
 
 	return 0;
@@ -553,6 +555,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
 	br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
 	br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
 	br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
+	br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
 
 	if (tb[IFLA_BRPORT_COST]) {
 		err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index de0919975a25..c32e279c62f8 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -305,6 +305,7 @@ struct br_input_skb_cb {
 #endif
 
 	u16 frag_max_size;
+	bool proxyarp_replied;
 
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	bool vlan_filtered;
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 2de5d91199e8..4905845a94e9 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
 BRPORT_ATTR_FLAG(learning, BR_LEARNING);
 BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
 BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
+BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -215,6 +216,7 @@ static const struct brport_attribute *brport_attrs[] = {
 	&brport_attr_multicast_fast_leave,
 #endif
 	&brport_attr_proxyarp,
+	&brport_attr_proxyarp_wifi,
 	NULL
 };
 
-- 
cgit v1.2.3


From 43270b1bc5f1e33522dacf3d3b9175c29404c36c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 2 Mar 2015 14:40:39 +0100
Subject: netfilter: ipt_CLUSTERIP: deprecate it in favour of xt_cluster

xt_cluster supersedes ipt_CLUSTERIP since it can be also used in
gateway configurations (not only from the backend side).

ipt_CLUSTER is also known to leak the netdev that it uses on
device removal, which requires a rather large fix to workaround
the problem: http://patchwork.ozlabs.org/patch/358629/

So let's deprecate this so we can probably kill code this in the
future.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/x_tables.h       | 1 +
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index c24060ee411e..4d6597ad6067 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -9,6 +9,7 @@ struct ebt_table;
 struct netns_xt {
 	struct list_head tables[NFPROTO_NUMPROTO];
 	bool notrack_deprecated_warning;
+	bool clusterip_deprecated_warning;
 #if defined(CONFIG_BRIDGE_NF_EBTABLES) || \
     defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE)
 	struct ebt_table *broute_table;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index e90f83a3415b..f75e9df5e017 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -418,6 +418,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
+
+	if (!par->net->xt.clusterip_deprecated_warning) {
+		pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
+			"use xt_cluster instead\n");
+		par->net->xt.clusterip_deprecated_warning = true;
+	}
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 1a1e1a12199ca27cebe87677b4c2153a3d2bacf2 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Mar 2015 20:10:06 +0000
Subject: netfilter: nf_tables: cleanup nf_tables.h

The transaction related definitions are squeezed in between the rule
and expression definitions, which are closely related and should be
next to each other. The transaction definitions actually don't belong
into that file at all since it defines the global objects and API and
transactions are internal to nf_tables_api, but for now simply move
them to a seperate section.

Similar, the chain types are in between a set of registration functions,
they belong to the chain section.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 174 +++++++++++++++++++-------------------
 1 file changed, 87 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 9eaaa7884586..04188b47629d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -393,74 +393,6 @@ struct nft_rule {
 		__attribute__((aligned(__alignof__(struct nft_expr))));
 };
 
-/**
- *	struct nft_trans - nf_tables object update in transaction
- *
- *	@list: used internally
- *	@msg_type: message type
- *	@ctx: transaction context
- *	@data: internal information related to the transaction
- */
-struct nft_trans {
-	struct list_head		list;
-	int				msg_type;
-	struct nft_ctx			ctx;
-	char				data[0];
-};
-
-struct nft_trans_rule {
-	struct nft_rule			*rule;
-};
-
-#define nft_trans_rule(trans)	\
-	(((struct nft_trans_rule *)trans->data)->rule)
-
-struct nft_trans_set {
-	struct nft_set	*set;
-	u32		set_id;
-};
-
-#define nft_trans_set(trans)	\
-	(((struct nft_trans_set *)trans->data)->set)
-#define nft_trans_set_id(trans)	\
-	(((struct nft_trans_set *)trans->data)->set_id)
-
-struct nft_trans_chain {
-	bool		update;
-	char		name[NFT_CHAIN_MAXNAMELEN];
-	struct nft_stats __percpu *stats;
-	u8		policy;
-};
-
-#define nft_trans_chain_update(trans)	\
-	(((struct nft_trans_chain *)trans->data)->update)
-#define nft_trans_chain_name(trans)	\
-	(((struct nft_trans_chain *)trans->data)->name)
-#define nft_trans_chain_stats(trans)	\
-	(((struct nft_trans_chain *)trans->data)->stats)
-#define nft_trans_chain_policy(trans)	\
-	(((struct nft_trans_chain *)trans->data)->policy)
-
-struct nft_trans_table {
-	bool		update;
-	bool		enable;
-};
-
-#define nft_trans_table_update(trans)	\
-	(((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_enable(trans)	\
-	(((struct nft_trans_table *)trans->data)->enable)
-
-struct nft_trans_elem {
-	struct nft_set		*set;
-	struct nft_set_elem	elem;
-};
-
-#define nft_trans_elem_set(trans)	\
-	(((struct nft_trans_elem *)trans->data)->set)
-#define nft_trans_elem(trans)	\
-	(((struct nft_trans_elem *)trans->data)->elem)
-
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
@@ -528,6 +460,25 @@ enum nft_chain_type {
 	NFT_CHAIN_T_MAX
 };
 
+/**
+ * 	struct nf_chain_type - nf_tables chain type info
+ *
+ * 	@name: name of the type
+ * 	@type: numeric identifier
+ * 	@family: address family
+ * 	@owner: module owner
+ * 	@hook_mask: mask of valid hooks
+ * 	@hooks: hookfn overrides
+ */
+struct nf_chain_type {
+	const char			*name;
+	enum nft_chain_type		type;
+	int				family;
+	struct module			*owner;
+	unsigned int			hook_mask;
+	nf_hookfn			*hooks[NF_MAX_HOOKS];
+};
+
 int nft_chain_validate_dependency(const struct nft_chain *chain,
 				  enum nft_chain_type type);
 int nft_chain_validate_hooks(const struct nft_chain *chain,
@@ -614,25 +565,6 @@ struct nft_af_info {
 int nft_register_afinfo(struct net *, struct nft_af_info *);
 void nft_unregister_afinfo(struct nft_af_info *);
 
-/**
- * 	struct nf_chain_type - nf_tables chain type info
- *
- * 	@name: name of the type
- * 	@type: numeric identifier
- * 	@family: address family
- * 	@owner: module owner
- * 	@hook_mask: mask of valid hooks
- * 	@hooks: hookfn overrides
- */
-struct nf_chain_type {
-	const char			*name;
-	enum nft_chain_type		type;
-	int				family;
-	struct module			*owner;
-	unsigned int			hook_mask;
-	nf_hookfn			*hooks[NF_MAX_HOOKS];
-};
-
 int nft_register_chain_type(const struct nf_chain_type *);
 void nft_unregister_chain_type(const struct nf_chain_type *);
 
@@ -657,4 +589,72 @@ void nft_unregister_expr(struct nft_expr_type *);
 #define MODULE_ALIAS_NFT_SET() \
 	MODULE_ALIAS("nft-set")
 
+/**
+ *	struct nft_trans - nf_tables object update in transaction
+ *
+ *	@list: used internally
+ *	@msg_type: message type
+ *	@ctx: transaction context
+ *	@data: internal information related to the transaction
+ */
+struct nft_trans {
+	struct list_head		list;
+	int				msg_type;
+	struct nft_ctx			ctx;
+	char				data[0];
+};
+
+struct nft_trans_rule {
+	struct nft_rule			*rule;
+};
+
+#define nft_trans_rule(trans)	\
+	(((struct nft_trans_rule *)trans->data)->rule)
+
+struct nft_trans_set {
+	struct nft_set			*set;
+	u32				set_id;
+};
+
+#define nft_trans_set(trans)	\
+	(((struct nft_trans_set *)trans->data)->set)
+#define nft_trans_set_id(trans)	\
+	(((struct nft_trans_set *)trans->data)->set_id)
+
+struct nft_trans_chain {
+	bool				update;
+	char				name[NFT_CHAIN_MAXNAMELEN];
+	struct nft_stats __percpu	*stats;
+	u8				policy;
+};
+
+#define nft_trans_chain_update(trans)	\
+	(((struct nft_trans_chain *)trans->data)->update)
+#define nft_trans_chain_name(trans)	\
+	(((struct nft_trans_chain *)trans->data)->name)
+#define nft_trans_chain_stats(trans)	\
+	(((struct nft_trans_chain *)trans->data)->stats)
+#define nft_trans_chain_policy(trans)	\
+	(((struct nft_trans_chain *)trans->data)->policy)
+
+struct nft_trans_table {
+	bool				update;
+	bool				enable;
+};
+
+#define nft_trans_table_update(trans)	\
+	(((struct nft_trans_table *)trans->data)->update)
+#define nft_trans_table_enable(trans)	\
+	(((struct nft_trans_table *)trans->data)->enable)
+
+struct nft_trans_elem {
+	struct nft_set			*set;
+	struct nft_set_elem		elem;
+};
+
+#define nft_trans_elem_set(trans)	\
+	(((struct nft_trans_elem *)trans->data)->set)
+#define nft_trans_elem(trans)	\
+	(((struct nft_trans_elem *)trans->data)->elem)
+
 #endif /* _NET_NF_TABLES_H */
-- 
cgit v1.2.3


From 1cae565e8b746f484f1ff1b71d2a1c89d7cf0668 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 5 Mar 2015 15:05:36 +0100
Subject: netfilter: nf_tables: limit maximum table name length to 32 bytes

Set the same as we use for chain names, it should be enough.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        | 2 +-
 include/uapi/linux/netfilter/nf_tables.h | 1 +
 net/netfilter/nf_tables_api.c            | 7 ++++---
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 04188b47629d..a143acafa5d9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -535,7 +535,7 @@ struct nft_table {
 	u64				hgenerator;
 	u32				use;
 	u16				flags;
-	char				name[];
+	char				name[NFT_TABLE_MAXNAMELEN];
 };
 
 /**
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 832bc46db78b..b9783931503b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_NF_TABLES_H
 #define _LINUX_NF_TABLES_H
 
+#define NFT_TABLE_MAXNAMELEN	32
 #define NFT_CHAIN_MAXNAMELEN	32
 #define NFT_USERDATA_MAXLEN	256
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 199fd0f27b0e..284b20ce566b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -401,7 +401,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi,
 }
 
 static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
-	[NFTA_TABLE_NAME]	= { .type = NLA_STRING },
+	[NFTA_TABLE_NAME]	= { .type = NLA_STRING,
+				    .len = NFT_TABLE_MAXNAMELEN - 1 },
 	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
 };
 
@@ -686,13 +687,13 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 	if (!try_module_get(afi->owner))
 		return -EAFNOSUPPORT;
 
-	table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (table == NULL) {
 		module_put(afi->owner);
 		return -ENOMEM;
 	}
 
-	nla_strlcpy(table->name, name, nla_len(name));
+	nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
 	INIT_LIST_HEAD(&table->chains);
 	INIT_LIST_HEAD(&table->sets);
 	table->flags = flags;
-- 
cgit v1.2.3


From d0f91938bede204a343473792529e0db7d599836 Mon Sep 17 00:00:00 2001
From: Erik Hugne <erik.hugne@ericsson.com>
Date: Thu, 5 Mar 2015 10:23:49 +0100
Subject: tipc: add ip/udp media type

The ip/udp bearer can be configured in a point-to-point
mode by specifying both local and remote ip/hostname,
or it can be enabled in multicast mode, where links are
established to all tipc nodes that have joined the same
multicast group. The multicast IP address is generated
based on the TIPC network ID, but can be overridden by
using another multicast address as remote ip.

Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h |   9 +
 net/tipc/Kconfig                  |   8 +
 net/tipc/Makefile                 |   1 +
 net/tipc/bearer.c                 |  13 +-
 net/tipc/bearer.h                 |  12 +-
 net/tipc/msg.h                    |   2 +-
 net/tipc/udp_media.c              | 442 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 479 insertions(+), 8 deletions(-)
 create mode 100644 net/tipc/udp_media.c

(limited to 'include')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 8d723824ad69..d4c8f142ba63 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -83,11 +83,20 @@ enum {
 	TIPC_NLA_BEARER_NAME,		/* string */
 	TIPC_NLA_BEARER_PROP,		/* nest */
 	TIPC_NLA_BEARER_DOMAIN,		/* u32 */
+	TIPC_NLA_BEARER_UDP_OPTS,	/* nest */
 
 	__TIPC_NLA_BEARER_MAX,
 	TIPC_NLA_BEARER_MAX = __TIPC_NLA_BEARER_MAX - 1
 };
 
+enum {
+	TIPC_NLA_UDP_UNSPEC,
+	TIPC_NLA_UDP_LOCAL,		/* sockaddr_storage */
+	TIPC_NLA_UDP_REMOTE,		/* sockaddr_storage */
+
+	__TIPC_NLA_UDP_MAX,
+	TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1
+};
 /* Socket info */
 enum {
 	TIPC_NLA_SOCK_UNSPEC,
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 91c8a8e031db..c25a3a149dc4 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -26,3 +26,11 @@ config TIPC_MEDIA_IB
 	help
 	  Saying Y here will enable support for running TIPC on
 	  IP-over-InfiniBand devices.
+config TIPC_MEDIA_UDP
+	bool "IP/UDP media type support"
+	depends on TIPC
+	select NET_UDP_TUNNEL
+	help
+	  Saying Y here will enable support for running TIPC over IP/UDP
+	bool
+	default y
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 599b1a540d2b..57e460be4692 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -10,5 +10,6 @@ tipc-y	+= addr.o bcast.o bearer.o \
 	   netlink.o netlink_compat.o node.o socket.o eth_media.o \
 	   server.o socket.o
 
+tipc-$(CONFIG_TIPC_MEDIA_UDP)	+= udp_media.o
 tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o
 tipc-$(CONFIG_SYSCTL)		+= sysctl.o
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index af6deeb397a8..840db89e4283 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -47,6 +47,9 @@ static struct tipc_media * const media_info_array[] = {
 	&eth_media_info,
 #ifdef CONFIG_TIPC_MEDIA_IB
 	&ib_media_info,
+#endif
+#ifdef CONFIG_TIPC_MEDIA_UDP
+	&udp_media_info,
 #endif
 	NULL
 };
@@ -216,7 +219,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
  * tipc_enable_bearer - enable bearer with the given name
  */
 static int tipc_enable_bearer(struct net *net, const char *name,
-			      u32 disc_domain, u32 priority)
+			      u32 disc_domain, u32 priority,
+			      struct nlattr *attr[])
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_bearer *b_ptr;
@@ -304,7 +308,7 @@ restart:
 
 	strcpy(b_ptr->name, name);
 	b_ptr->media = m_ptr;
-	res = m_ptr->enable_media(net, b_ptr);
+	res = m_ptr->enable_media(net, b_ptr, attr);
 	if (res) {
 		pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
 			name, -res);
@@ -372,7 +376,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr,
 	kfree_rcu(b_ptr, rcu);
 }
 
-int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b)
+int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
+			 struct nlattr *attr[])
 {
 	struct net_device *dev;
 	char *driver_name = strchr((const char *)b->name, ':') + 1;
@@ -791,7 +796,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	rtnl_lock();
-	err = tipc_enable_bearer(net, bearer, domain, prio);
+	err = tipc_enable_bearer(net, bearer, domain, prio, attrs);
 	if (err) {
 		rtnl_unlock();
 		return err;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 097aff08ad5b..5cad243ee8fc 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -41,7 +41,7 @@
 #include <net/genetlink.h>
 
 #define MAX_BEARERS	2
-#define MAX_MEDIA	2
+#define MAX_MEDIA	3
 #define MAX_NODES	4096
 #define WSIZE		32
 
@@ -59,6 +59,7 @@
  */
 #define TIPC_MEDIA_TYPE_ETH	1
 #define TIPC_MEDIA_TYPE_IB	2
+#define TIPC_MEDIA_TYPE_UDP	3
 
 /**
  * struct tipc_node_map - set of node identifiers
@@ -104,7 +105,8 @@ struct tipc_media {
 	int (*send_msg)(struct net *net, struct sk_buff *buf,
 			struct tipc_bearer *b_ptr,
 			struct tipc_media_addr *dest);
-	int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr);
+	int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr,
+			    struct nlattr *attr[]);
 	void (*disable_media)(struct tipc_bearer *b_ptr);
 	int (*addr2str)(struct tipc_media_addr *addr,
 			char *strbuf,
@@ -183,6 +185,9 @@ extern struct tipc_media eth_media_info;
 #ifdef CONFIG_TIPC_MEDIA_IB
 extern struct tipc_media ib_media_info;
 #endif
+#ifdef CONFIG_TIPC_MEDIA_UDP
+extern struct tipc_media udp_media_info;
+#endif
 
 int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
@@ -197,7 +202,8 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
 int tipc_media_set_priority(const char *name, u32 new_value);
 int tipc_media_set_window(const char *name, u32 new_value);
 void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
-int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b);
+int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
+			 struct nlattr *attrs[]);
 void tipc_disable_l2_media(struct tipc_bearer *b);
 int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
 		     struct tipc_bearer *b, struct tipc_media_addr *dest);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c1cc8d7a5d52..fa167846d1ab 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -87,7 +87,7 @@ struct plist;
  * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields
  *       are word aligned for quicker access
  */
-#define BUF_HEADROOM LL_MAX_HEADER
+#define BUF_HEADROOM (LL_MAX_HEADER + 48)
 
 struct tipc_skb_cb {
 	void *handle;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
new file mode 100644
index 000000000000..0d10001db40d
--- /dev/null
+++ b/net/tipc/udp_media.c
@@ -0,0 +1,442 @@
+/* net/tipc/udp_media.c: IP bearer support for TIPC
+ *
+ * Copyright (c) 2015, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/socket.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/inet.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/udp_tunnel.h>
+#include <linux/tipc_netlink.h>
+#include "core.h"
+#include "bearer.h"
+
+/* IANA assigned UDP port */
+#define UDP_PORT_DEFAULT	6118
+
+static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
+	[TIPC_NLA_UDP_UNSPEC]	= {.type = NLA_UNSPEC},
+	[TIPC_NLA_UDP_LOCAL]	= {.type = NLA_BINARY,
+				   .len = sizeof(struct sockaddr_storage)},
+	[TIPC_NLA_UDP_REMOTE]	= {.type = NLA_BINARY,
+				   .len = sizeof(struct sockaddr_storage)},
+};
+
+/**
+ * struct udp_media_addr - IP/UDP addressing information
+ *
+ * This is the bearer level originating address used in neighbor discovery
+ * messages, and all fields should be in network byte order
+ */
+struct udp_media_addr {
+	__be16	proto;
+	__be16	udp_port;
+	union {
+		struct in_addr ipv4;
+		struct in6_addr ipv6;
+	};
+};
+
+/**
+ * struct udp_bearer - ip/udp bearer data structure
+ * @bearer:	associated generic tipc bearer
+ * @ubsock:	bearer associated socket
+ * @ifindex:	local address scope
+ * @work:	used to schedule deferred work on a bearer
+ */
+struct udp_bearer {
+	struct tipc_bearer __rcu *bearer;
+	struct socket *ubsock;
+	u32 ifindex;
+	struct work_struct work;
+};
+
+/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */
+static void tipc_udp_media_addr_set(struct tipc_media_addr *addr,
+				    struct udp_media_addr *ua)
+{
+	memset(addr, 0, sizeof(struct tipc_media_addr));
+	addr->media_id = TIPC_MEDIA_TYPE_UDP;
+	memcpy(addr->value, ua, sizeof(struct udp_media_addr));
+	if (ntohs(ua->proto) == ETH_P_IP) {
+		if (ipv4_is_multicast(ua->ipv4.s_addr))
+			addr->broadcast = 1;
+	} else if (ntohs(ua->proto) == ETH_P_IPV6) {
+		if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST)
+			addr->broadcast = 1;
+	} else {
+		pr_err("Invalid UDP media address\n");
+	}
+}
+
+/* tipc_udp_addr2str - convert ip/udp address to string */
+static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size)
+{
+	struct udp_media_addr *ua = (struct udp_media_addr *)&a->value;
+
+	if (ntohs(ua->proto) == ETH_P_IP)
+		snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port));
+	else if (ntohs(ua->proto) == ETH_P_IPV6)
+		snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port));
+	else
+		pr_err("Invalid UDP media address\n");
+	return 0;
+}
+
+/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */
+static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a,
+			     char *msg)
+{
+	struct udp_media_addr *ua;
+
+	ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET);
+	if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP)
+		return -EINVAL;
+	tipc_udp_media_addr_set(a, ua);
+	return 0;
+}
+
+/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */
+static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a)
+{
+	memset(msg, 0, TIPC_MEDIA_INFO_SIZE);
+	msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP;
+	memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value,
+	       sizeof(struct udp_media_addr));
+	return 0;
+}
+
+/* tipc_send_msg - enqueue a send request */
+static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
+			     struct tipc_bearer *b,
+			     struct tipc_media_addr *dest)
+{
+	int ttl, err = 0;
+	struct udp_bearer *ub;
+	struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
+	struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+	struct sk_buff *clone;
+	struct rtable *rt;
+
+	clone = skb_clone(skb, GFP_ATOMIC);
+	skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub) {
+		err = -ENODEV;
+		goto tx_error;
+	}
+	if (htons(dst->proto) == ETH_P_IP) {
+		struct flowi4 fl = {
+			.daddr = dst->ipv4.s_addr,
+			.saddr = src->ipv4.s_addr,
+			.flowi4_mark = clone->mark,
+			.flowi4_proto = IPPROTO_UDP
+		};
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto tx_error;
+		}
+		ttl = ip4_dst_hoplimit(&rt->dst);
+		err = udp_tunnel_xmit_skb(rt, clone, src->ipv4.s_addr,
+					  dst->ipv4.s_addr, 0, ttl, 0,
+					  src->udp_port, dst->udp_port,
+					  false, true);
+		if (err < 0) {
+			ip_rt_put(rt);
+			goto tx_error;
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		struct dst_entry *ndst;
+		struct flowi6 fl6 = {
+			.flowi6_oif = ub->ifindex,
+			.daddr = dst->ipv6,
+			.saddr = src->ipv6,
+			.flowi6_proto = IPPROTO_UDP
+		};
+		err = ip6_dst_lookup(ub->ubsock->sk, &ndst, &fl6);
+		if (err)
+			goto tx_error;
+		ttl = ip6_dst_hoplimit(ndst);
+		err = udp_tunnel6_xmit_skb(ndst, clone, ndst->dev, &src->ipv6,
+					   &dst->ipv6, 0, ttl, src->udp_port,
+					   dst->udp_port, false);
+#endif
+	}
+	return err;
+
+tx_error:
+	kfree_skb(clone);
+	return err;
+}
+
+/* tipc_udp_recv - read data from bearer socket */
+static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct udp_bearer *ub;
+	struct tipc_bearer *b;
+
+	ub = rcu_dereference_sk_user_data(sk);
+	if (!ub) {
+		pr_err_ratelimited("Failed to get UDP bearer reference");
+		kfree_skb(skb);
+		return 0;
+	}
+
+	skb_pull(skb, sizeof(struct udphdr));
+	rcu_read_lock();
+	b = rcu_dereference_rtnl(ub->bearer);
+
+	if (b) {
+		tipc_rcv(sock_net(sk), skb, b);
+		rcu_read_unlock();
+		return 0;
+	}
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return 0;
+}
+
+static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
+{
+	int err = 0;
+	struct ip_mreqn mreqn;
+	struct sock *sk = ub->ubsock->sk;
+
+	if (ntohs(remote->proto) == ETH_P_IP) {
+		if (!ipv4_is_multicast(remote->ipv4.s_addr))
+			return 0;
+		mreqn.imr_multiaddr = remote->ipv4;
+		mreqn.imr_ifindex = ub->ifindex;
+		err = __ip_mc_join_group(sk, &mreqn);
+	} else {
+		if (!ipv6_addr_is_multicast(&remote->ipv6))
+			return 0;
+		err = __ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6);
+	}
+	return err;
+}
+
+/**
+ * parse_options - build local/remote addresses from configuration
+ * @attrs:	netlink config data
+ * @ub:		UDP bearer instance
+ * @local:	local bearer IP address/port
+ * @remote:	peer or multicast IP/port
+ */
+static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
+			 struct udp_media_addr *local,
+			 struct udp_media_addr *remote)
+{
+	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+	struct sockaddr_storage *sa_local, *sa_remote;
+
+	if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
+		goto err;
+	if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
+			     attrs[TIPC_NLA_BEARER_UDP_OPTS],
+			     tipc_nl_udp_policy))
+		goto err;
+	if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) {
+		sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]);
+		sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]);
+	} else {
+err:
+		pr_err("Invalid UDP bearer configuration");
+		return -EINVAL;
+	}
+	if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) {
+		struct sockaddr_in *ip4;
+
+		ip4 = (struct sockaddr_in *)sa_local;
+		local->proto = htons(ETH_P_IP);
+		local->udp_port = ip4->sin_port;
+		local->ipv4.s_addr = ip4->sin_addr.s_addr;
+
+		ip4 = (struct sockaddr_in *)sa_remote;
+		remote->proto = htons(ETH_P_IP);
+		remote->udp_port = ip4->sin_port;
+		remote->ipv4.s_addr = ip4->sin_addr.s_addr;
+		return 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) {
+		struct sockaddr_in6 *ip6;
+
+		ip6 = (struct sockaddr_in6 *)sa_local;
+		local->proto = htons(ETH_P_IPV6);
+		local->udp_port = ip6->sin6_port;
+		local->ipv6 = ip6->sin6_addr;
+		ub->ifindex = ip6->sin6_scope_id;
+
+		ip6 = (struct sockaddr_in6 *)sa_remote;
+		remote->proto = htons(ETH_P_IPV6);
+		remote->udp_port = ip6->sin6_port;
+		remote->ipv6 = ip6->sin6_addr;
+		return 0;
+#endif
+	}
+	return -EADDRNOTAVAIL;
+}
+
+/**
+ * tipc_udp_enable - callback to create a new udp bearer instance
+ * @net:	network namespace
+ * @b:		pointer to generic tipc_bearer
+ * @attrs:	netlink bearer configuration
+ *
+ * validate the bearer parameters and initialize the udp bearer
+ * rtnl_lock should be held
+ */
+static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
+			   struct nlattr *attrs[])
+{
+	int err = -EINVAL;
+	struct udp_bearer *ub;
+	struct udp_media_addr *remote;
+	struct udp_media_addr local = {0};
+	struct udp_port_cfg udp_conf = {0};
+	struct udp_tunnel_sock_cfg tuncfg = {0};
+
+	ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
+	if (!ub)
+		return -ENOMEM;
+
+	remote = (struct udp_media_addr *)&b->bcast_addr.value;
+	memset(remote, 0, sizeof(struct udp_media_addr));
+	err = parse_options(attrs, ub, &local, remote);
+	if (err)
+		goto err;
+
+	b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP;
+	b->bcast_addr.broadcast = 1;
+	rcu_assign_pointer(b->media_ptr, ub);
+	rcu_assign_pointer(ub->bearer, b);
+	tipc_udp_media_addr_set(&b->addr, &local);
+	if (htons(local.proto) == ETH_P_IP) {
+		struct net_device *dev;
+
+		dev = __ip_dev_find(net, local.ipv4.s_addr, false);
+		if (!dev) {
+			err = -ENODEV;
+			goto err;
+		}
+		udp_conf.family = AF_INET;
+		udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+		udp_conf.use_udp_checksums = false;
+		ub->ifindex = dev->ifindex;
+		b->mtu = dev->mtu - sizeof(struct iphdr)
+			- sizeof(struct udphdr);
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (htons(local.proto) == ETH_P_IPV6) {
+		udp_conf.family = AF_INET6;
+		udp_conf.use_udp6_tx_checksums = true;
+		udp_conf.use_udp6_rx_checksums = true;
+		udp_conf.local_ip6 = in6addr_any;
+		b->mtu = 1280;
+#endif
+	} else {
+		err = -EAFNOSUPPORT;
+		goto err;
+	}
+	udp_conf.local_udp_port = local.udp_port;
+	err = udp_sock_create(net, &udp_conf, &ub->ubsock);
+	if (err)
+		goto err;
+	tuncfg.sk_user_data = ub;
+	tuncfg.encap_type = 1;
+	tuncfg.encap_rcv = tipc_udp_recv;
+	tuncfg.encap_destroy = NULL;
+	setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
+
+	if (enable_mcast(ub, remote))
+		goto err;
+	return 0;
+err:
+	kfree(ub);
+	return err;
+}
+
+/* cleanup_bearer - break the socket/bearer association */
+static void cleanup_bearer(struct work_struct *work)
+{
+	struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
+
+	if (ub->ubsock)
+		udp_tunnel_sock_release(ub->ubsock);
+	synchronize_net();
+	kfree(ub);
+}
+
+/* tipc_udp_disable - detach bearer from socket */
+static void tipc_udp_disable(struct tipc_bearer *b)
+{
+	struct udp_bearer *ub;
+
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub) {
+		pr_err("UDP bearer instance not found\n");
+		return;
+	}
+	if (ub->ubsock)
+		sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
+	RCU_INIT_POINTER(b->media_ptr, NULL);
+	RCU_INIT_POINTER(ub->bearer, NULL);
+
+	/* sock_release need to be done outside of rtnl lock */
+	INIT_WORK(&ub->work, cleanup_bearer);
+	schedule_work(&ub->work);
+}
+
+struct tipc_media udp_media_info = {
+	.send_msg	= tipc_udp_send_msg,
+	.enable_media	= tipc_udp_enable,
+	.disable_media	= tipc_udp_disable,
+	.addr2str	= tipc_udp_addr2str,
+	.addr2msg	= tipc_udp_addr2msg,
+	.msg2addr	= tipc_udp_msg2addr,
+	.priority	= TIPC_DEF_LINK_PRI,
+	.tolerance	= TIPC_DEF_LINK_TOL,
+	.window		= TIPC_DEF_LINK_WIN,
+	.type_id	= TIPC_MEDIA_TYPE_UDP,
+	.hwaddr_len	= 0,
+	.name		= "udp"
+};
-- 
cgit v1.2.3


From 59299031038f3ea92cf484bc4a68d16ad4bb3050 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 5 Mar 2015 12:35:07 -0800
Subject: net: dsa: let switches specify their tagging protocol

In order to support the new DSA device driver model, a dsa_switch should
be able to advertise the type of tagging protocol supported by the
underlying switch device. This also removes constraints on how tagging
can be stacked to each other.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 5 +++++
 net/dsa/dsa.c     | 5 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index c542c131d551..b525ac516559 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -127,6 +127,11 @@ struct dsa_switch {
 	struct dsa_switch_tree	*dst;
 	int			index;
 
+	/*
+	 * Tagging protocol understood by this switch
+	 */
+	enum dsa_tag_protocol	tag_protocol;
+
 	/*
 	 * Configuration data for this switch.
 	 */
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6f02ccc57593..4cc995664fdf 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -227,7 +227,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 	 * switch.
 	 */
 	if (dst->cpu_switch == index) {
-		switch (drv->tag_protocol) {
+		switch (ds->tag_protocol) {
 #ifdef CONFIG_NET_DSA_TAG_DSA
 		case DSA_TAG_PROTO_DSA:
 			dst->rcv = dsa_netdev_ops.rcv;
@@ -255,7 +255,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 			goto out;
 		}
 
-		dst->tag_protocol = drv->tag_protocol;
+		dst->tag_protocol = ds->tag_protocol;
 	}
 
 	/*
@@ -364,6 +364,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 	ds->index = index;
 	ds->pd = pd;
 	ds->drv = drv;
+	ds->tag_protocol = drv->tag_protocol;
 	ds->master_dev = host_dev;
 
 	ret = dsa_switch_setup_one(ds, parent);
-- 
cgit v1.2.3


From 37ed9493699cc5dafe1b8725858ef73176fdc9d2 Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Thu, 5 Mar 2015 21:21:13 -0800
Subject: rtnetlink: add RTNH_F_EXTERNAL flag for fib offload

Add new RTNH_F_EXTERNAL flag to mark fib entries offloaded externally, for
example to a switchdev switch device.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 06f75a407f74..c3722b024e73 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -334,6 +334,7 @@ struct rtnexthop {
 #define RTNH_F_DEAD		1	/* Nexthop is dead (used by multipath)	*/
 #define RTNH_F_PERVASIVE	2	/* Do recursive gateway lookup	*/
 #define RTNH_F_ONLINK		4	/* Gateway is forced on link	*/
+#define RTNH_F_EXTERNAL		8	/* Route installed externally	*/
 
 /* Macros to handle hexthops */
 
-- 
cgit v1.2.3


From 4586f1bb911ce219a4bc1c2a9d6eee2e058b2b51 Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Thu, 5 Mar 2015 21:21:14 -0800
Subject: netdevice: add IPv4 fib add/del ops

Add two new ndo ops for IPv4 fib offload support, add and del.  Add uses
modifiy semantics if fib entry already offloaded.  Drivers implementing the new
ndo ops will return err<0 if programming device fails, for example if device's
tables are full.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 625c8d71511b..45413784a3b1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -768,6 +768,8 @@ struct netdev_phys_item_id {
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
 				       struct sk_buff *skb);
 
+struct fib_info;
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1031,6 +1033,14 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state);
  *	Called to notify switch device port of bridge port STP
  *	state change.
+ * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst,
+ *				     int dst_len, struct fib_info *fi,
+ *				     u8 tos, u8 type, u32 tb_id);
+ *	Called to add/modify IPv4 route to switch device.
+ * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst,
+ *				     int dst_len, struct fib_info *fi,
+ *				     u8 tos, u8 type, u32 tb_id);
+ *	Called to delete IPv4 route from switch device.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1192,6 +1202,18 @@ struct net_device_ops {
 							    struct netdev_phys_item_id *psid);
 	int			(*ndo_switch_port_stp_update)(struct net_device *dev,
 							      u8 state);
+	int			(*ndo_switch_fib_ipv4_add)(struct net_device *dev,
+							   __be32 dst,
+							   int dst_len,
+							   struct fib_info *fi,
+							   u8 tos, u8 type,
+							   u32 tb_id);
+	int			(*ndo_switch_fib_ipv4_del)(struct net_device *dev,
+							   __be32 dst,
+							   int dst_len,
+							   struct fib_info *fi,
+							   u8 tos, u8 type,
+							   u32 tb_id);
 #endif
 };
 
-- 
cgit v1.2.3


From 5e8d90497d65f528c54015644095ace6e330fd8e Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Thu, 5 Mar 2015 21:21:15 -0800
Subject: switchdev: add IPv4 fib ndo ops wrappers

Add IPv4 fib ndo wrapper funcs and stub them out for now.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h   | 19 +++++++++++++++++++
 net/switchdev/switchdev.c | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index cfcdac2e5d25..8d2ac663325a 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -51,6 +51,11 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
 					       struct nlmsghdr *nlh, u16 flags);
 int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev,
 					       struct nlmsghdr *nlh, u16 flags);
+int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+			       u8 tos, u8 type, u32 tb_id);
+int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+			       u8 tos, u8 type, u32 tb_id);
+
 #else
 
 static inline int netdev_switch_parent_id_get(struct net_device *dev,
@@ -109,6 +114,20 @@ static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *
 	return 0;
 }
 
+static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len,
+					     struct fib_info *fi,
+					     u8 tos, u8 type, u32 tb_id)
+{
+	return 0;
+}
+
+static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len,
+					     struct fib_info *fi,
+					     u8 tos, u8 type, u32 tb_id)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8c1e558db118..3c090f8d071b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
+#include <net/ip_fib.h>
 #include <net/switchdev.h>
 
 /**
@@ -225,3 +226,41 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
 	return ret;
 }
 EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink);
+
+/**
+ *	netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch
+ *
+ *	@dst: route's IPv4 destination address
+ *	@dst_len: destination address length (prefix length)
+ *	@fi: route FIB info structure
+ *	@tos: route TOS
+ *	@type: route type
+ *	@tb_id: route table ID
+ *
+ *	Add IPv4 route entry to switch device.
+ */
+int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+			       u8 tos, u8 type, u32 tb_id)
+{
+	return 0;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_add);
+
+/**
+ *	netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch
+ *
+ *	@dst: route's IPv4 destination address
+ *	@dst_len: destination address length (prefix length)
+ *	@fi: route FIB info structure
+ *	@tos: route TOS
+ *	@type: route type
+ *	@tb_id: route table ID
+ *
+ *	Delete IPv4 route entry from switch device.
+ */
+int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+			       u8 tos, u8 type, u32 tb_id)
+{
+	return 0;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_del);
-- 
cgit v1.2.3


From 104616e74e0b464d449fdd2ee2f547d2fad71610 Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Thu, 5 Mar 2015 21:21:16 -0800
Subject: switchdev: don't support custom ip rules, for now

Keep switchdev FIB offload model simple for now and don't allow custom ip
rules.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h      |  2 ++
 net/ipv4/fib_frontend.c   | 13 ++++++++++
 net/ipv4/fib_rules.c      |  3 +++
 net/ipv4/fib_trie.c       | 61 +++++++++++++++++++++++++++++++++++++++++++++++
 net/switchdev/switchdev.c |  4 ++++
 5 files changed, 83 insertions(+)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 825cb2800908..1657604c5dd3 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -196,6 +196,7 @@ int fib_table_delete(struct fib_table *, struct fib_config *);
 int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
 		   struct netlink_callback *cb);
 int fib_table_flush(struct fib_table *table);
+void fib_table_flush_external(struct fib_table *table);
 void fib_free_table(struct fib_table *tb);
 
 
@@ -308,6 +309,7 @@ static inline int fib_num_tclassid_users(struct net *net)
 	return 0;
 }
 #endif
+void fib_flush_external(struct net *net);
 
 /* Exported by fib_semantics.c */
 int ip_fib_check_default(__be32 gw, struct net_device *dev);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 220c4b4af4cf..e067770235bf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -144,6 +144,19 @@ static void fib_flush(struct net *net)
 		rt_cache_flush(net);
 }
 
+void fib_flush_external(struct net *net)
+{
+	struct fib_table *tb;
+	struct hlist_head *head;
+	unsigned int h;
+
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		head = &net->ipv4.fib_table_hash[h];
+		hlist_for_each_entry(tb, head, tb_hlist)
+			fib_table_flush_external(tb);
+	}
+}
+
 /*
  * Find address type as if only "dev" was present in the system. If
  * on_dev is NULL then all interfaces are taken into consideration.
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index d3db718be51d..190d0d00d744 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -209,6 +209,8 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule4->tos = frh->tos;
 
 	net->ipv4.fib_has_custom_rules = true;
+	fib_flush_external(rule->fr_net);
+
 	err = 0;
 errout:
 	return err;
@@ -224,6 +226,7 @@ static void fib4_rule_delete(struct fib_rule *rule)
 		net->ipv4.fib_num_tclassid_users--;
 #endif
 	net->ipv4.fib_has_custom_rules = true;
+	fib_flush_external(rule->fr_net);
 }
 
 static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fae34ad4bb1a..2de43956c9d0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1536,6 +1536,67 @@ found:
 	return n;
 }
 
+/* Caller must hold RTNL */
+void fib_table_flush_external(struct fib_table *tb)
+{
+	struct trie *t = (struct trie *)tb->tb_data;
+	struct fib_alias *fa;
+	struct tnode *n, *pn;
+	unsigned long cindex;
+	unsigned char slen;
+	int found = 0;
+
+	n = rcu_dereference(t->trie);
+	if (!n)
+		return;
+
+	pn = NULL;
+	cindex = 0;
+
+	while (IS_TNODE(n)) {
+		/* record pn and cindex for leaf walking */
+		pn = n;
+		cindex = 1ul << n->bits;
+backtrace:
+		/* walk trie in reverse order */
+		do {
+			while (!(cindex--)) {
+				t_key pkey = pn->key;
+
+				n = pn;
+				pn = node_parent(n);
+
+				/* resize completed node */
+				resize(t, n);
+
+				/* if we got the root we are done */
+				if (!pn)
+					return;
+
+				cindex = get_index(pkey, pn);
+			}
+
+			/* grab the next available node */
+			n = tnode_get_child(pn, cindex);
+		} while (!n);
+	}
+
+	hlist_for_each_entry(fa, &n->leaf, fa_list) {
+		struct fib_info *fi = fa->fa_info;
+
+		if (fi && (fi->fib_flags & RTNH_F_EXTERNAL)) {
+			netdev_switch_fib_ipv4_del(n->key,
+						   KEYLENGTH - fa->fa_slen,
+						   fi, fa->fa_tos,
+						   fa->fa_type, tb->tb_id);
+		}
+	}
+
+	/* if trie is leaf only loop is completed */
+	if (pn)
+		goto backtrace;
+}
+
 /* Caller must hold RTNL. */
 int fib_table_flush(struct fib_table *tb)
 {
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 3c090f8d071b..81c4c0274b9b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -242,6 +242,10 @@ EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink);
 int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
 			       u8 tos, u8 type, u32 tb_id)
 {
+	/* Don't offload route if using custom ip rules */
+	if (fi->fib_net->ipv4.fib_has_custom_rules)
+		return 0;
+
 	return 0;
 }
 EXPORT_SYMBOL(netdev_switch_fib_ipv4_add);
-- 
cgit v1.2.3


From 448b128a14501543748514a4f9adedd3c0da2e85 Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Thu, 5 Mar 2015 21:21:18 -0800
Subject: ipv4: add net bool fib_offload_disabled

If something goes wrong with IPv4 FIB offload, mark entire net offload
disabled.  This is brute force policy to basically shut down IPv4 FIB offload
permanently if there is a problem offloading any route to an external device.
We can refine the policy in the future, to handle failures on a per-device or
per-route basis, but for now, this policy is per-net.

What we're trying to avoid is an inconsistent split between the kernel's FIB
and the offload device's FIB.  We don't want the device to fwd a pkt
inconsitent with what the kernel would do.  An example of a split is if device
has 10.0.0.0/16 and kernel has 10.0.0.0/16 and 10.0.0.0/24, the device wouldn't
see the longest prefix 10.0.0.0/24 and potentially forward pkts incorrectly.

Limited capacity or limited capability are two ways a route may fail to install
to the offload device.  We'll not differentiate between failures at this time,
and treat any failure as fatal and mark the net as fib_offload_disabled.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index db1db158a00e..1085e12f940f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -47,6 +47,7 @@ struct netns_ipv4 {
 	int			fib_num_tclassid_users;
 #endif
 	struct hlist_head	*fib_table_hash;
+	bool			fib_offload_disabled;
 	struct sock		*fibnl;
 
 	struct sock  * __percpu	*icmp_sk;
-- 
cgit v1.2.3


From 8e05fd7166c6123334b7a739a697d677747aa462 Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Thu, 5 Mar 2015 21:21:19 -0800
Subject: fib: hook IPv4 fib for hardware offload

Call into the switchdev driver any time an IPv4 fib entry is
added/modified/deleted from the kernel's FIB.  The switchdev driver may or
may not install the route to the offload device.  In the case where the
driver tries to install the route and something goes wrong (device's routing
table is full, etc), then all of the offloaded routes will be flushed from the
device, route forwarding falls back to the kernel, and no more routes are
offloading.

We can refine this logic later.  For now, use the simplist model of offloading
routes up to the point of failure, and then on failure, undo everything and
mark IPv4 offloading disabled.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h   |  5 +++++
 net/ipv4/fib_trie.c       | 31 ++++++++++++++++++++++++++++++-
 net/switchdev/switchdev.c | 28 ++++++++++++++++++++++++++--
 3 files changed, 61 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 8d2ac663325a..dc0a5cc7c2c5 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -55,6 +55,7 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
 			       u8 tos, u8 type, u32 tb_id);
 int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
 			       u8 tos, u8 type, u32 tb_id);
+void netdev_switch_fib_ipv4_abort(struct fib_info *fi);
 
 #else
 
@@ -128,6 +129,10 @@ static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len,
 	return 0;
 }
 
+void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+{
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2de43956c9d0..6544f1a0cfa1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -79,6 +79,7 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/switchdev.h>
 #include "fib_lookup.h"
 
 #define MAX_STAT_DEPTH 32
@@ -1135,7 +1136,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			new_fa->fa_state = state & ~FA_S_ACCESSED;
 			new_fa->fa_slen = fa->fa_slen;
 
+			err = netdev_switch_fib_ipv4_add(key, plen, fi,
+							 new_fa->fa_tos,
+							 cfg->fc_type,
+							 tb->tb_id);
+			if (err) {
+				netdev_switch_fib_ipv4_abort(fi);
+				kmem_cache_free(fn_alias_kmem, new_fa);
+				goto out;
+			}
+
 			hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+
 			alias_free_mem_rcu(fa);
 
 			fib_release_info(fi_drop);
@@ -1171,10 +1183,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	new_fa->fa_state = 0;
 	new_fa->fa_slen = slen;
 
+	/* (Optionally) offload fib entry to switch hardware. */
+	err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
+					 cfg->fc_type, tb->tb_id);
+	if (err) {
+		netdev_switch_fib_ipv4_abort(fi);
+		goto out_free_new_fa;
+	}
+
 	/* Insert new entry to the list. */
 	err = fib_insert_alias(t, tp, l, new_fa, fa, key);
 	if (err)
-		goto out_free_new_fa;
+		goto out_sw_fib_del;
 
 	if (!plen)
 		tb->tb_num_default++;
@@ -1185,6 +1205,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 succeeded:
 	return 0;
 
+out_sw_fib_del:
+	netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
 out_free_new_fa:
 	kmem_cache_free(fn_alias_kmem, new_fa);
 out:
@@ -1456,6 +1478,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	if (!fa_to_delete)
 		return -ESRCH;
 
+	netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
+				   cfg->fc_type, tb->tb_id);
+
 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 
@@ -1650,6 +1675,10 @@ backtrace:
 		struct fib_info *fi = fa->fa_info;
 
 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
+			netdev_switch_fib_ipv4_del(n->key,
+						   KEYLENGTH - fa->fa_slen,
+						   fi, fa->fa_tos,
+						   fa->fa_type, tb->tb_id);
 			hlist_del_rcu(&fa->fa_list);
 			fib_release_info(fa->fa_info);
 			alias_free_mem_rcu(fa);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 99907d829419..f4fd575aa2a3 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -305,8 +305,12 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
 	const struct net_device_ops *ops;
 	int err = 0;
 
-	/* Don't offload route if using custom ip rules */
-	if (fi->fib_net->ipv4.fib_has_custom_rules)
+	/* Don't offload route if using custom ip rules or if
+	 * IPv4 FIB offloading has been disabled completely.
+	 */
+
+	if (fi->fib_net->ipv4.fib_has_custom_rules |
+	    fi->fib_net->ipv4.fib_offload_disabled)
 		return 0;
 
 	dev = netdev_switch_get_dev_by_nhs(fi);
@@ -362,3 +366,23 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
 	return err;
 }
 EXPORT_SYMBOL(netdev_switch_fib_ipv4_del);
+
+/**
+ *	netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation
+ *
+ *	@fi: route FIB info structure
+ */
+void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+{
+	/* There was a problem installing this route to the offload
+	 * device.  For now, until we come up with more refined
+	 * policy handling, abruptly end IPv4 fib offloading for
+	 * for entire net by flushing offload device(s) of all
+	 * IPv4 routes, and mark IPv4 fib offloading broken from
+	 * this point forward.
+	 */
+
+	fib_flush_external(fi->fib_net);
+	fi->fib_net->ipv4.fib_offload_disabled = true;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_abort);
-- 
cgit v1.2.3


From 05050753602626ed4c46271c689929b625f409e7 Mon Sep 17 00:00:00 2001
From: Ilan peer <ilan.peer@intel.com>
Date: Wed, 4 Mar 2015 00:32:06 -0500
Subject: cfg80211: Add API to change the indoor regulatory setting

Previously, the indoor setting configuration assumed that as
long as a station interface is connected, the indoor environment
setting does not change. However, this assumption is problematic
as:

- It is possible that a station interface is connected to a mobile
  AP, e.g., softAP or a P2P GO, where it is possible that both the
  station and the mobile AP move out of the indoor environment making
  the indoor setting invalid. In such a case, user space has no way to
  invalidate the setting.
- A station interface disconnection does not necessarily imply that
  the device is no longer operating in an indoor environment, e.g.,
  it is possible that the station interface is roaming but is still
  stays indoor.

To handle the above, extend the indoor configuration API to allow
user space to indicate a change of indoor settings, and allow it to
indicate weather it controls the indoor setting, such that:

1. If the user space process explicitly indicates that it is going
   to control the indoor setting, do not clear the indoor setting
   internally, unless the socket is released. The user space process
   should use the NL80211_ATTR_SOCKET_OWNER attribute in the command
   to state that it is going to control the indoor setting.
2. Reset the indoor setting when restoring the regulatory settings in
   case it is not owned by a user space process.

Based on the above, a user space tool that continuously monitors the
indoor settings, i.e., tracking power setting, location etc., can
indicate environment changes to the regulatory core.

It should be noted that currently user space is the only provided mechanism
used to hint to the regulatory core over the indoor/outdoor environment --
while the country IEs do have an environment setting this has been completely
ignored by the regulatory core by design for a while now since country IEs
typically can contain bogus data.

Acked-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: ArikX Nemtsov <arik@wizery.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  9 +++++++
 net/wireless/nl80211.c       | 19 ++++++++++++++-
 net/wireless/reg.c           | 57 ++++++++++++++++++++++++++++++++++++++++----
 net/wireless/reg.h           | 15 +++++++++++-
 4 files changed, 94 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 37e7f39441e5..ae16ba9cb1e3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1697,6 +1697,10 @@ enum nl80211_commands {
  *	If set during scheduled scan start then the new scan req will be
  *	owned by the netlink socket that created it and the scheduled scan will
  *	be stopped when the socket is closed.
+ *	If set during configuration of regulatory indoor operation then the
+ *	regulatory indoor configuration would be owned by the netlink socket
+ *	that configured the indoor setting, and the indoor operation would be
+ *	cleared when the socket is closed.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
@@ -1752,6 +1756,9 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before a scheduled scan (or a
  *	WoWLAN net-detect scan) is started, u32 in seconds.
+
+ * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device
+ *      is operating in an indoor environment.
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2120,6 +2127,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SCHED_SCAN_DELAY,
 
+	NL80211_ATTR_REG_INDOOR,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 07cef3d7653e..b02085301785 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -399,6 +399,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG },
 	[NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
 	[NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 },
+	[NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -4958,7 +4959,10 @@ static int parse_reg_rule(struct nlattr *tb[],
 static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 {
 	char *data = NULL;
+	bool is_indoor;
 	enum nl80211_user_reg_hint_type user_reg_hint_type;
+	u32 owner_nlportid;
+
 
 	/*
 	 * You should only get this when cfg80211 hasn't yet initialized
@@ -4984,7 +4988,15 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 		data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
 		return regulatory_hint_user(data, user_reg_hint_type);
 	case NL80211_USER_REG_HINT_INDOOR:
-		return regulatory_hint_indoor_user();
+		if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+			owner_nlportid = info->snd_portid;
+			is_indoor = !!info->attrs[NL80211_ATTR_REG_INDOOR];
+		} else {
+			owner_nlportid = 0;
+			is_indoor = true;
+		}
+
+		return regulatory_hint_indoor(is_indoor, owner_nlportid);
 	default:
 		return -EINVAL;
 	}
@@ -12810,6 +12822,11 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 
 	rcu_read_unlock();
 
+	/*
+	 * It is possible that the user space process that is controlling the
+	 * indoor setting disappeared, so notify the regulatory core.
+	 */
+	regulatory_netlink_notify(notify->portid);
 	return NOTIFY_OK;
 }
 
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index c24c8bf3c988..4239dd408137 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -128,9 +128,12 @@ static int reg_num_devs_support_basehint;
  * State variable indicating if the platform on which the devices
  * are attached is operating in an indoor environment. The state variable
  * is relevant for all registered devices.
- * (protected by RTNL)
  */
 static bool reg_is_indoor;
+static spinlock_t reg_indoor_lock;
+
+/* Used to track the userspace process controlling the indoor setting */
+static u32 reg_is_indoor_portid;
 
 static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
 {
@@ -2288,15 +2291,50 @@ int regulatory_hint_user(const char *alpha2,
 	return 0;
 }
 
-int regulatory_hint_indoor_user(void)
+int regulatory_hint_indoor(bool is_indoor, u32 portid)
 {
+	spin_lock(&reg_indoor_lock);
+
+	/* It is possible that more than one user space process is trying to
+	 * configure the indoor setting. To handle such cases, clear the indoor
+	 * setting in case that some process does not think that the device
+	 * is operating in an indoor environment. In addition, if a user space
+	 * process indicates that it is controlling the indoor setting, save its
+	 * portid, i.e., make it the owner.
+	 */
+	reg_is_indoor = is_indoor;
+	if (reg_is_indoor) {
+		if (!reg_is_indoor_portid)
+			reg_is_indoor_portid = portid;
+	} else {
+		reg_is_indoor_portid = 0;
+	}
 
+	spin_unlock(&reg_indoor_lock);
 
-	reg_is_indoor = true;
+	if (!is_indoor)
+		reg_check_channels();
 
 	return 0;
 }
 
+void regulatory_netlink_notify(u32 portid)
+{
+	spin_lock(&reg_indoor_lock);
+
+	if (reg_is_indoor_portid != portid) {
+		spin_unlock(&reg_indoor_lock);
+		return;
+	}
+
+	reg_is_indoor = false;
+	reg_is_indoor_portid = 0;
+
+	spin_unlock(&reg_indoor_lock);
+
+	reg_check_channels();
+}
+
 /* Driver hints */
 int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
 {
@@ -2464,7 +2502,17 @@ static void restore_regulatory_settings(bool reset_user)
 
 	ASSERT_RTNL();
 
-	reg_is_indoor = false;
+	/*
+	 * Clear the indoor setting in case that it is not controlled by user
+	 * space, as otherwise there is no guarantee that the device is still
+	 * operating in an indoor environment.
+	 */
+	spin_lock(&reg_indoor_lock);
+	if (reg_is_indoor && !reg_is_indoor_portid) {
+		reg_is_indoor = false;
+		reg_check_channels();
+	}
+	spin_unlock(&reg_indoor_lock);
 
 	reset_regdomains(true, &world_regdom);
 	restore_alpha2(alpha2, reset_user);
@@ -3061,6 +3109,7 @@ int __init regulatory_init(void)
 
 	spin_lock_init(&reg_requests_lock);
 	spin_lock_init(&reg_pending_beacons_lock);
+	spin_lock_init(&reg_indoor_lock);
 
 	reg_regdb_size_check();
 
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 4b45d6e61d24..a2c4e16459da 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -25,7 +25,20 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy);
 
 int regulatory_hint_user(const char *alpha2,
 			 enum nl80211_user_reg_hint_type user_reg_hint_type);
-int regulatory_hint_indoor_user(void);
+
+/**
+ * regulatory_hint_indoor - hint operation in indoor env. or not
+ * @is_indoor: if true indicates that user space thinks that the
+ * device is operating in an indoor environment.
+ * @portid: the netlink port ID on which the hint was given.
+ */
+int regulatory_hint_indoor(bool is_indoor, u32 portid);
+
+/**
+ * regulatory_netlink_notify - notify on released netlink socket
+ * @portid: the netlink socket port ID
+ */
+void regulatory_netlink_notify(u32 portid);
 
 void wiphy_regulatory_register(struct wiphy *wiphy);
 void wiphy_regulatory_deregister(struct wiphy *wiphy);
-- 
cgit v1.2.3


From 93690c227acf08a2a19cbaf9acbcd2210fbb8ded Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 6 Mar 2015 10:11:21 -0800
Subject: Bluetooth: Introduce controller setting information for static
 address

Currently it is not possible to determine if the static address is used
by the controller. It is also not possible to determine if using a
static on a dual-mode controller with disabled BR/EDR is possible or
not.

To address this issue, introduce a new setting called static-address. If
support for this setting is signaled that means that the kernel supports
using static addresses. And if used on dual-mode controllers with BR/EDR
disabled it means that a configured static address can be used.

In addition utilize the same setting for the list of current active
settings that indicates if a static address is configured and if that
address will be actually used.

With this in mind the existing Set Static Address management command
has been extended to return the current settings. That way the caller
of that command can easily determine if the programmed address will
be used or if extra steps are required.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/mgmt.h |  1 +
 net/bluetooth/mgmt.c         | 28 ++++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index fe8eef00e9ca..0c737e4b8f57 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -98,6 +98,7 @@ struct mgmt_rp_read_index_list {
 #define MGMT_SETTING_DEBUG_KEYS		0x00001000
 #define MGMT_SETTING_PRIVACY		0x00002000
 #define MGMT_SETTING_CONFIGURATION	0x00004000
+#define MGMT_SETTING_STATIC_ADDRESS	0x00008000
 
 #define MGMT_OP_READ_INFO		0x0004
 #define MGMT_READ_INFO_SIZE		0
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 967f07fdbbbe..d185a9800983 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -583,6 +583,7 @@ static u32 get_supported_settings(struct hci_dev *hdev)
 		settings |= MGMT_SETTING_ADVERTISING;
 		settings |= MGMT_SETTING_SECURE_CONN;
 		settings |= MGMT_SETTING_PRIVACY;
+		settings |= MGMT_SETTING_STATIC_ADDRESS;
 	}
 
 	if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
@@ -638,6 +639,25 @@ static u32 get_current_settings(struct hci_dev *hdev)
 	if (test_bit(HCI_PRIVACY, &hdev->dev_flags))
 		settings |= MGMT_SETTING_PRIVACY;
 
+	/* The current setting for static address has two purposes. The
+	 * first is to indicate if the static address will be used and
+	 * the second is to indicate if it is actually set.
+	 *
+	 * This means if the static address is not configured, this flag
+	 * will never bet set. If the address is configured, then if the
+	 * address is actually used decides if the flag is set or not.
+	 *
+	 * For single mode LE only controllers and dual-mode controllers
+	 * with BR/EDR disabled, the existence of the static address will
+	 * be evaluated.
+	 */
+	if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
+	    !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) ||
+	    !bacmp(&hdev->bdaddr, BDADDR_ANY)) {
+		if (bacmp(&hdev->static_addr, BDADDR_ANY))
+			settings |= MGMT_SETTING_STATIC_ADDRESS;
+	}
+
 	return settings;
 }
 
@@ -4498,10 +4518,14 @@ static int set_static_address(struct sock *sk, struct hci_dev *hdev,
 
 	bacpy(&hdev->static_addr, &cp->bdaddr);
 
-	err = cmd_complete(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, 0, NULL, 0);
+	err = send_settings_rsp(sk, MGMT_OP_SET_STATIC_ADDRESS, hdev);
+	if (err < 0)
+		goto unlock;
 
-	hci_dev_unlock(hdev);
+	err = new_settings(hdev, sk);
 
+unlock:
+	hci_dev_unlock(hdev);
 	return err;
 }
 
-- 
cgit v1.2.3


From 801c1e8da57499a9922223ee1882b2b59debd47c Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 6 Mar 2015 21:08:50 +0200
Subject: Bluetooth: Add mgmt HCI channel registration API

This patch adds an API for registering HCI channels with mgmt-like
semantics. For now the only user will be HCI_CHANNEL_CONTROL, but e.g.
6lowpan is intended to use this as well in the future.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 17 ++++++++
 net/bluetooth/hci_sock.c         | 85 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 99 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 15c761c1f82a..0c84d48e5517 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1273,6 +1273,23 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb);
 
 void hci_sock_dev_event(struct hci_dev *hdev, int event);
 
+struct hci_mgmt_handler {
+	int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
+		     u16 data_len);
+	bool var_len;
+	size_t data_len;
+};
+
+struct hci_mgmt_chan {
+	struct list_head list;
+	unsigned short channel;
+	size_t handler_count;
+	const struct hci_mgmt_handler *handlers;
+};
+
+int hci_mgmt_chan_register(struct hci_mgmt_chan *c);
+void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c);
+
 /* Management interface */
 #define DISCOV_TYPE_BREDR		(BIT(BDADDR_BREDR))
 #define DISCOV_TYPE_LE			(BIT(BDADDR_LE_PUBLIC) | \
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index cb4bc4883350..0d5ace8922b1 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -31,6 +31,9 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/hci_mon.h>
 
+static LIST_HEAD(mgmt_chan_list);
+static DEFINE_MUTEX(mgmt_chan_list_lock);
+
 static atomic_t monitor_promisc = ATOMIC_INIT(0);
 
 /* ----- HCI socket interface ----- */
@@ -401,6 +404,56 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
 	}
 }
 
+static struct hci_mgmt_chan *__hci_mgmt_chan_find(unsigned short channel)
+{
+	struct hci_mgmt_chan *c;
+
+	list_for_each_entry(c, &mgmt_chan_list, list) {
+		if (c->channel == channel)
+			return c;
+	}
+
+	return NULL;
+}
+
+static struct hci_mgmt_chan *hci_mgmt_chan_find(unsigned short channel)
+{
+	struct hci_mgmt_chan *c;
+
+	mutex_lock(&mgmt_chan_list_lock);
+	c = __hci_mgmt_chan_find(channel);
+	mutex_unlock(&mgmt_chan_list_lock);
+
+	return c;
+}
+
+int hci_mgmt_chan_register(struct hci_mgmt_chan *c)
+{
+	if (c->channel < HCI_CHANNEL_CONTROL)
+		return -EINVAL;
+
+	mutex_lock(&mgmt_chan_list_lock);
+	if (__hci_mgmt_chan_find(c->channel)) {
+		mutex_unlock(&mgmt_chan_list_lock);
+		return -EALREADY;
+	}
+
+	list_add_tail(&c->list, &mgmt_chan_list);
+
+	mutex_unlock(&mgmt_chan_list_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(hci_mgmt_chan_register);
+
+void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c)
+{
+	mutex_lock(&mgmt_chan_list_lock);
+	list_del(&c->list);
+	mutex_unlock(&mgmt_chan_list_lock);
+}
+EXPORT_SYMBOL(hci_mgmt_chan_unregister);
+
 static int hci_sock_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -718,8 +771,22 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 		break;
 
 	default:
-		err = -EINVAL;
-		goto done;
+		if (!hci_mgmt_chan_find(haddr.hci_channel)) {
+			err = -EINVAL;
+			goto done;
+		}
+
+		if (haddr.hci_dev != HCI_DEV_NONE) {
+			err = -EINVAL;
+			goto done;
+		}
+
+		if (!capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
+			goto done;
+		}
+
+		break;
 	}
 
 
@@ -837,6 +904,10 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	case HCI_CHANNEL_MONITOR:
 		sock_recv_timestamp(msg, sk, skb);
 		break;
+	default:
+		if (hci_mgmt_chan_find(hci_pi(sk)->channel))
+			sock_recv_timestamp(msg, sk, skb);
+		break;
 	}
 
 	skb_free_datagram(sk, skb);
@@ -848,6 +919,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 			    size_t len)
 {
 	struct sock *sk = sock->sk;
+	struct hci_mgmt_chan *chan;
 	struct hci_dev *hdev;
 	struct sk_buff *skb;
 	int err;
@@ -876,7 +948,14 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 		err = -EOPNOTSUPP;
 		goto done;
 	default:
-		err = -EINVAL;
+		mutex_lock(&mgmt_chan_list_lock);
+		chan = __hci_mgmt_chan_find(hci_pi(sk)->channel);
+		if (chan)
+			err = -ENOSYS; /* FIXME: call handler */
+		else
+			err = -EINVAL;
+
+		mutex_unlock(&mgmt_chan_list_lock);
 		goto done;
 	}
 
-- 
cgit v1.2.3


From 6d785aa345f525e1fdf098b7c590168f0b00f3f1 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 6 Mar 2015 21:08:51 +0200
Subject: Bluetooth: Convert mgmt to use HCI chan registration API

This patch converts the existing mgmt code to use the newly introduced
generic API for registering HCI channels with mgmt-like semantics.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |  3 +++
 include/net/bluetooth/hci_core.h  |  4 +++-
 net/bluetooth/af_bluetooth.c      |  9 +++++++++
 net/bluetooth/hci_sock.c          | 19 +------------------
 net/bluetooth/mgmt.c              | 34 +++++++++++++++++++++++-----------
 5 files changed, 39 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 6bb97df16d2d..e598ca096ec9 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -354,6 +354,9 @@ void l2cap_exit(void);
 int sco_init(void);
 void sco_exit(void);
 
+int mgmt_init(void);
+void mgmt_exit(void);
+
 void bt_sock_reclassify_lock(struct sock *sk, int proto);
 
 #endif /* __BLUETOOTH_H */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 0c84d48e5517..b2a183d201b7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1309,7 +1309,9 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c);
 #define DISCOV_BREDR_INQUIRY_LEN	0x08
 #define DISCOV_LE_RESTART_DELAY		msecs_to_jiffies(200)	/* msec */
 
-int mgmt_control(struct sock *sk, struct msghdr *msg, size_t len);
+int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
+		 struct msghdr *msg, size_t msglen);
+
 int mgmt_new_settings(struct hci_dev *hdev);
 void mgmt_index_added(struct hci_dev *hdev);
 void mgmt_index_removed(struct hci_dev *hdev);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 20a4698e2255..70f9d945faf7 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -749,6 +749,13 @@ static int __init bt_init(void)
 		goto sock_err;
 	}
 
+	err = mgmt_init();
+	if (err < 0) {
+		sco_exit();
+		l2cap_exit();
+		goto sock_err;
+	}
+
 	return 0;
 
 sock_err:
@@ -763,6 +770,8 @@ error:
 
 static void __exit bt_exit(void)
 {
+	mgmt_exit();
+
 	sco_exit();
 
 	l2cap_exit();
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 0d5ace8922b1..aa9ffcb9481f 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -741,19 +741,6 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 		hci_pi(sk)->hdev = hdev;
 		break;
 
-	case HCI_CHANNEL_CONTROL:
-		if (haddr.hci_dev != HCI_DEV_NONE) {
-			err = -EINVAL;
-			goto done;
-		}
-
-		if (!capable(CAP_NET_ADMIN)) {
-			err = -EPERM;
-			goto done;
-		}
-
-		break;
-
 	case HCI_CHANNEL_MONITOR:
 		if (haddr.hci_dev != HCI_DEV_NONE) {
 			err = -EINVAL;
@@ -900,7 +887,6 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		hci_sock_cmsg(sk, msg, skb);
 		break;
 	case HCI_CHANNEL_USER:
-	case HCI_CHANNEL_CONTROL:
 	case HCI_CHANNEL_MONITOR:
 		sock_recv_timestamp(msg, sk, skb);
 		break;
@@ -941,9 +927,6 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 	case HCI_CHANNEL_RAW:
 	case HCI_CHANNEL_USER:
 		break;
-	case HCI_CHANNEL_CONTROL:
-		err = mgmt_control(sk, msg, len);
-		goto done;
 	case HCI_CHANNEL_MONITOR:
 		err = -EOPNOTSUPP;
 		goto done;
@@ -951,7 +934,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 		mutex_lock(&mgmt_chan_list_lock);
 		chan = __hci_mgmt_chan_find(hci_pi(sk)->channel);
 		if (chan)
-			err = -ENOSYS; /* FIXME: call handler */
+			err = mgmt_control(chan, sk, msg, len);
 		else
 			err = -EINVAL;
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d185a9800983..bb02dd1b82bf 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6130,12 +6130,7 @@ unlock:
 	return err;
 }
 
-static const struct mgmt_handler {
-	int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
-		     u16 data_len);
-	bool var_len;
-	size_t data_len;
-} mgmt_handlers[] = {
+static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ NULL }, /* 0x0000 (no command) */
 	{ read_version,           false, MGMT_READ_VERSION_SIZE },
 	{ read_commands,          false, MGMT_READ_COMMANDS_SIZE },
@@ -6197,14 +6192,15 @@ static const struct mgmt_handler {
 	{ start_service_discovery,true,  MGMT_START_SERVICE_DISCOVERY_SIZE },
 };
 
-int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
+int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
+		 struct msghdr *msg, size_t msglen)
 {
 	void *buf;
 	u8 *cp;
 	struct mgmt_hdr *hdr;
 	u16 opcode, index, len;
 	struct hci_dev *hdev = NULL;
-	const struct mgmt_handler *handler;
+	const struct hci_mgmt_handler *handler;
 	int err;
 
 	BT_DBG("got %zu bytes", msglen);
@@ -6257,8 +6253,8 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 		}
 	}
 
-	if (opcode >= ARRAY_SIZE(mgmt_handlers) ||
-	    mgmt_handlers[opcode].func == NULL) {
+	if (opcode >= chan->handler_count ||
+	    chan->handlers[opcode].func == NULL) {
 		BT_DBG("Unknown op %u", opcode);
 		err = cmd_status(sk, index, opcode,
 				 MGMT_STATUS_UNKNOWN_COMMAND);
@@ -6279,7 +6275,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 		goto done;
 	}
 
-	handler = &mgmt_handlers[opcode];
+	handler = &chan->handlers[opcode];
 
 	if ((handler->var_len && len < handler->data_len) ||
 	    (!handler->var_len && len != handler->data_len)) {
@@ -7470,3 +7466,19 @@ void mgmt_reenable_advertising(struct hci_dev *hdev)
 	enable_advertising(&req);
 	hci_req_run(&req, adv_enable_complete);
 }
+
+static struct hci_mgmt_chan chan = {
+	.channel	= HCI_CHANNEL_CONTROL,
+	.handler_count	= ARRAY_SIZE(mgmt_handlers),
+	.handlers	= mgmt_handlers,
+};
+
+int mgmt_init(void)
+{
+	return hci_mgmt_chan_register(&chan);
+}
+
+void mgmt_exit(void)
+{
+	hci_mgmt_chan_unregister(&chan);
+}
-- 
cgit v1.2.3


From b9a245fb12315f8c6528b29a991a004859c982d5 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 6 Mar 2015 21:08:52 +0200
Subject: Bluetooth: Move all mgmt command quirks to handler table

In order to completely generalize the mgmt command handling we need to
move away command-specific information from mgmt_control() into the
actual command table. This patch adds a new 'flags' field to the handler
entries which can now contain the following command specific
information:

 - Command takes variable length parameters
 - Command doesn't target any specific HCI device
 - Command can be sent when the HCI device is unconfigured

After this the mgmt_control() function is completely generic and can
potentially be reused by new HCI channels.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |   6 +-
 net/bluetooth/mgmt.c             | 170 ++++++++++++++++++++-------------------
 2 files changed, 93 insertions(+), 83 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b2a183d201b7..afc641c5e55c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1273,11 +1273,15 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb);
 
 void hci_sock_dev_event(struct hci_dev *hdev, int event);
 
+#define HCI_MGMT_VAR_LEN	(1 << 0)
+#define HCI_MGMT_NO_HDEV	(1 << 1)
+#define HCI_MGMT_UNCONFIGURED	(1 << 2)
+
 struct hci_mgmt_handler {
 	int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
 		     u16 data_len);
-	bool var_len;
 	size_t data_len;
+	unsigned long flags;
 };
 
 struct hci_mgmt_chan {
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index bb02dd1b82bf..f65516420a31 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6132,64 +6132,77 @@ unlock:
 
 static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ NULL }, /* 0x0000 (no command) */
-	{ read_version,           false, MGMT_READ_VERSION_SIZE },
-	{ read_commands,          false, MGMT_READ_COMMANDS_SIZE },
-	{ read_index_list,        false, MGMT_READ_INDEX_LIST_SIZE },
-	{ read_controller_info,   false, MGMT_READ_INFO_SIZE },
-	{ set_powered,            false, MGMT_SETTING_SIZE },
-	{ set_discoverable,       false, MGMT_SET_DISCOVERABLE_SIZE },
-	{ set_connectable,        false, MGMT_SETTING_SIZE },
-	{ set_fast_connectable,   false, MGMT_SETTING_SIZE },
-	{ set_bondable,           false, MGMT_SETTING_SIZE },
-	{ set_link_security,      false, MGMT_SETTING_SIZE },
-	{ set_ssp,                false, MGMT_SETTING_SIZE },
-	{ set_hs,                 false, MGMT_SETTING_SIZE },
-	{ set_le,                 false, MGMT_SETTING_SIZE },
-	{ set_dev_class,          false, MGMT_SET_DEV_CLASS_SIZE },
-	{ set_local_name,         false, MGMT_SET_LOCAL_NAME_SIZE },
-	{ add_uuid,               false, MGMT_ADD_UUID_SIZE },
-	{ remove_uuid,            false, MGMT_REMOVE_UUID_SIZE },
-	{ load_link_keys,         true,  MGMT_LOAD_LINK_KEYS_SIZE },
-	{ load_long_term_keys,    true,  MGMT_LOAD_LONG_TERM_KEYS_SIZE },
-	{ disconnect,             false, MGMT_DISCONNECT_SIZE },
-	{ get_connections,        false, MGMT_GET_CONNECTIONS_SIZE },
-	{ pin_code_reply,         false, MGMT_PIN_CODE_REPLY_SIZE },
-	{ pin_code_neg_reply,     false, MGMT_PIN_CODE_NEG_REPLY_SIZE },
-	{ set_io_capability,      false, MGMT_SET_IO_CAPABILITY_SIZE },
-	{ pair_device,            false, MGMT_PAIR_DEVICE_SIZE },
-	{ cancel_pair_device,     false, MGMT_CANCEL_PAIR_DEVICE_SIZE },
-	{ unpair_device,          false, MGMT_UNPAIR_DEVICE_SIZE },
-	{ user_confirm_reply,     false, MGMT_USER_CONFIRM_REPLY_SIZE },
-	{ user_confirm_neg_reply, false, MGMT_USER_CONFIRM_NEG_REPLY_SIZE },
-	{ user_passkey_reply,     false, MGMT_USER_PASSKEY_REPLY_SIZE },
-	{ user_passkey_neg_reply, false, MGMT_USER_PASSKEY_NEG_REPLY_SIZE },
-	{ read_local_oob_data,    false, MGMT_READ_LOCAL_OOB_DATA_SIZE },
-	{ add_remote_oob_data,    true,  MGMT_ADD_REMOTE_OOB_DATA_SIZE },
-	{ remove_remote_oob_data, false, MGMT_REMOVE_REMOTE_OOB_DATA_SIZE },
-	{ start_discovery,        false, MGMT_START_DISCOVERY_SIZE },
-	{ stop_discovery,         false, MGMT_STOP_DISCOVERY_SIZE },
-	{ confirm_name,           false, MGMT_CONFIRM_NAME_SIZE },
-	{ block_device,           false, MGMT_BLOCK_DEVICE_SIZE },
-	{ unblock_device,         false, MGMT_UNBLOCK_DEVICE_SIZE },
-	{ set_device_id,          false, MGMT_SET_DEVICE_ID_SIZE },
-	{ set_advertising,        false, MGMT_SETTING_SIZE },
-	{ set_bredr,              false, MGMT_SETTING_SIZE },
-	{ set_static_address,     false, MGMT_SET_STATIC_ADDRESS_SIZE },
-	{ set_scan_params,        false, MGMT_SET_SCAN_PARAMS_SIZE },
-	{ set_secure_conn,        false, MGMT_SETTING_SIZE },
-	{ set_debug_keys,         false, MGMT_SETTING_SIZE },
-	{ set_privacy,            false, MGMT_SET_PRIVACY_SIZE },
-	{ load_irks,              true,  MGMT_LOAD_IRKS_SIZE },
-	{ get_conn_info,          false, MGMT_GET_CONN_INFO_SIZE },
-	{ get_clock_info,         false, MGMT_GET_CLOCK_INFO_SIZE },
-	{ add_device,             false, MGMT_ADD_DEVICE_SIZE },
-	{ remove_device,          false, MGMT_REMOVE_DEVICE_SIZE },
-	{ load_conn_param,        true,  MGMT_LOAD_CONN_PARAM_SIZE },
-	{ read_unconf_index_list, false, MGMT_READ_UNCONF_INDEX_LIST_SIZE },
-	{ read_config_info,       false, MGMT_READ_CONFIG_INFO_SIZE },
-	{ set_external_config,    false, MGMT_SET_EXTERNAL_CONFIG_SIZE },
-	{ set_public_address,     false, MGMT_SET_PUBLIC_ADDRESS_SIZE },
-	{ start_service_discovery,true,  MGMT_START_SERVICE_DISCOVERY_SIZE },
+	{ read_version,            MGMT_READ_VERSION_SIZE,
+						HCI_MGMT_NO_HDEV },
+	{ read_commands,           MGMT_READ_COMMANDS_SIZE,
+						HCI_MGMT_NO_HDEV },
+	{ read_index_list,         MGMT_READ_INDEX_LIST_SIZE,
+						HCI_MGMT_NO_HDEV },
+	{ read_controller_info,    MGMT_READ_INFO_SIZE,                 0 },
+	{ set_powered,             MGMT_SETTING_SIZE,                   0 },
+	{ set_discoverable,        MGMT_SET_DISCOVERABLE_SIZE,          0 },
+	{ set_connectable,         MGMT_SETTING_SIZE,                   0 },
+	{ set_fast_connectable,    MGMT_SETTING_SIZE,                   0 },
+	{ set_bondable,            MGMT_SETTING_SIZE,                   0 },
+	{ set_link_security,       MGMT_SETTING_SIZE,                   0 },
+	{ set_ssp,                 MGMT_SETTING_SIZE,                   0 },
+	{ set_hs,                  MGMT_SETTING_SIZE,                   0 },
+	{ set_le,                  MGMT_SETTING_SIZE,                   0 },
+	{ set_dev_class,           MGMT_SET_DEV_CLASS_SIZE,             0 },
+	{ set_local_name,          MGMT_SET_LOCAL_NAME_SIZE,            0 },
+	{ add_uuid,                MGMT_ADD_UUID_SIZE,                  0 },
+	{ remove_uuid,             MGMT_REMOVE_UUID_SIZE,               0 },
+	{ load_link_keys,          MGMT_LOAD_LINK_KEYS_SIZE,
+						HCI_MGMT_VAR_LEN },
+	{ load_long_term_keys,     MGMT_LOAD_LONG_TERM_KEYS_SIZE,
+						HCI_MGMT_VAR_LEN },
+	{ disconnect,              MGMT_DISCONNECT_SIZE,                0 },
+	{ get_connections,         MGMT_GET_CONNECTIONS_SIZE,           0 },
+	{ pin_code_reply,          MGMT_PIN_CODE_REPLY_SIZE,            0 },
+	{ pin_code_neg_reply,      MGMT_PIN_CODE_NEG_REPLY_SIZE,        0 },
+	{ set_io_capability,       MGMT_SET_IO_CAPABILITY_SIZE,         0 },
+	{ pair_device,             MGMT_PAIR_DEVICE_SIZE,               0 },
+	{ cancel_pair_device,      MGMT_CANCEL_PAIR_DEVICE_SIZE,        0 },
+	{ unpair_device,           MGMT_UNPAIR_DEVICE_SIZE,             0 },
+	{ user_confirm_reply,      MGMT_USER_CONFIRM_REPLY_SIZE,        0 },
+	{ user_confirm_neg_reply,  MGMT_USER_CONFIRM_NEG_REPLY_SIZE,    0 },
+	{ user_passkey_reply,      MGMT_USER_PASSKEY_REPLY_SIZE,        0 },
+	{ user_passkey_neg_reply,  MGMT_USER_PASSKEY_NEG_REPLY_SIZE,    0 },
+	{ read_local_oob_data,     MGMT_READ_LOCAL_OOB_DATA_SIZE },
+	{ add_remote_oob_data,     MGMT_ADD_REMOTE_OOB_DATA_SIZE,
+						HCI_MGMT_VAR_LEN },
+	{ remove_remote_oob_data,  MGMT_REMOVE_REMOTE_OOB_DATA_SIZE,    0 },
+	{ start_discovery,         MGMT_START_DISCOVERY_SIZE,           0 },
+	{ stop_discovery,          MGMT_STOP_DISCOVERY_SIZE,            0 },
+	{ confirm_name,            MGMT_CONFIRM_NAME_SIZE,              0 },
+	{ block_device,            MGMT_BLOCK_DEVICE_SIZE,              0 },
+	{ unblock_device,          MGMT_UNBLOCK_DEVICE_SIZE,            0 },
+	{ set_device_id,           MGMT_SET_DEVICE_ID_SIZE,             0 },
+	{ set_advertising,         MGMT_SETTING_SIZE,                   0 },
+	{ set_bredr,               MGMT_SETTING_SIZE,                   0 },
+	{ set_static_address,      MGMT_SET_STATIC_ADDRESS_SIZE,        0 },
+	{ set_scan_params,         MGMT_SET_SCAN_PARAMS_SIZE,           0 },
+	{ set_secure_conn,         MGMT_SETTING_SIZE,                   0 },
+	{ set_debug_keys,          MGMT_SETTING_SIZE,                   0 },
+	{ set_privacy,             MGMT_SET_PRIVACY_SIZE,               0 },
+	{ load_irks,               MGMT_LOAD_IRKS_SIZE,
+						HCI_MGMT_VAR_LEN },
+	{ get_conn_info,           MGMT_GET_CONN_INFO_SIZE,             0 },
+	{ get_clock_info,          MGMT_GET_CLOCK_INFO_SIZE,            0 },
+	{ add_device,              MGMT_ADD_DEVICE_SIZE,                0 },
+	{ remove_device,           MGMT_REMOVE_DEVICE_SIZE,             0 },
+	{ load_conn_param,         MGMT_LOAD_CONN_PARAM_SIZE,
+						HCI_MGMT_VAR_LEN },
+	{ read_unconf_index_list,  MGMT_READ_UNCONF_INDEX_LIST_SIZE,
+						HCI_MGMT_NO_HDEV },
+	{ read_config_info,        MGMT_READ_CONFIG_INFO_SIZE,
+						HCI_MGMT_UNCONFIGURED },
+	{ set_external_config,     MGMT_SET_EXTERNAL_CONFIG_SIZE,
+						HCI_MGMT_UNCONFIGURED },
+	{ set_public_address,      MGMT_SET_PUBLIC_ADDRESS_SIZE,
+						HCI_MGMT_UNCONFIGURED },
+	{ start_service_discovery, MGMT_START_SERVICE_DISCOVERY_SIZE,
+						HCI_MGMT_VAR_LEN },
 };
 
 int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
@@ -6201,6 +6214,7 @@ int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
 	u16 opcode, index, len;
 	struct hci_dev *hdev = NULL;
 	const struct hci_mgmt_handler *handler;
+	bool var_len, no_hdev;
 	int err;
 
 	BT_DBG("got %zu bytes", msglen);
@@ -6227,6 +6241,16 @@ int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
 		goto done;
 	}
 
+	if (opcode >= chan->handler_count ||
+	    chan->handlers[opcode].func == NULL) {
+		BT_DBG("Unknown op %u", opcode);
+		err = cmd_status(sk, index, opcode,
+				 MGMT_STATUS_UNKNOWN_COMMAND);
+		goto done;
+	}
+
+	handler = &chan->handlers[opcode];
+
 	if (index != MGMT_INDEX_NONE) {
 		hdev = hci_dev_get(index);
 		if (!hdev) {
@@ -6244,41 +6268,23 @@ int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
 		}
 
 		if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
-		    opcode != MGMT_OP_READ_CONFIG_INFO &&
-		    opcode != MGMT_OP_SET_EXTERNAL_CONFIG &&
-		    opcode != MGMT_OP_SET_PUBLIC_ADDRESS) {
+		    !(handler->flags & HCI_MGMT_UNCONFIGURED)) {
 			err = cmd_status(sk, index, opcode,
 					 MGMT_STATUS_INVALID_INDEX);
 			goto done;
 		}
 	}
 
-	if (opcode >= chan->handler_count ||
-	    chan->handlers[opcode].func == NULL) {
-		BT_DBG("Unknown op %u", opcode);
-		err = cmd_status(sk, index, opcode,
-				 MGMT_STATUS_UNKNOWN_COMMAND);
-		goto done;
-	}
-
-	if (hdev && (opcode <= MGMT_OP_READ_INDEX_LIST ||
-		     opcode == MGMT_OP_READ_UNCONF_INDEX_LIST)) {
-		err = cmd_status(sk, index, opcode,
-				 MGMT_STATUS_INVALID_INDEX);
-		goto done;
-	}
-
-	if (!hdev && (opcode > MGMT_OP_READ_INDEX_LIST &&
-		      opcode != MGMT_OP_READ_UNCONF_INDEX_LIST)) {
+	no_hdev = (handler->flags & HCI_MGMT_NO_HDEV);
+	if (no_hdev != !hdev) {
 		err = cmd_status(sk, index, opcode,
 				 MGMT_STATUS_INVALID_INDEX);
 		goto done;
 	}
 
-	handler = &chan->handlers[opcode];
-
-	if ((handler->var_len && len < handler->data_len) ||
-	    (!handler->var_len && len != handler->data_len)) {
+	var_len = (handler->flags & HCI_MGMT_VAR_LEN);
+	if ((var_len && len < handler->data_len) ||
+	    (!var_len && len != handler->data_len)) {
 		err = cmd_status(sk, index, opcode,
 				 MGMT_STATUS_INVALID_PARAMS);
 		goto done;
-- 
cgit v1.2.3


From aaa4e70404c7b38a8792dc69af54afd7218b2ec0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 4 Mar 2015 10:16:43 -0600
Subject: DECnet: Only use neigh_ops for adding the link layer header

Other users users of the neighbour table use neigh->output as the method
to decided when and which link-layer header to place on a packet.
DECnet has been using neigh->output to decide which DECnet headers to
place on a packet depending which neighbour the packet is destined for.

The DECnet usage isn't totally wrong but it can run into problems if the
neighbour output function is run for a second time as the teql driver
and the bridge netfilter code can do.

Therefore to avoid pathologic problems later down the line and make the
neighbour code easier to understand by refactoring the decnet output
code to only use a neighbour method to add a link layer header to a
packet.

This is done by moving the neigbhour operations lookup from
dn_to_neigh_output to dn_neigh_output_packet.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn_neigh.h |   1 +
 net/decnet/dn_neigh.c  | 105 +++++++++++++++++++++++++------------------------
 net/decnet/dn_route.c  |   9 -----
 3 files changed, 55 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h
index fac4e3f4a6d3..0f26aa707e62 100644
--- a/include/net/dn_neigh.h
+++ b/include/net/dn_neigh.h
@@ -22,6 +22,7 @@ int dn_neigh_router_hello(struct sk_buff *skb);
 int dn_neigh_endnode_hello(struct sk_buff *skb);
 void dn_neigh_pointopoint_hello(struct sk_buff *skb);
 int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n);
+int dn_to_neigh_output(struct sk_buff *skb);
 
 extern struct neigh_table dn_neigh_table;
 
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index ee7d1cef0027..be1f08cdad29 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -49,41 +49,17 @@
 #include <net/dn_route.h>
 
 static int dn_neigh_construct(struct neighbour *);
-static void dn_long_error_report(struct neighbour *, struct sk_buff *);
-static void dn_short_error_report(struct neighbour *, struct sk_buff *);
-static int dn_long_output(struct neighbour *, struct sk_buff *);
-static int dn_short_output(struct neighbour *, struct sk_buff *);
-static int dn_phase3_output(struct neighbour *, struct sk_buff *);
-
-
-/*
- * For talking to broadcast devices: Ethernet & PPP
- */
-static const struct neigh_ops dn_long_ops = {
-	.family =		AF_DECnet,
-	.error_report =		dn_long_error_report,
-	.output =		dn_long_output,
-	.connected_output =	dn_long_output,
-};
+static void dn_neigh_error_report(struct neighbour *, struct sk_buff *);
+static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb);
 
 /*
- * For talking to pointopoint and multidrop devices: DDCMP and X.25
+ * Operations for adding the link layer header.
  */
-static const struct neigh_ops dn_short_ops = {
+static const struct neigh_ops dn_neigh_ops = {
 	.family =		AF_DECnet,
-	.error_report =		dn_short_error_report,
-	.output =		dn_short_output,
-	.connected_output =	dn_short_output,
-};
-
-/*
- * For talking to DECnet phase III nodes
- */
-static const struct neigh_ops dn_phase3_ops = {
-	.family =		AF_DECnet,
-	.error_report =		dn_short_error_report, /* Can use short version here */
-	.output =		dn_phase3_output,
-	.connected_output =	dn_phase3_output,
+	.error_report =		dn_neigh_error_report,
+	.output =		dn_neigh_output,
+	.connected_output =	dn_neigh_output,
 };
 
 static u32 dn_neigh_hash(const void *pkey,
@@ -153,16 +129,9 @@ static int dn_neigh_construct(struct neighbour *neigh)
 
 	__neigh_parms_put(neigh->parms);
 	neigh->parms = neigh_parms_clone(parms);
-
-	if (dn_db->use_long)
-		neigh->ops = &dn_long_ops;
-	else
-		neigh->ops = &dn_short_ops;
 	rcu_read_unlock();
 
-	if (dn->flags & DN_NDFLAG_P3)
-		neigh->ops = &dn_phase3_ops;
-
+	neigh->ops = &dn_neigh_ops;
 	neigh->nud_state = NUD_NOARP;
 	neigh->output = neigh->ops->connected_output;
 
@@ -194,24 +163,16 @@ static int dn_neigh_construct(struct neighbour *neigh)
 	return 0;
 }
 
-static void dn_long_error_report(struct neighbour *neigh, struct sk_buff *skb)
-{
-	printk(KERN_DEBUG "dn_long_error_report: called\n");
-	kfree_skb(skb);
-}
-
-
-static void dn_short_error_report(struct neighbour *neigh, struct sk_buff *skb)
+static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb)
 {
-	printk(KERN_DEBUG "dn_short_error_report: called\n");
+	printk(KERN_DEBUG "dn_neigh_error_report: called\n");
 	kfree_skb(skb);
 }
 
-static int dn_neigh_output_packet(struct sk_buff *skb)
+static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct dn_route *rt = (struct dn_route *)dst;
-	struct neighbour *neigh = rt->n;
 	struct net_device *dev = neigh->dev;
 	char mac_addr[ETH_ALEN];
 	unsigned int seq;
@@ -233,6 +194,18 @@ static int dn_neigh_output_packet(struct sk_buff *skb)
 	return err;
 }
 
+static int dn_neigh_output_packet(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct dn_route *rt = (struct dn_route *)dst;
+	struct neighbour *neigh = rt->n;
+
+	return neigh->output(neigh, skb);
+}
+
+/*
+ * For talking to broadcast devices: Ethernet & PPP
+ */
 static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb)
 {
 	struct net_device *dev = neigh->dev;
@@ -276,6 +249,9 @@ static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb)
 		       neigh->dev, dn_neigh_output_packet);
 }
 
+/*
+ * For talking to pointopoint and multidrop devices: DDCMP and X.25
+ */
 static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb)
 {
 	struct net_device *dev = neigh->dev;
@@ -313,7 +289,8 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb)
 }
 
 /*
- * Phase 3 output is the same is short output, execpt that
+ * For talking to DECnet phase III nodes
+ * Phase 3 output is the same as short output, execpt that
  * it clears the area bits before transmission.
  */
 static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb)
@@ -351,6 +328,32 @@ static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb)
 		       neigh->dev, dn_neigh_output_packet);
 }
 
+int dn_to_neigh_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct dn_route *rt = (struct dn_route *) dst;
+	struct neighbour *neigh = rt->n;
+	struct dn_neigh *dn = (struct dn_neigh *)neigh;
+	struct dn_dev *dn_db;
+	bool use_long;
+
+	rcu_read_lock();
+	dn_db = rcu_dereference(neigh->dev->dn_ptr);
+	if (dn_db == NULL) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	use_long = dn_db->use_long;
+	rcu_read_unlock();
+
+	if (dn->flags & DN_NDFLAG_P3)
+		return dn_phase3_output(neigh, skb);
+	if (use_long)
+		return dn_long_output(neigh, skb);
+	else
+		return dn_short_output(neigh, skb);
+}
+
 /*
  * Unfortunately, the neighbour code uses the device in its hash
  * function, so we don't get any advantage from it. This function
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 3b81092771f8..771815575dbd 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -743,15 +743,6 @@ out:
 	return NET_RX_DROP;
 }
 
-static int dn_to_neigh_output(struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb_dst(skb);
-	struct dn_route *rt = (struct dn_route *) dst;
-	struct neighbour *n = rt->n;
-
-	return n->output(n, skb);
-}
-
 static int dn_output(struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
-- 
cgit v1.2.3


From dcd8fb8533ceb493146ce030d15f7965b82d0c27 Mon Sep 17 00:00:00 2001
From: Fan Du <fan.du@intel.com>
Date: Fri, 6 Mar 2015 11:18:22 +0800
Subject: ipv4: Raise tcp PMTU probe mss base size

Quotes from RFC4821 7.2.  Selecting Initial Values

   It is RECOMMENDED that search_low be initially set to an MTU size
   that is likely to work over a very wide range of environments.  Given
   today's technologies, a value of 1024 bytes is probably safe enough.
   The initial value for search_low SHOULD be configurable.

Moreover, set a small value will introduce extra time for the search
to converge. So set the initial probe base mss size to 1024 Bytes.

Signed-off-by: Fan Du <fan.du@intel.com>
Acked-by: John Heffner <johnwheffner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f87599d5af82..834089b0cffc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -65,7 +65,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCP_MIN_MSS		88U
 
 /* The least MTU to use for probing */
-#define TCP_BASE_MSS		512
+#define TCP_BASE_MSS		1024
 
 /* After receiving this amount of duplicate ACKs fast retransmit starts. */
 #define TCP_FASTRETRANS_THRESH 3
-- 
cgit v1.2.3


From 6b58e0a5f32dedb609438bb9c9c82aa6e23381f2 Mon Sep 17 00:00:00 2001
From: Fan Du <fan.du@intel.com>
Date: Fri, 6 Mar 2015 11:18:23 +0800
Subject: ipv4: Use binary search to choose tcp PMTU probe_size

Current probe_size is chosen by doubling mss_cache,
the probing process will end shortly with a sub-optimal
mss size, and the link mtu will not be taken full
advantage of, in return, this will make user to tweak
tcp_base_mss with care.

Use binary search to choose probe_size in a fine
granularity manner, an optimal mss will be found
to boost performance as its maxmium.

In addition, introduce a sysctl_tcp_probe_threshold
to control when probing will stop in respect to
the width of search range.

Test env:
Docker instance with vxlan encapuslation(82599EB)
iperf -c 10.0.0.24  -t 60

before this patch:
1.26 Gbits/sec

After this patch: increase 26%
1.59 Gbits/sec

Signed-off-by: Fan Du <fan.du@intel.com>
Acked-by: John Heffner <johnwheffner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h          |  3 +++
 net/ipv4/sysctl_net_ipv4.c |  7 +++++++
 net/ipv4/tcp_ipv4.c        |  1 +
 net/ipv4/tcp_output.c      | 14 +++++++++++---
 5 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1085e12f940f..e051d399fa17 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -87,6 +87,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_fwmark_accept;
 	int sysctl_tcp_mtu_probing;
 	int sysctl_tcp_base_mss;
+	int sysctl_tcp_probe_threshold;
 
 	struct ping_group_range ping_group_range;
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 834089b0cffc..1ad82e334e27 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -67,6 +67,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		1024
 
+/* Specify interval when tcp mtu probing will stop */
+#define TCP_PROBE_THRESHOLD	8
+
 /* After receiving this amount of duplicate ACKs fast retransmit starts. */
 #define TCP_FASTRETRANS_THRESH 3
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d151539da8e6..d3c09c12ee81 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -883,6 +883,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "tcp_probe_threshold",
+		.data		= &init_net.ipv4.sysctl_tcp_probe_threshold,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a2dfed4783b..35790d977a2b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2460,6 +2460,7 @@ static int __net_init tcp_sk_init(struct net *net)
 	}
 	net->ipv4.sysctl_tcp_ecn = 2;
 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
 	return 0;
 
 fail:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8bbd86cd81c8..ed024cbb097f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1842,11 +1842,13 @@ static int tcp_mtu_probe(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_buff *skb, *nskb, *next;
+	struct net *net = sock_net(sk);
 	int len;
 	int probe_size;
 	int size_needed;
 	int copy;
 	int mss_now;
+	int interval;
 
 	/* Not currently probing/verifying,
 	 * not in recovery,
@@ -1859,11 +1861,17 @@ static int tcp_mtu_probe(struct sock *sk)
 	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
 		return -1;
 
-	/* Very simple search strategy: just double the MSS. */
+	/* Use binary search for probe_size between tcp_mss_base,
+	 * and current mss_clamp. if (search_high - search_low)
+	 * smaller than a threshold, backoff from probing.
+	 */
 	mss_now = tcp_current_mss(sk);
-	probe_size = 2 * tp->mss_cache;
+	probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
+				    icsk->icsk_mtup.search_low) >> 1);
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
-	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
+	interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
+	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
+	    interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
 		/* TODO: set timer for probe_converge_event */
 		return -1;
 	}
-- 
cgit v1.2.3


From 05cbc0db03e82128f2e7e353d4194dd24a1627fe Mon Sep 17 00:00:00 2001
From: Fan Du <fan.du@intel.com>
Date: Fri, 6 Mar 2015 11:18:24 +0800
Subject: ipv4: Create probe timer for tcp PMTU as per RFC4821

As per RFC4821 7.3.  Selecting Probe Size, a probe timer should
be armed once probing has converged. Once this timer expired,
probing again to take advantage of any path PMTU change. The
recommended probing interval is 10 minutes per RFC1981. Probing
interval could be sysctled by sysctl_tcp_probe_interval.

Eric Dumazet suggested to implement pseudo timer based on 32bits
jiffies tcp_time_stamp instead of using classic timer for such
rare event.

Signed-off-by: Fan Du <fan.du@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h |  2 ++
 include/net/netns/ipv4.h           |  1 +
 include/net/tcp.h                  |  3 +++
 net/ipv4/sysctl_net_ipv4.c         |  7 +++++++
 net/ipv4/tcp_ipv4.c                |  1 +
 net/ipv4/tcp_output.c              | 38 ++++++++++++++++++++++++++++++++++++--
 net/ipv4/tcp_timer.c               |  1 +
 7 files changed, 51 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 5976bdecf58b..b9a6b0a94cc6 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -126,6 +126,8 @@ struct inet_connection_sock {
 
 		/* Information on the current probe. */
 		int		  probe_size;
+
+		u32		  probe_timestamp;
 	} icsk_mtup;
 	u32			  icsk_ca_priv[16];
 	u32			  icsk_user_timeout;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index e051d399fa17..8f3a1a1a5a94 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -88,6 +88,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_mtu_probing;
 	int sysctl_tcp_base_mss;
 	int sysctl_tcp_probe_threshold;
+	u32 sysctl_tcp_probe_interval;
 
 	struct ping_group_range ping_group_range;
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1ad82e334e27..2e11e38205c2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -67,6 +67,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS		1024
 
+/* probing interval, default to 10 minutes as per RFC4821 */
+#define TCP_PROBE_INTERVAL	600
+
 /* Specify interval when tcp mtu probing will stop */
 #define TCP_PROBE_THRESHOLD	8
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d3c09c12ee81..fdf899163d44 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -890,6 +890,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "tcp_probe_interval",
+		.data		= &init_net.ipv4.sysctl_tcp_probe_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 35790d977a2b..f0c6fc32bfa8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2461,6 +2461,7 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_ecn = 2;
 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
+	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
 	return 0;
 
 fail:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ed024cbb097f..5a73ad5afaf7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1354,6 +1354,8 @@ void tcp_mtup_init(struct sock *sk)
 			       icsk->icsk_af_ops->net_header_len;
 	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
 	icsk->icsk_mtup.probe_size = 0;
+	if (icsk->icsk_mtup.enabled)
+		icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
 
@@ -1828,6 +1830,31 @@ send_now:
 	return false;
 }
 
+static inline void tcp_mtu_check_reprobe(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct net *net = sock_net(sk);
+	u32 interval;
+	s32 delta;
+
+	interval = net->ipv4.sysctl_tcp_probe_interval;
+	delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+	if (unlikely(delta >= interval * HZ)) {
+		int mss = tcp_current_mss(sk);
+
+		/* Update current search range */
+		icsk->icsk_mtup.probe_size = 0;
+		icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
+			sizeof(struct tcphdr) +
+			icsk->icsk_af_ops->net_header_len;
+		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+
+		/* Update probe time stamp */
+		icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+	}
+}
+
 /* Create a new MTU probe if we are ready.
  * MTU probe is regularly attempting to increase the path MTU by
  * deliberately sending larger packets.  This discovers routing
@@ -1870,9 +1897,16 @@ static int tcp_mtu_probe(struct sock *sk)
 				    icsk->icsk_mtup.search_low) >> 1);
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 	interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
+	/* When misfortune happens, we are reprobing actively,
+	 * and then reprobe timer has expired. We stick with current
+	 * probing process by not resetting search range to its orignal.
+	 */
 	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
-	    interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
-		/* TODO: set timer for probe_converge_event */
+		interval < net->ipv4.sysctl_tcp_probe_threshold) {
+		/* Check whether enough time has elaplased for
+		 * another round of probing.
+		 */
+		tcp_mtu_check_reprobe(sk);
 		return -1;
 	}
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 0732b787904e..15505936511d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -107,6 +107,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 	if (net->ipv4.sysctl_tcp_mtu_probing) {
 		if (!icsk->icsk_mtup.enabled) {
 			icsk->icsk_mtup.enabled = 1;
+			icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
 			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		} else {
 			struct net *net = sock_net(sk);
-- 
cgit v1.2.3


From 89650ad0047f039b3c3bc0f6a5823bb9c9738152 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Mar 2015 11:44:28 -0500
Subject: fib: make netdev_switch_fib_ipv4_abort in header file static inline

When building without CONFIG_NET_SWITCHDEV,
netdev_switch_fib_ipv4_abort is defined in the header file. It must
be static inline to avoid build failure at link time.

Fixes: 8e05fd7166c6 ("fib: hook IPv4 fib for hardware offload")

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index dc0a5cc7c2c5..933fac410a7a 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -129,7 +129,7 @@ static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len,
 	return 0;
 }
 
-void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+static inline void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
 {
 }
 
-- 
cgit v1.2.3


From c93682477bd861744589215515a63b81fdbd8948 Mon Sep 17 00:00:00 2001
From: Shani Michaeli <shanim@mellanox.com>
Date: Thu, 5 Mar 2015 20:16:11 +0200
Subject: net/dcb: Add IEEE QCN attribute

As specified in 802.1Qau spec. Add this optional attribute to the
DCB netlink layer. To allow for application to use the new attribute,
NIC drivers should implement and register the  callbacks ieee_getqcn,
ieee_setqcn and ieee_getqcnstats.

The QCN attribute holds a set of parameters for management, and
a set of statistics to provide informative data on Congestion-Control
defined by this spec.

Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dcbnl.h        |  3 +++
 include/uapi/linux/dcbnl.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 net/dcb/dcbnl.c            | 44 ++++++++++++++++++++++++++++---
 3 files changed, 110 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 597b88a94332..207d9ba1f92c 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -49,6 +49,9 @@ struct dcbnl_rtnl_ops {
 	int (*ieee_setets) (struct net_device *, struct ieee_ets *);
 	int (*ieee_getmaxrate) (struct net_device *, struct ieee_maxrate *);
 	int (*ieee_setmaxrate) (struct net_device *, struct ieee_maxrate *);
+	int (*ieee_getqcn) (struct net_device *, struct ieee_qcn *);
+	int (*ieee_setqcn) (struct net_device *, struct ieee_qcn *);
+	int (*ieee_getqcnstats) (struct net_device *, struct ieee_qcn_stats *);
 	int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *);
 	int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *);
 	int (*ieee_getapp) (struct net_device *, struct dcb_app *);
diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
index e711f20dc522..6497d7933d5b 100644
--- a/include/uapi/linux/dcbnl.h
+++ b/include/uapi/linux/dcbnl.h
@@ -78,6 +78,70 @@ struct ieee_maxrate {
 	__u64	tc_maxrate[IEEE_8021QAZ_MAX_TCS];
 };
 
+enum dcbnl_cndd_states {
+	DCB_CNDD_RESET = 0,
+	DCB_CNDD_EDGE,
+	DCB_CNDD_INTERIOR,
+	DCB_CNDD_INTERIOR_READY,
+};
+
+/* This structure contains the IEEE 802.1Qau QCN managed object.
+ *
+ *@rpg_enable: enable QCN RP
+ *@rppp_max_rps: maximum number of RPs allowed for this CNPV on this port
+ *@rpg_time_reset: time between rate increases if no CNMs received.
+ *		   given in u-seconds
+ *@rpg_byte_reset: transmitted data between rate increases if no CNMs received.
+ *		   given in Bytes
+ *@rpg_threshold: The number of times rpByteStage or rpTimeStage can count
+ *		   before RP rate control state machine advances states
+ *@rpg_max_rate: the maxinun rate, in Mbits per second,
+ *		 at which an RP can transmit
+ *@rpg_ai_rate: The rate, in Mbits per second,
+ *		used to increase rpTargetRate in the RPR_ACTIVE_INCREASE
+ *@rpg_hai_rate: The rate, in Mbits per second,
+ *		 used to increase rpTargetRate in the RPR_HYPER_INCREASE state
+ *@rpg_gd: Upon CNM receive, flow rate is limited to (Fb/Gd)*CurrentRate.
+ *	   rpgGd is given as log2(Gd), where Gd may only be powers of 2
+ *@rpg_min_dec_fac: The minimum factor by which the current transmit rate
+ *		    can be changed by reception of a CNM.
+ *		    value is given as percentage (1-100)
+ *@rpg_min_rate: The minimum value, in bits per second, for rate to limit
+ *@cndd_state_machine: The state of the congestion notification domain
+ *		       defense state machine, as defined by IEEE 802.3Qau
+ *		       section 32.1.1. In the interior ready state,
+ *		       the QCN capable hardware may add CN-TAG TLV to the
+ *		       outgoing traffic, to specifically identify outgoing
+ *		       flows.
+ */
+
+struct ieee_qcn {
+	__u8 rpg_enable[IEEE_8021QAZ_MAX_TCS];
+	__u32 rppp_max_rps[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_time_reset[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_byte_reset[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_threshold[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_max_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_ai_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_hai_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_gd[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_min_dec_fac[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_min_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 cndd_state_machine[IEEE_8021QAZ_MAX_TCS];
+};
+
+/* This structure contains the IEEE 802.1Qau QCN statistics.
+ *
+ *@rppp_rp_centiseconds: the number of RP-centiseconds accumulated
+ *			 by RPs at this priority level on this Port
+ *@rppp_created_rps: number of active RPs(flows) that react to CNMs
+ */
+
+struct ieee_qcn_stats {
+	__u64 rppp_rp_centiseconds[IEEE_8021QAZ_MAX_TCS];
+	__u32 rppp_created_rps[IEEE_8021QAZ_MAX_TCS];
+};
+
 /* This structure contains the IEEE 802.1Qaz PFC managed object
  *
  * @pfc_cap: Indicates the number of traffic classes on the local device
@@ -334,6 +398,8 @@ enum ieee_attrs {
 	DCB_ATTR_IEEE_PEER_PFC,
 	DCB_ATTR_IEEE_PEER_APP,
 	DCB_ATTR_IEEE_MAXRATE,
+	DCB_ATTR_IEEE_QCN,
+	DCB_ATTR_IEEE_QCN_STATS,
 	__DCB_ATTR_IEEE_MAX
 };
 #define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1)
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 93ea80196f0e..5b21f6f88e97 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -177,6 +177,8 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
 	[DCB_ATTR_IEEE_PFC]	    = {.len = sizeof(struct ieee_pfc)},
 	[DCB_ATTR_IEEE_APP_TABLE]   = {.type = NLA_NESTED},
 	[DCB_ATTR_IEEE_MAXRATE]   = {.len = sizeof(struct ieee_maxrate)},
+	[DCB_ATTR_IEEE_QCN]         = {.len = sizeof(struct ieee_qcn)},
+	[DCB_ATTR_IEEE_QCN_STATS]   = {.len = sizeof(struct ieee_qcn_stats)},
 };
 
 static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
@@ -1030,7 +1032,7 @@ nla_put_failure:
 	return err;
 }
 
-/* Handle IEEE 802.1Qaz GET commands. */
+/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb GET commands. */
 static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct nlattr *ieee, *app;
@@ -1067,6 +1069,32 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 		}
 	}
 
+	if (ops->ieee_getqcn) {
+		struct ieee_qcn qcn;
+
+		memset(&qcn, 0, sizeof(qcn));
+		err = ops->ieee_getqcn(netdev, &qcn);
+		if (!err) {
+			err = nla_put(skb, DCB_ATTR_IEEE_QCN,
+				      sizeof(qcn), &qcn);
+			if (err)
+				return -EMSGSIZE;
+		}
+	}
+
+	if (ops->ieee_getqcnstats) {
+		struct ieee_qcn_stats qcn_stats;
+
+		memset(&qcn_stats, 0, sizeof(qcn_stats));
+		err = ops->ieee_getqcnstats(netdev, &qcn_stats);
+		if (!err) {
+			err = nla_put(skb, DCB_ATTR_IEEE_QCN_STATS,
+				      sizeof(qcn_stats), &qcn_stats);
+			if (err)
+				return -EMSGSIZE;
+		}
+	}
+
 	if (ops->ieee_getpfc) {
 		struct ieee_pfc pfc;
 		memset(&pfc, 0, sizeof(pfc));
@@ -1379,8 +1407,9 @@ int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
 }
 EXPORT_SYMBOL(dcbnl_cee_notify);
 
-/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not
- * be completed the entire msg is aborted and error value is returned.
+/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb SET commands.
+ * If any requested operation can not be completed
+ * the entire msg is aborted and error value is returned.
  * No attempt is made to reconcile the case where only part of the
  * cmd can be completed.
  */
@@ -1417,6 +1446,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
 			goto err;
 	}
 
+	if (ieee[DCB_ATTR_IEEE_QCN] && ops->ieee_setqcn) {
+		struct ieee_qcn *qcn =
+			nla_data(ieee[DCB_ATTR_IEEE_QCN]);
+
+		err = ops->ieee_setqcn(netdev, qcn);
+		if (err)
+			goto err;
+	}
+
 	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
 		struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
 		err = ops->ieee_setpfc(netdev, pfc);
-- 
cgit v1.2.3


From d237baa1cbb3a2335357484c1d63a810a01947e2 Mon Sep 17 00:00:00 2001
From: Shani Michaeli <shanim@mellanox.com>
Date: Thu, 5 Mar 2015 20:16:12 +0200
Subject: net/mlx4_core: Add basic elements for QCN

Add device capability, firmware command opcode and etc prior elements
needed for QCN suppprt. Disable SRIOV VF view/access for QCN is disabled.

While here, remove a redundant offset definition into the
QUERY_DEV_CAP mailbox.

Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c |  9 +++++++++
 drivers/net/ethernet/mellanox/mlx4/fw.c  | 13 +++++++++++--
 include/linux/mlx4/cmd.h                 | 13 +++++++++++++
 include/linux/mlx4/device.h              |  3 ++-
 4 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index a681d7c0bb9f..20b3c7b21e63 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1499,6 +1499,15 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.verify = NULL,
 		.wrapper = mlx4_ACCESS_REG_wrapper,
 	},
+	{
+		.opcode = MLX4_CMD_CONGESTION_CTRL_OPCODE,
+		.has_inbox = false,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_CMD_EPERM_wrapper,
+	},
 	/* Native multicast commands are not available for guests */
 	{
 		.opcode = MLX4_CMD_QP_ATTACH,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 5a21e5dc94cb..242bcee5d774 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -143,7 +143,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[18] = "More than 80 VFs support",
 		[19] = "Performance optimized for limited rule configuration flow steering support",
 		[20] = "Recoverable error events support",
-		[21] = "Port Remap support"
+		[21] = "Port Remap support",
+		[22] = "QCN support"
 	};
 	int i;
 
@@ -675,7 +676,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET	0x76
 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET	0x77
 #define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE	0x7a
-#define QUERY_DEV_CAP_ETH_PROT_CTRL_OFFSET	0x7a
+#define QUERY_DEV_CAP_ECN_QCN_VER_OFFSET	0x7b
 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET	0x80
 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET	0x82
 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET	0x84
@@ -777,6 +778,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
 	dev_cap->fs_max_num_qp_per_entry = field;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
+	if (field & 0x1)
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QCN;
 	MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
 	dev_cap->stat_rate_support = stat_rate;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET);
@@ -1149,6 +1153,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		     DEV_CAP_EXT_2_FLAG_FSM);
 	MLX4_PUT(outbox->buf, field32, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 
+	/* turn off QCN for guests */
+	MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
+	field &= 0xfe;
+	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
+
 	return 0;
 }
 
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 7b6d4e9ff603..7299e9548906 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -163,6 +163,9 @@ enum {
 	MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
 	MLX4_QP_FLOW_STEERING_DETACH = 0x66,
 	MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64,
+
+	/* Update and read QCN parameters */
+	MLX4_CMD_CONGESTION_CTRL_OPCODE = 0x68,
 };
 
 enum {
@@ -233,6 +236,16 @@ struct mlx4_config_dev_params {
 	u8	rx_csum_flags_port_2;
 };
 
+enum mlx4_en_congestion_control_algorithm {
+	MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT = 0,
+};
+
+enum mlx4_en_congestion_control_opmod {
+	MLX4_CONGESTION_CONTROL_GET_PARAMS,
+	MLX4_CONGESTION_CONTROL_GET_STATISTICS,
+	MLX4_CONGESTION_CONTROL_SET_PARAMS = 4,
+};
+
 struct mlx4_dev;
 
 struct mlx4_cmd_mailbox {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e4ebff7e9d02..1cc54822b931 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -203,7 +203,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_80_VFS		= 1LL <<  18,
 	MLX4_DEV_CAP_FLAG2_FS_A0		= 1LL <<  19,
 	MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20,
-	MLX4_DEV_CAP_FLAG2_PORT_REMAP		= 1LL <<  21
+	MLX4_DEV_CAP_FLAG2_PORT_REMAP		= 1LL <<  21,
+	MLX4_DEV_CAP_FLAG2_QCN			= 1LL <<  22,
 };
 
 enum {
-- 
cgit v1.2.3


From b79bda3d38ae67940f1740f7e015f284eb551680 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 7 Mar 2015 16:25:56 -0600
Subject: neigh: Use neigh table index for neigh_packet_xmit

Remove a little bit of unnecessary work when transmitting a packet with
neigh_packet_xmit.  Use the neighbour table index not the address family
as a parameter.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h |  1 +
 net/core/neighbour.c    | 22 +++++++++++-----------
 net/mpls/af_mpls.c      | 35 ++++++++++++++++++++++-------------
 3 files changed, 34 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index afb8237b0a8c..d48b8ec8b5f4 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -226,6 +226,7 @@ enum {
 	NEIGH_ND_TABLE = 1,
 	NEIGH_DN_TABLE = 2,
 	NEIGH_NR_TABLES,
+	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
 };
 
 static inline int neigh_parms_family(struct neigh_parms *p)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index cffaf00561e7..ad07990e943d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2391,22 +2391,15 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 }
 EXPORT_SYMBOL(__neigh_for_each_release);
 
-int neigh_xmit(int family, struct net_device *dev,
+int neigh_xmit(int index, struct net_device *dev,
 	       const void *addr, struct sk_buff *skb)
 {
-	int err;
-	if (family == AF_PACKET) {
-		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-				      addr, NULL, skb->len);
-		if (err < 0)
-			goto out_kfree_skb;
-		err = dev_queue_xmit(skb);
-	} else {
+	int err = -EAFNOSUPPORT;
+	if (likely(index < NEIGH_NR_TABLES)) {
 		struct neigh_table *tbl;
 		struct neighbour *neigh;
 
-		err = -ENETDOWN;
-		tbl = neigh_find_table(family);
+		tbl = neigh_tables[index];
 		if (!tbl)
 			goto out;
 		neigh = __neigh_lookup_noref(tbl, addr, dev);
@@ -2417,6 +2410,13 @@ int neigh_xmit(int family, struct net_device *dev,
 			goto out_kfree_skb;
 		err = neigh->output(neigh, skb);
 	}
+	else if (index == NEIGH_LINK_TABLE) {
+		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+				      addr, NULL, skb->len);
+		if (err < 0)
+			goto out_kfree_skb;
+		err = dev_queue_xmit(skb);
+	}
 out:
 	return err;
 out_kfree_skb:
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index e120074157de..0ad8f7141be2 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -28,9 +28,9 @@ struct mpls_route { /* next hop label forwarding entry */
 	struct rcu_head		rt_rcu;
 	u32			rt_label[MAX_NEW_LABELS];
 	u8			rt_protocol; /* routing protocol that set this entry */
-	u8			rt_labels:2,
-				rt_via_alen:6;
-	unsigned short		rt_via_family;
+	u8			rt_labels;
+	u8			rt_via_alen;
+	u8			rt_via_table;
 	u8			rt_via[0];
 };
 
@@ -201,7 +201,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 		}
 	}
 
-	err = neigh_xmit(rt->rt_via_family, out_dev, rt->rt_via, skb);
+	err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb);
 	if (err)
 		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
 				    __func__, err);
@@ -225,7 +225,7 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
 struct mpls_route_config {
 	u32		rc_protocol;
 	u32		rc_ifindex;
-	u16		rc_via_family;
+	u16		rc_via_table;
 	u16		rc_via_alen;
 	u8		rc_via[MAX_VIA_ALEN];
 	u32		rc_label;
@@ -343,7 +343,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 		goto errout;
 
 	err = -EINVAL;
-	if ((cfg->rc_via_family == AF_PACKET) &&
+	if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
 	    (dev->addr_len != cfg->rc_via_alen))
 		goto errout;
 
@@ -376,7 +376,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
 		rt->rt_label[i] = cfg->rc_output_label[i];
 	rt->rt_protocol = cfg->rc_protocol;
 	RCU_INIT_POINTER(rt->rt_dev, dev);
-	rt->rt_via_family = cfg->rc_via_family;
+	rt->rt_via_table = cfg->rc_via_table;
 	memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen);
 
 	mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo);
@@ -448,15 +448,22 @@ static struct notifier_block mpls_dev_notifier = {
 };
 
 static int nla_put_via(struct sk_buff *skb,
-		       u16 family, const void *addr, int alen)
+		       u8 table, const void *addr, int alen)
 {
+	static const int table_to_family[NEIGH_NR_TABLES + 1] = {
+		AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
+	};
 	struct nlattr *nla;
 	struct rtvia *via;
+	int family = AF_UNSPEC;
 
 	nla = nla_reserve(skb, RTA_VIA, alen + 2);
 	if (!nla)
 		return -EMSGSIZE;
 
+	if (table <= NEIGH_NR_TABLES)
+		family = table_to_family[table];
+
 	via = nla_data(nla);
 	via->rtvia_family = family;
 	memcpy(via->rtvia_addr, addr, alen);
@@ -599,21 +606,23 @@ static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
 			struct rtvia *via = nla_data(nla);
 			if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
 				goto errout;
-			cfg->rc_via_family = via->rtvia_family;
 			cfg->rc_via_alen   = nla_len(nla) -
 				offsetof(struct rtvia, rtvia_addr);
 			if (cfg->rc_via_alen > MAX_VIA_ALEN)
 				goto errout;
 
 			/* Validate the address family */
-			switch(cfg->rc_via_family) {
+			switch(via->rtvia_family) {
 			case AF_PACKET:
+				cfg->rc_via_table = NEIGH_LINK_TABLE;
 				break;
 			case AF_INET:
+				cfg->rc_via_table = NEIGH_ARP_TABLE;
 				if (cfg->rc_via_alen != 4)
 					goto errout;
 				break;
 			case AF_INET6:
+				cfg->rc_via_table = NEIGH_ND_TABLE;
 				if (cfg->rc_via_alen != 16)
 					goto errout;
 				break;
@@ -686,7 +695,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	if (rt->rt_labels &&
 	    nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label))
 		goto nla_put_failure;
-	if (nla_put_via(skb, rt->rt_via_family, rt->rt_via, rt->rt_via_alen))
+	if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen))
 		goto nla_put_failure;
 	dev = rtnl_dereference(rt->rt_dev);
 	if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
@@ -799,7 +808,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 			goto nort0;
 		RCU_INIT_POINTER(rt0->rt_dev, lo);
 		rt0->rt_protocol = RTPROT_KERNEL;
-		rt0->rt_via_family = AF_PACKET;
+		rt0->rt_via_table = NEIGH_LINK_TABLE;
 		memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
 	}
 	if (limit > LABEL_IPV6_EXPLICIT_NULL) {
@@ -809,7 +818,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 			goto nort2;
 		RCU_INIT_POINTER(rt2->rt_dev, lo);
 		rt2->rt_protocol = RTPROT_KERNEL;
-		rt2->rt_via_family = AF_PACKET;
+		rt2->rt_via_table = NEIGH_LINK_TABLE;
 		memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
 	}
 
-- 
cgit v1.2.3


From 8bd63cf1a426e69bf4f611b08978f721e46c194f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 5 Mar 2015 00:52:33 +0100
Subject: bridge: move mac header copying into br_netfilter

The mac header only has to be copied back into the skb for
fragments generated by ip_fragment(), which only happens
for bridge forwarded packets with nf-call-iptables=1 && active nf_defrag.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h | 31 -------------------------------
 net/bridge/br_forward.c          |  4 +---
 net/bridge/br_netfilter.c        | 29 ++++++++++++++++++++++++++++-
 3 files changed, 29 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index c755e4971fa3..332ef8ab37e9 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -44,36 +44,6 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_PPP_SES);
 }
 
-/* Fill in the header for fragmented IP packets handled by
- * the IPv4 connection tracking code.
- *
- * Only used in br_forward.c
- */
-static inline int nf_bridge_copy_header(struct sk_buff *skb)
-{
-	int err;
-	unsigned int header_size;
-
-	nf_bridge_update_protocol(skb);
-	header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-	err = skb_cow_head(skb, header_size);
-	if (err)
-		return err;
-
-	skb_copy_to_linear_data_offset(skb, -header_size,
-				       skb->nf_bridge->data, header_size);
-	__skb_push(skb, nf_bridge_encap_header_len(skb));
-	return 0;
-}
-
-static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
-{
-	if (skb->nf_bridge &&
-	    skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
-		return nf_bridge_copy_header(skb);
-  	return 0;
-}
-
 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 {
 	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
@@ -119,7 +89,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
 }
 
 #else
-#define nf_bridge_maybe_copy_header(skb)	(0)
 #define nf_bridge_pad(skb)			(0)
 #define br_drop_fake_rtable(skb)	        do { } while (0)
 #endif /* CONFIG_BRIDGE_NETFILTER */
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index f96933a823e3..32541d4f72e8 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -37,9 +37,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
 
 int br_dev_queue_push_xmit(struct sk_buff *skb)
 {
-	/* ip_fragment doesn't copy the MAC header */
-	if (nf_bridge_maybe_copy_header(skb) ||
-	    !is_skb_forwardable(skb->dev, skb)) {
+	if (!is_skb_forwardable(skb->dev, skb)) {
 		kfree_skb(skb);
 	} else {
 		skb_push(skb, ETH_HLEN);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 0ee453fad3de..e5479112c4a3 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -764,6 +764,33 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
 }
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+static bool nf_bridge_copy_header(struct sk_buff *skb)
+{
+	int err;
+	unsigned int header_size;
+
+	nf_bridge_update_protocol(skb);
+	header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+	err = skb_cow_head(skb, header_size);
+	if (err)
+		return false;
+
+	skb_copy_to_linear_data_offset(skb, -header_size,
+				       skb->nf_bridge->data, header_size);
+	__skb_push(skb, nf_bridge_encap_header_len(skb));
+	return true;
+}
+
+static int br_nf_push_frag_xmit(struct sk_buff *skb)
+{
+	if (!nf_bridge_copy_header(skb)) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return br_dev_queue_push_xmit(skb);
+}
+
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	int ret;
@@ -780,7 +807,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 			/* Drop invalid packet */
 			return NF_DROP;
 		IPCB(skb)->frag_max_size = frag_max_size;
-		ret = ip_fragment(skb, br_dev_queue_push_xmit);
+		ret = ip_fragment(skb, br_nf_push_frag_xmit);
 	} else
 		ret = br_dev_queue_push_xmit(skb);
 
-- 
cgit v1.2.3


From 4a9d2f200862683d6680d5565f30c126625afe65 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 5 Mar 2015 00:52:34 +0100
Subject: netfilter: bridge: move nf_bridge_update_protocol to where its used

no need to keep it in a header file.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h | 8 --------
 net/bridge/br_netfilter.c        | 8 ++++++++
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 332ef8ab37e9..dd580a9a1add 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -36,14 +36,6 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 	}
 }
 
-static inline void nf_bridge_update_protocol(struct sk_buff *skb)
-{
-	if (skb->nf_bridge->mask & BRNF_8021Q)
-		skb->protocol = htons(ETH_P_8021Q);
-	else if (skb->nf_bridge->mask & BRNF_PPPoE)
-		skb->protocol = htons(ETH_P_PPP_SES);
-}
-
 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 {
 	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index e5479112c4a3..5b3bceb3ee62 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -239,6 +239,14 @@ drop:
 	return -1;
 }
 
+static void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+	if (skb->nf_bridge->mask & BRNF_8021Q)
+		skb->protocol = htons(ETH_P_8021Q);
+	else if (skb->nf_bridge->mask & BRNF_PPPoE)
+		skb->protocol = htons(ETH_P_PPP_SES);
+}
+
 /* PF_BRIDGE/PRE_ROUTING *********************************************/
 /* Undo the changes made for ip6tables PREROUTING and continue the
  * bridge PRE_ROUTING hook. */
-- 
cgit v1.2.3


From e5de75bf88858f5b3ab11e2504b86ec059f03102 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 9 Mar 2015 12:30:12 +0100
Subject: netfilter: bridge: move DNAT helper to br_netfilter

Only one caller, there is no need to keep this in a header.
Move it to br_netfilter.c where this belongs to.

Based on patch from Florian Westphal.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h | 12 ------------
 net/bridge/br_device.c           |  5 +----
 net/bridge/br_netfilter.c        | 32 ++++++++++++++++++++++++++++++++
 net/bridge/br_private.h          |  5 +++++
 4 files changed, 38 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index dd580a9a1add..bb39113ea596 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -44,18 +44,6 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 }
 
 int br_handle_frame_finish(struct sk_buff *skb);
-/* Only used in br_device.c */
-static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
-{
-	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
-	skb_pull(skb, ETH_HLEN);
-	nf_bridge->mask ^= BRNF_BRIDGED_DNAT;
-	skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
-				       skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
-	skb->dev = nf_bridge->physindev;
-	return br_handle_frame_finish(skb);
-}
 
 /* This is called by the IP fragmenting code and it ensures there is
  * enough room for the encapsulating header (if there is one). */
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ffd379db5938..294cbcc49263 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,13 +36,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	u16 vid = 0;
 
 	rcu_read_lock();
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
-		br_nf_pre_routing_finish_bridge_slow(skb);
+	if (br_nf_prerouting_finish_bridge(skb)) {
 		rcu_read_unlock();
 		return NETDEV_TX_OK;
 	}
-#endif
 
 	u64_stats_update_begin(&brstats->syncp);
 	brstats->tx_packets++;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index ef1fe281ca11..a8361c7cdf81 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -892,6 +892,38 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
 	return NF_ACCEPT;
 }
 
+/* This is called when br_netfilter has called into iptables/netfilter,
+ * and DNAT has taken place on a bridge-forwarded packet.
+ *
+ * neigh->output has created a new MAC header, with local br0 MAC
+ * as saddr.
+ *
+ * This restores the original MAC saddr of the bridged packet
+ * before invoking bridge forward logic to transmit the packet.
+ */
+static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
+{
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+
+	skb_pull(skb, ETH_HLEN);
+	nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
+
+	skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
+				       skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+	skb->dev = nf_bridge->physindev;
+	br_handle_frame_finish(skb);
+}
+
+int br_nf_prerouting_finish_bridge(struct sk_buff *skb)
+{
+	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+		br_nf_pre_routing_finish_bridge_slow(skb);
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(br_nf_prerouting_finish_bridge);
+
 void br_netfilter_enable(void)
 {
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index de0919975a25..d63fc17fe4f4 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -764,10 +764,15 @@ static inline int br_vlan_enabled(struct net_bridge *br)
 
 /* br_netfilter.c */
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+int br_nf_prerouting_finish_bridge(struct sk_buff *skb);
 int br_nf_core_init(void);
 void br_nf_core_fini(void);
 void br_netfilter_rtable_init(struct net_bridge *);
 #else
+static inline int br_nf_prerouting_finish_bridge(struct sk_buff *skb)
+{
+        return 0;
+}
 static inline int br_nf_core_init(void) { return 0; }
 static inline void br_nf_core_fini(void) {}
 #define br_netfilter_rtable_init(x)
-- 
cgit v1.2.3


From 1e052be69d045c8d0f82ff1116fd3e5a79661745 Mon Sep 17 00:00:00 2001
From: Cong Wang <cwang@twopensource.com>
Date: Fri, 6 Mar 2015 11:47:59 -0800
Subject: net_sched: destroy proto tp when all filters are gone

Kernel automatically creates a tp for each
(kind, protocol, priority) tuple, which has handle 0,
when we add a new filter, but it still is left there
after we remove our own, unless we don't specify the
handle (literally means all the filters under
the tuple). For example this one is left:

  # tc filter show dev eth0
  filter parent 8001: protocol arp pref 49152 basic

The user-space is hard to clean up these for kernel
because filters like u32 are organized in a complex way.
So kernel is responsible to remove it after all filters
are gone.  Each type of filter has its own way to
store the filters, so each type has to provide its
way to check if all filters are gone.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim<jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  4 ++--
 net/sched/cls_api.c       | 14 ++++++++++----
 net/sched/cls_basic.c     |  6 +++++-
 net/sched/cls_bpf.c       |  6 +++++-
 net/sched/cls_cgroup.c    |  6 +++++-
 net/sched/cls_flow.c      |  6 +++++-
 net/sched/cls_fw.c        | 11 +++++++++--
 net/sched/cls_route.c     | 12 ++++++++++--
 net/sched/cls_rsvp.h      | 12 ++++++++++--
 net/sched/cls_tcindex.c   |  6 +++++-
 net/sched/cls_u32.c       | 25 ++++++++++++++++++++++++-
 net/sched/sch_api.c       | 14 +++++++++-----
 12 files changed, 99 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c605d305c577..6d778efcfdfd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -213,7 +213,7 @@ struct tcf_proto_ops {
 					    const struct tcf_proto *,
 					    struct tcf_result *);
 	int			(*init)(struct tcf_proto*);
-	void			(*destroy)(struct tcf_proto*);
+	bool			(*destroy)(struct tcf_proto*, bool);
 
 	unsigned long		(*get)(struct tcf_proto*, u32 handle);
 	int			(*change)(struct net *net, struct sk_buff *,
@@ -399,7 +399,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 				const struct Qdisc_ops *ops, u32 parentid);
 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
 			       const struct qdisc_size_table *stab);
-void tcf_destroy(struct tcf_proto *tp);
+bool tcf_destroy(struct tcf_proto *tp, bool force);
 void tcf_destroy_chain(struct tcf_proto __rcu **fl);
 
 /* Reset all TX qdiscs greater then index of a device.  */
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index baef987fe2c0..8b0470e418dc 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -286,7 +286,7 @@ replay:
 			RCU_INIT_POINTER(*back, next);
 
 			tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
-			tcf_destroy(tp);
+			tcf_destroy(tp, true);
 			err = 0;
 			goto errout;
 		}
@@ -301,14 +301,20 @@ replay:
 			err = -EEXIST;
 			if (n->nlmsg_flags & NLM_F_EXCL) {
 				if (tp_created)
-					tcf_destroy(tp);
+					tcf_destroy(tp, true);
 				goto errout;
 			}
 			break;
 		case RTM_DELTFILTER:
 			err = tp->ops->delete(tp, fh);
-			if (err == 0)
+			if (err == 0) {
 				tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+				if (tcf_destroy(tp, false)) {
+					struct tcf_proto *next = rtnl_dereference(tp->next);
+
+					RCU_INIT_POINTER(*back, next);
+				}
+			}
 			goto errout;
 		case RTM_GETTFILTER:
 			err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
@@ -329,7 +335,7 @@ replay:
 		tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
 	} else {
 		if (tp_created)
-			tcf_destroy(tp);
+			tcf_destroy(tp, true);
 	}
 
 errout:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index fc399db86f11..0b8c3ace671f 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -96,11 +96,14 @@ static void basic_delete_filter(struct rcu_head *head)
 	kfree(f);
 }
 
-static void basic_destroy(struct tcf_proto *tp)
+static bool basic_destroy(struct tcf_proto *tp, bool force)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct basic_filter *f, *n;
 
+	if (!force && !list_empty(&head->flist))
+		return false;
+
 	list_for_each_entry_safe(f, n, &head->flist, link) {
 		list_del_rcu(&f->link);
 		tcf_unbind_filter(tp, &f->res);
@@ -108,6 +111,7 @@ static void basic_destroy(struct tcf_proto *tp)
 	}
 	RCU_INIT_POINTER(tp->root, NULL);
 	kfree_rcu(head, rcu);
+	return true;
 }
 
 static int basic_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6f7ed8f8e6ee..243c9f225a73 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -137,11 +137,14 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
 	return 0;
 }
 
-static void cls_bpf_destroy(struct tcf_proto *tp)
+static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 	struct cls_bpf_prog *prog, *tmp;
 
+	if (!force && !list_empty(&head->plist))
+		return false;
+
 	list_for_each_entry_safe(prog, tmp, &head->plist, link) {
 		list_del_rcu(&prog->link);
 		tcf_unbind_filter(tp, &prog->res);
@@ -150,6 +153,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
 
 	RCU_INIT_POINTER(tp->root, NULL);
 	kfree_rcu(head, rcu);
+	return true;
 }
 
 static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 221697ab0247..ea611b216412 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -143,14 +143,18 @@ errout:
 	return err;
 }
 
-static void cls_cgroup_destroy(struct tcf_proto *tp)
+static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 
+	if (!force)
+		return false;
+
 	if (head) {
 		RCU_INIT_POINTER(tp->root, NULL);
 		call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
 	}
+	return true;
 }
 
 static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 461410394d08..a620c4e288a5 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -557,17 +557,21 @@ static int flow_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static void flow_destroy(struct tcf_proto *tp)
+static bool flow_destroy(struct tcf_proto *tp, bool force)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *f, *next;
 
+	if (!force && !list_empty(&head->filters))
+		return false;
+
 	list_for_each_entry_safe(f, next, &head->filters, list) {
 		list_del_rcu(&f->list);
 		call_rcu(&f->rcu, flow_destroy_filter);
 	}
 	RCU_INIT_POINTER(tp->root, NULL);
 	kfree_rcu(head, rcu);
+	return true;
 }
 
 static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 9d9aa3e82b10..715e01e5910a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -133,14 +133,20 @@ static void fw_delete_filter(struct rcu_head *head)
 	kfree(f);
 }
 
-static void fw_destroy(struct tcf_proto *tp)
+static bool fw_destroy(struct tcf_proto *tp, bool force)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f;
 	int h;
 
 	if (head == NULL)
-		return;
+		return true;
+
+	if (!force) {
+		for (h = 0; h < HTSIZE; h++)
+			if (rcu_access_pointer(head->ht[h]))
+				return false;
+	}
 
 	for (h = 0; h < HTSIZE; h++) {
 		while ((f = rtnl_dereference(head->ht[h])) != NULL) {
@@ -152,6 +158,7 @@ static void fw_destroy(struct tcf_proto *tp)
 	}
 	RCU_INIT_POINTER(tp->root, NULL);
 	kfree_rcu(head, rcu);
+	return true;
 }
 
 static int fw_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index bb8a60235d01..08a3b0a6f5ab 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -277,13 +277,20 @@ route4_delete_filter(struct rcu_head *head)
 	kfree(f);
 }
 
-static void route4_destroy(struct tcf_proto *tp)
+static bool route4_destroy(struct tcf_proto *tp, bool force)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	int h1, h2;
 
 	if (head == NULL)
-		return;
+		return true;
+
+	if (!force) {
+		for (h1 = 0; h1 <= 256; h1++) {
+			if (rcu_access_pointer(head->table[h1]))
+				return false;
+		}
+	}
 
 	for (h1 = 0; h1 <= 256; h1++) {
 		struct route4_bucket *b;
@@ -308,6 +315,7 @@ static void route4_destroy(struct tcf_proto *tp)
 	}
 	RCU_INIT_POINTER(tp->root, NULL);
 	kfree_rcu(head, rcu);
+	return true;
 }
 
 static int route4_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index edd8ade3fbc1..02fa82792dab 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -291,13 +291,20 @@ rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 	kfree_rcu(f, rcu);
 }
 
-static void rsvp_destroy(struct tcf_proto *tp)
+static bool rsvp_destroy(struct tcf_proto *tp, bool force)
 {
 	struct rsvp_head *data = rtnl_dereference(tp->root);
 	int h1, h2;
 
 	if (data == NULL)
-		return;
+		return true;
+
+	if (!force) {
+		for (h1 = 0; h1 < 256; h1++) {
+			if (rcu_access_pointer(data->ht[h1]))
+				return false;
+		}
+	}
 
 	RCU_INIT_POINTER(tp->root, NULL);
 
@@ -319,6 +326,7 @@ static void rsvp_destroy(struct tcf_proto *tp)
 		}
 	}
 	kfree_rcu(data, rcu);
+	return true;
 }
 
 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index bd49bf547a47..a557dbaf5afe 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -468,11 +468,14 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 	}
 }
 
-static void tcindex_destroy(struct tcf_proto *tp)
+static bool tcindex_destroy(struct tcf_proto *tp, bool force)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcf_walker walker;
 
+	if (!force)
+		return false;
+
 	pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
 	walker.count = 0;
 	walker.skip = 0;
@@ -481,6 +484,7 @@ static void tcindex_destroy(struct tcf_proto *tp)
 
 	RCU_INIT_POINTER(tp->root, NULL);
 	call_rcu(&p->rcu, __tcindex_destroy);
+	return true;
 }
 
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 09487afbfd51..375e51b71c80 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -460,13 +460,35 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 	return -ENOENT;
 }
 
-static void u32_destroy(struct tcf_proto *tp)
+static bool ht_empty(struct tc_u_hnode *ht)
+{
+	unsigned int h;
+
+	for (h = 0; h <= ht->divisor; h++)
+		if (rcu_access_pointer(ht->ht[h]))
+			return false;
+
+	return true;
+}
+
+static bool u32_destroy(struct tcf_proto *tp, bool force)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
 
 	WARN_ON(root_ht == NULL);
 
+	if (!force) {
+		if (root_ht) {
+			if (root_ht->refcnt > 1)
+				return false;
+			if (root_ht->refcnt == 1) {
+				if (!ht_empty(root_ht))
+					return false;
+			}
+		}
+	}
+
 	if (root_ht && --root_ht->refcnt == 0)
 		u32_destroy_hnode(tp, root_ht);
 
@@ -491,6 +513,7 @@ static void u32_destroy(struct tcf_proto *tp)
 	}
 
 	tp->data = NULL;
+	return true;
 }
 
 static int u32_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 243b7d169d61..ad9eed70bc8f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1858,11 +1858,15 @@ reclassify:
 }
 EXPORT_SYMBOL(tc_classify);
 
-void tcf_destroy(struct tcf_proto *tp)
+bool tcf_destroy(struct tcf_proto *tp, bool force)
 {
-	tp->ops->destroy(tp);
-	module_put(tp->ops->owner);
-	kfree_rcu(tp, rcu);
+	if (tp->ops->destroy(tp, force)) {
+		module_put(tp->ops->owner);
+		kfree_rcu(tp, rcu);
+		return true;
+	}
+
+	return false;
 }
 
 void tcf_destroy_chain(struct tcf_proto __rcu **fl)
@@ -1871,7 +1875,7 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl)
 
 	while ((tp = rtnl_dereference(*fl)) != NULL) {
 		RCU_INIT_POINTER(*fl, tp->next);
-		tcf_destroy(tp);
+		tcf_destroy(tp, true);
 	}
 }
 EXPORT_SYMBOL(tcf_destroy_chain);
-- 
cgit v1.2.3


From ddb3b6033ca68d71a5f0611b58b2642729342245 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 9 Mar 2015 13:14:37 -0500
Subject: net: Remove protocol from struct dst_ops

After my change to neigh_hh_init to obtain the protocol from the
neigh_table there are no more users of protocol in struct dst_ops.
Remove the protocol field from dst_ops and all of it's initializers.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_ops.h   | 1 -
 net/bridge/br_nf_core.c | 1 -
 net/decnet/dn_route.c   | 1 -
 net/ipv4/route.c        | 2 --
 net/ipv4/xfrm4_policy.c | 1 -
 net/ipv6/route.c        | 2 --
 net/ipv6/xfrm6_policy.c | 1 -
 7 files changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 1f99a1de0e4f..d64253914a6a 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -12,7 +12,6 @@ struct sock;
 
 struct dst_ops {
 	unsigned short		family;
-	__be16			protocol;
 	unsigned int		gc_thresh;
 
 	int			(*gc)(struct dst_ops *ops);
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 387cb3bd017c..20cbb727df4d 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -54,7 +54,6 @@ static unsigned int fake_mtu(const struct dst_entry *dst)
 
 static struct dst_ops fake_dst_ops = {
 	.family		= AF_INET,
-	.protocol	= cpu_to_be16(ETH_P_IP),
 	.update_pmtu	= fake_update_pmtu,
 	.redirect	= fake_redirect,
 	.cow_metrics	= fake_cow_metrics,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 771815575dbd..9ab0c4ba297f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -136,7 +136,6 @@ int decnet_dst_gc_interval = 2;
 
 static struct dst_ops dn_dst_ops = {
 	.family =		PF_DECnet,
-	.protocol =		cpu_to_be16(ETH_P_DNA_RT),
 	.gc_thresh =		128,
 	.gc =			dn_dst_gc,
 	.check =		dn_dst_check,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ad5064362c5c..649c8a3f0189 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -152,7 +152,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
-	.protocol =		cpu_to_be16(ETH_P_IP),
 	.check =		ipv4_dst_check,
 	.default_advmss =	ipv4_default_advmss,
 	.mtu =			ipv4_mtu,
@@ -2225,7 +2224,6 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
 
 static struct dst_ops ipv4_dst_blackhole_ops = {
 	.family			=	AF_INET,
-	.protocol		=	cpu_to_be16(ETH_P_IP),
 	.check			=	ipv4_blackhole_dst_check,
 	.mtu			=	ipv4_blackhole_mtu,
 	.default_advmss		=	ipv4_default_advmss,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 6156f68a1e90..c224c856247b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -232,7 +232,6 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm4_dst_ops = {
 	.family =		AF_INET,
-	.protocol =		cpu_to_be16(ETH_P_IP),
 	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
 	.redirect =		xfrm4_redirect,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4688bd4d7f59..06fa819c43c9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -194,7 +194,6 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 
 static struct dst_ops ip6_dst_ops_template = {
 	.family			=	AF_INET6,
-	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 	.gc			=	ip6_dst_gc,
 	.gc_thresh		=	1024,
 	.check			=	ip6_dst_check,
@@ -236,7 +235,6 @@ static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 
 static struct dst_ops ip6_dst_blackhole_ops = {
 	.family			=	AF_INET6,
-	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 	.destroy		=	ip6_dst_destroy,
 	.check			=	ip6_dst_check,
 	.mtu			=	ip6_blackhole_mtu,
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 48bf5a06847b..8ddf2b545151 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -292,7 +292,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm6_dst_ops = {
 	.family =		AF_INET6,
-	.protocol =		cpu_to_be16(ETH_P_IPV6),
 	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
 	.redirect =		xfrm6_redirect,
-- 
cgit v1.2.3


From aa836df958886e57ff0d43fb3d79d1af4aec0cc8 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 9 Mar 2015 14:31:20 -0700
Subject: net: core: add of_find_net_device_by_node()

Add a helper function which allows getting the struct net_device pointer
associated with a given struct device_node pointer. This is useful for
instance for DSA Ethernet devices not backed by a platform_device, but a PCI
device.

Since we need to access net_class which is not accessible outside of
net/core/net-sysfs.c, this helper function is also added here and gated
with CONFIG_OF_NET.

Network devices initialized with SET_NETDEV_DEV() are also taken into
account by checking for dev->parent first and then falling back to
checking the device pointer within struct net_device.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_net.h |  8 ++++++++
 net/core/net-sysfs.c   | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index 34597c8c1a4c..9cd72aab76fe 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -9,8 +9,11 @@
 
 #ifdef CONFIG_OF_NET
 #include <linux/of.h>
+
+struct net_device;
 extern int of_get_phy_mode(struct device_node *np);
 extern const void *of_get_mac_address(struct device_node *np);
+extern struct net_device *of_find_net_device_by_node(struct device_node *np);
 #else
 static inline int of_get_phy_mode(struct device_node *np)
 {
@@ -21,6 +24,11 @@ static inline const void *of_get_mac_address(struct device_node *np)
 {
 	return NULL;
 }
+
+static inline struct net_device *of_find_net_device_by_node(struct device_node *np)
+{
+	return NULL;
+}
 #endif
 
 #endif /* __LINUX_OF_NET_H */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f2aa73bfb0e4..cf30620a88e1 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -23,6 +23,7 @@
 #include <linux/export.h>
 #include <linux/jiffies.h>
 #include <linux/pm_runtime.h>
+#include <linux/of.h>
 
 #include "net-sysfs.h"
 
@@ -1374,6 +1375,30 @@ static struct class net_class = {
 	.namespace = net_namespace,
 };
 
+#ifdef CONFIG_OF_NET
+static int of_dev_node_match(struct device *dev, const void *data)
+{
+	int ret = 0;
+
+	if (dev->parent)
+		ret = dev->parent->of_node == data;
+
+	return ret == 0 ? dev->of_node == data : ret;
+}
+
+struct net_device *of_find_net_device_by_node(struct device_node *np)
+{
+	struct device *dev;
+
+	dev = class_find_device(&net_class, NULL, np, of_dev_node_match);
+	if (!dev)
+		return NULL;
+
+	return to_net_dev(dev);
+}
+EXPORT_SYMBOL(of_find_net_device_by_node);
+#endif
+
 /* Delete sysfs entries but hold kobject reference until after all
  * netdev references are gone.
  */
-- 
cgit v1.2.3


From 769a020289bc8f68b7e48faf8fee970346d71a3b Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 9 Mar 2015 14:31:21 -0700
Subject: net: dsa: utilize of_find_net_device_by_node

Using of_find_device_by_node() restricts the search to platform_device that
match the specified device_node pointer. This is not even remotely true for
network devices backed by a pci_device for instance.

of_find_net_device_by_node() allows us to do a more thorough lookup to find the
struct net_device corresponding to a particular device_node pointer.

For symetry with the non-OF code path, we hold the net_device pointer in
dsa_probe() just like what dev_to_net_dev() does when we call this
function.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |  1 +
 net/dsa/dsa.c     | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index b525ac516559..47917e5e1e12 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -72,6 +72,7 @@ struct dsa_platform_data {
 	 * to the root switch chip of the tree.
 	 */
 	struct device	*netdev;
+	struct net_device *of_netdev;
 
 	/*
 	 * Info structs describing each of the switch chips
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index b40f11bb419c..899772108ee3 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -20,6 +20,7 @@
 #include <linux/of.h>
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
+#include <linux/of_net.h>
 #include <linux/sysfs.h>
 #include "dsa_priv.h"
 
@@ -583,7 +584,7 @@ static int dsa_of_probe(struct device *dev)
 	struct device_node *np = dev->of_node;
 	struct device_node *child, *mdio, *ethernet, *port, *link;
 	struct mii_bus *mdio_bus;
-	struct platform_device *ethernet_dev;
+	struct net_device *ethernet_dev;
 	struct dsa_platform_data *pd;
 	struct dsa_chip_data *cd;
 	const char *port_name;
@@ -604,7 +605,7 @@ static int dsa_of_probe(struct device *dev)
 	if (!ethernet)
 		return -EINVAL;
 
-	ethernet_dev = of_find_device_by_node(ethernet);
+	ethernet_dev = of_find_net_device_by_node(ethernet);
 	if (!ethernet_dev)
 		return -EPROBE_DEFER;
 
@@ -613,7 +614,7 @@ static int dsa_of_probe(struct device *dev)
 		return -ENOMEM;
 
 	dev->platform_data = pd;
-	pd->netdev = &ethernet_dev->dev;
+	pd->of_netdev = ethernet_dev;
 	pd->nr_chips = of_get_available_child_count(np);
 	if (pd->nr_chips > DSA_MAX_SWITCHES)
 		pd->nr_chips = DSA_MAX_SWITCHES;
@@ -771,10 +772,15 @@ static int dsa_probe(struct platform_device *pdev)
 		pd = pdev->dev.platform_data;
 	}
 
-	if (pd == NULL || pd->netdev == NULL)
+	if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL))
 		return -EINVAL;
 
-	dev = dev_to_net_device(pd->netdev);
+	if (pd->of_netdev) {
+		dev = pd->of_netdev;
+		dev_hold(dev);
+	} else {
+		dev = dev_to_net_device(pd->netdev);
+	}
 	if (dev == NULL) {
 		ret = -EPROBE_DEFER;
 		goto out;
-- 
cgit v1.2.3


From f8f2147150de303e814c0452075d467734d3544b Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Mon, 9 Mar 2015 13:59:09 -0700
Subject: switchdev: add netlink flags to IPv4 FIB add op

Pass in the netlink flags (NLM_F_*) into switchdev driver for IPv4 FIB add op
to allow driver to 1) optimize hardware updates, 2) handle ip route prepend
and append commands correctly.

Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Suggested-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/rocker/rocker.c | 3 ++-
 include/linux/netdevice.h            | 3 ++-
 include/net/switchdev.h              | 6 ++++--
 net/ipv4/fib_trie.c                  | 5 ++++-
 net/switchdev/switchdev.c            | 7 +++++--
 5 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index 65e140315a58..223348d8cc07 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4152,7 +4152,8 @@ static int rocker_port_switch_port_stp_update(struct net_device *dev, u8 state)
 static int rocker_port_switch_fib_ipv4_add(struct net_device *dev,
 					   __be32 dst, int dst_len,
 					   struct fib_info *fi,
-					   u8 tos, u8 type, u32 tb_id)
+					   u8 tos, u8 type,
+					   u32 nlflags, u32 tb_id)
 {
 	struct rocker_port *rocker_port = netdev_priv(dev);
 	int flags = 0;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 45413784a3b1..1354ae83efc8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1035,7 +1035,7 @@ struct fib_info;
  *	state change.
  * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst,
  *				     int dst_len, struct fib_info *fi,
- *				     u8 tos, u8 type, u32 tb_id);
+ *				     u8 tos, u8 type, u32 nlflags, u32 tb_id);
  *	Called to add/modify IPv4 route to switch device.
  * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst,
  *				     int dst_len, struct fib_info *fi,
@@ -1207,6 +1207,7 @@ struct net_device_ops {
 							   int dst_len,
 							   struct fib_info *fi,
 							   u8 tos, u8 type,
+							   u32 nlflags,
 							   u32 tb_id);
 	int			(*ndo_switch_fib_ipv4_del)(struct net_device *dev,
 							   __be32 dst,
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 933fac410a7a..1a9382febcc3 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -1,6 +1,7 @@
 /*
  * include/net/switchdev.h - Switch device API
  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -52,7 +53,7 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
 int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev,
 					       struct nlmsghdr *nlh, u16 flags);
 int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
-			       u8 tos, u8 type, u32 tb_id);
+			       u8 tos, u8 type, u32 nlflags, u32 tb_id);
 int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
 			       u8 tos, u8 type, u32 tb_id);
 void netdev_switch_fib_ipv4_abort(struct fib_info *fi);
@@ -117,7 +118,8 @@ static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *
 
 static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len,
 					     struct fib_info *fi,
-					     u8 tos, u8 type, u32 tb_id)
+					     u8 tos, u8 type,
+					     u32 nlflags, u32 tb_id)
 {
 	return 0;
 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 90955455884e..fcfa9825a816 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1155,6 +1155,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			err = netdev_switch_fib_ipv4_add(key, plen, fi,
 							 new_fa->fa_tos,
 							 cfg->fc_type,
+							 cfg->fc_nlflags,
 							 tb->tb_id);
 			if (err) {
 				netdev_switch_fib_ipv4_abort(fi);
@@ -1201,7 +1202,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 
 	/* (Optionally) offload fib entry to switch hardware. */
 	err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
-					 cfg->fc_type, tb->tb_id);
+					 cfg->fc_type,
+					 cfg->fc_nlflags,
+					 tb->tb_id);
 	if (err) {
 		netdev_switch_fib_ipv4_abort(fi);
 		goto out_free_new_fa;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index aba6aa2656d8..8cf42a69baf4 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1,6 +1,7 @@
 /*
  * net/switchdev/switchdev.c - Switch device API
  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -294,12 +295,13 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
  *	@fi: route FIB info structure
  *	@tos: route TOS
  *	@type: route type
+ *	@nlflags: netlink flags passed in (NLM_F_*)
  *	@tb_id: route table ID
  *
  *	Add IPv4 route entry to switch device.
  */
 int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
-			       u8 tos, u8 type, u32 tb_id)
+			       u8 tos, u8 type, u32 nlflags, u32 tb_id)
 {
 	struct net_device *dev;
 	const struct net_device_ops *ops;
@@ -324,7 +326,8 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
 
 	if (ops->ndo_switch_fib_ipv4_add) {
 		err = ops->ndo_switch_fib_ipv4_add(dev, htonl(dst), dst_len,
-						   fi, tos, type, tb_id);
+						   fi, tos, type, nlflags,
+						   tb_id);
 		if (!err)
 			fi->fib_flags |= RTNH_F_EXTERNAL;
 	}
-- 
cgit v1.2.3


From 59e33c2b021322db92ca27c4d9958e57630443b6 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 9 Mar 2015 15:44:13 -0700
Subject: net: phy: bcm7xxx: add alternate id for 7439

BCM7439 has an alternate PHY OUI: 0xae025080 which is to be found in
some variants of this chip.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm7xxx.c | 1 +
 include/linux/brcmphy.h   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 974ec4515269..64c74c6a4828 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -396,6 +396,7 @@ static struct phy_driver bcm7xxx_driver[] = {
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7366, "Broadcom BCM7366"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"),
+	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"),
 {
 	.phy_id         = PHY_ID_BCM7425,
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 7ccd928cc1f2..cab606617522 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -19,6 +19,7 @@
 #define PHY_ID_BCM7425			0x03625e60
 #define PHY_ID_BCM7429			0x600d8730
 #define PHY_ID_BCM7439			0x600d8480
+#define PHY_ID_BCM7439_2		0xae025080
 #define PHY_ID_BCM7445			0x600d8510
 
 #define PHY_BCM_OUI_MASK		0xfffffc00
-- 
cgit v1.2.3


From a03a8dbe20eff6d57aae3147577bf84b52aba4e6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 9 Mar 2015 23:04:15 +0100
Subject: netfilter: fix sparse warnings in reject handling

make C=1 CF=-D__CHECK_ENDIAN__ shows following:

net/bridge/netfilter/nft_reject_bridge.c:65:50: warning: incorrect type in argument 3 (different base types)
net/bridge/netfilter/nft_reject_bridge.c:65:50:    expected restricted __be16 [usertype] protocol [..]
net/bridge/netfilter/nft_reject_bridge.c:102:37: warning: cast from restricted __be16
net/bridge/netfilter/nft_reject_bridge.c:102:37: warning: incorrect type in argument 1 (different base types) [..]
net/bridge/netfilter/nft_reject_bridge.c:121:50: warning: incorrect type in argument 3 (different base types) [..]
net/bridge/netfilter/nft_reject_bridge.c:168:52: warning: incorrect type in argument 3 (different base types) [..]
net/bridge/netfilter/nft_reject_bridge.c:233:52: warning: incorrect type in argument 3 (different base types) [..]

Caused by two (harmless) errors:
1. htons() instead of ntohs()
2. __be16 for protocol in nf_reject_ipXhdr_put API, use u8 instead.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_reject.h   | 2 +-
 include/net/netfilter/ipv6/nf_reject.h   | 2 +-
 net/bridge/netfilter/nft_reject_bridge.c | 2 +-
 net/ipv4/netfilter/nf_reject_ipv4.c      | 2 +-
 net/ipv6/netfilter/nf_reject_ipv6.c      | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index 864127573c32..77862c3645f0 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -12,7 +12,7 @@ const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
 					     struct tcphdr *_oth, int hook);
 struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
 				  const struct sk_buff *oldskb,
-				  __be16 protocol, int ttl);
+				  __u8 protocol, int ttl);
 void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
 			     const struct tcphdr *oth);
 
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index 0ae445d3f217..0ea4fa37db16 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -13,7 +13,7 @@ const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
 					      unsigned int *otcplen, int hook);
 struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb,
 				     const struct sk_buff *oldskb,
-				     __be16 protocol, int hoplimit);
+				     __u8 protocol, int hoplimit);
 void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
 			      const struct sk_buff *oldskb,
 			      const struct tcphdr *oth, unsigned int otcplen);
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 5c6c96585acd..54a2fdf0f457 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -99,7 +99,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
 	if (!pskb_may_pull(oldskb, len))
 		return;
 
-	if (pskb_trim_rcsum(oldskb, htons(ip_hdr(oldskb)->tot_len)))
+	if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len)))
 		return;
 
 	if (ip_hdr(oldskb)->protocol == IPPROTO_TCP ||
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index b7405eb7f1ef..c5b794da51a9 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get);
 
 struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
 				  const struct sk_buff *oldskb,
-				  __be16 protocol, int ttl)
+				  __u8 protocol, int ttl)
 {
 	struct iphdr *niph, *oiph = ip_hdr(oldskb);
 
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 68e0bb4db1bf..3afdce03d94e 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get);
 
 struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb,
 				     const struct sk_buff *oldskb,
-				     __be16 protocol, int hoplimit)
+				     __u8 protocol, int hoplimit)
 {
 	struct ipv6hdr *ip6h;
 	const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
-- 
cgit v1.2.3


From 491da2a477077357c8206a601559e2ea58f224db Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 10 Mar 2015 07:15:52 -0700
Subject: net: constify sock_diag_check_cookie()

sock_diag_check_cookie() second parameter is constant

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h | 2 +-
 net/core/sock_diag.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 46cca4c06848..b5ad7d35a636 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -19,7 +19,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
-int sock_diag_check_cookie(void *sk, __u32 *cookie);
+int sock_diag_check_cookie(void *sk, const __u32 *cookie);
 void sock_diag_save_cookie(void *sk, __u32 *cookie);
 
 int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index ad704c757bb4..96e70ee05a8d 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -13,7 +13,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 
-int sock_diag_check_cookie(void *sk, __u32 *cookie)
+int sock_diag_check_cookie(void *sk, const __u32 *cookie)
 {
 	if ((cookie[0] != INET_DIAG_NOCOOKIE ||
 	     cookie[1] != INET_DIAG_NOCOOKIE) &&
-- 
cgit v1.2.3


From 34160ea3f9c96b5ae71a11459f9b9f6c298b8930 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 10 Mar 2015 07:15:54 -0700
Subject: inet_diag: add const to inet_diag_req_v2

diag dumpers should not modify the request.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 43 ++++++++++++++++++++++---------------------
 net/dccp/diag.c           |  7 ++++---
 net/ipv4/inet_diag.c      | 22 +++++++++++-----------
 net/ipv4/tcp_diag.c       |  4 ++--
 net/ipv4/udp_diag.c       | 22 +++++++++++++---------
 5 files changed, 52 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 46da02410a09..ac48b10c9395 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -11,33 +11,34 @@ struct sk_buff;
 struct netlink_callback;
 
 struct inet_diag_handler {
-	void			(*dump)(struct sk_buff *skb,
-					struct netlink_callback *cb,
-					struct inet_diag_req_v2 *r,
-					struct nlattr *bc);
-
-	int			(*dump_one)(struct sk_buff *in_skb,
-					const struct nlmsghdr *nlh,
-					struct inet_diag_req_v2 *req);
-
-	void			(*idiag_get_info)(struct sock *sk,
-						  struct inet_diag_msg *r,
-						  void *info);
-	__u16                   idiag_type;
+	void		(*dump)(struct sk_buff *skb,
+				struct netlink_callback *cb,
+				const struct inet_diag_req_v2 *r,
+				struct nlattr *bc);
+
+	int		(*dump_one)(struct sk_buff *in_skb,
+				    const struct nlmsghdr *nlh,
+				    const struct inet_diag_req_v2 *req);
+
+	void		(*idiag_get_info)(struct sock *sk,
+					  struct inet_diag_msg *r,
+					  void *info);
+	__u16		idiag_type;
 };
 
 struct inet_connection_sock;
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
-			      struct user_namespace *user_ns,
-			      u32 pid, u32 seq, u16 nlmsg_flags,
-			      const struct nlmsghdr *unlh);
+		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
+		      struct user_namespace *user_ns,
+		      u32 pid, u32 seq, u16 nlmsg_flags,
+		      const struct nlmsghdr *unlh);
 void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req_v2 *r,
-		struct nlattr *bc);
+			 struct netlink_callback *cb,
+			 const struct inet_diag_req_v2 *r,
+			 struct nlattr *bc);
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-		struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req_v2 *req);
+			    struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+			    const struct inet_diag_req_v2 *req);
 
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 028fc43aacbd..5a45f8de5d99 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -49,13 +49,14 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 }
 
 static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req_v2 *r, struct nlattr *bc)
+			   const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc);
 }
 
-static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req_v2 *req)
+static int dccp_diag_dump_one(struct sk_buff *in_skb,
+			      const struct nlmsghdr *nlh,
+			      const struct inet_diag_req_v2 *req)
 {
 	return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req);
 }
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index cd261f6e3abb..ac3bfb458afd 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -71,7 +71,7 @@ static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
 }
 
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-		      struct sk_buff *skb, struct inet_diag_req_v2 *req,
+		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
 		      struct user_namespace *user_ns,
 		      u32 portid, u32 seq, u16 nlmsg_flags,
 		      const struct nlmsghdr *unlh)
@@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
 
 static int inet_csk_diag_fill(struct sock *sk,
 			      struct sk_buff *skb,
-			      struct inet_diag_req_v2 *req,
+			      const struct inet_diag_req_v2 *req,
 			      struct user_namespace *user_ns,
 			      u32 portid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
@@ -223,7 +223,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 
 static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 			       struct sk_buff *skb,
-			       struct inet_diag_req_v2 *req,
+			       const struct inet_diag_req_v2 *req,
 			       u32 portid, u32 seq, u16 nlmsg_flags,
 			       const struct nlmsghdr *unlh)
 {
@@ -277,7 +277,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 }
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
-			struct inet_diag_req_v2 *r,
+			const struct inet_diag_req_v2 *r,
 			struct user_namespace *user_ns,
 			u32 portid, u32 seq, u16 nlmsg_flags,
 			const struct nlmsghdr *unlh)
@@ -293,7 +293,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 			    struct sk_buff *in_skb,
 			    const struct nlmsghdr *nlh,
-			    struct inet_diag_req_v2 *req)
+			    const struct inet_diag_req_v2 *req)
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct sk_buff *rep;
@@ -358,7 +358,7 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
 static int inet_diag_get_exact(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
-			       struct inet_diag_req_v2 *req)
+			       const struct inet_diag_req_v2 *req)
 {
 	const struct inet_diag_handler *handler;
 	int err;
@@ -626,7 +626,7 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
 static int inet_csk_diag_dump(struct sock *sk,
 			      struct sk_buff *skb,
 			      struct netlink_callback *cb,
-			      struct inet_diag_req_v2 *r,
+			      const struct inet_diag_req_v2 *r,
 			      const struct nlattr *bc)
 {
 	if (!inet_diag_bc_sk(bc, sk))
@@ -667,7 +667,7 @@ static void twsk_build_assert(void)
 static int inet_twsk_diag_dump(struct sock *sk,
 			       struct sk_buff *skb,
 			       struct netlink_callback *cb,
-			       struct inet_diag_req_v2 *r,
+			       const struct inet_diag_req_v2 *r,
 			       const struct nlattr *bc)
 {
 	twsk_build_assert();
@@ -770,7 +770,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 
 static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 			       struct netlink_callback *cb,
-			       struct inet_diag_req_v2 *r,
+			       const struct inet_diag_req_v2 *r,
 			       const struct nlattr *bc)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -842,7 +842,7 @@ out:
 
 void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 			 struct netlink_callback *cb,
-			 struct inet_diag_req_v2 *r, struct nlattr *bc)
+			 const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	struct net *net = sock_net(skb->sk);
 	int i, num, s_i, s_num;
@@ -978,7 +978,7 @@ out:
 EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
 
 static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-			    struct inet_diag_req_v2 *r,
+			    const struct inet_diag_req_v2 *r,
 			    struct nlattr *bc)
 {
 	const struct inet_diag_handler *handler;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 0d73f9ddb55b..86dc119a3815 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -34,13 +34,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 }
 
 static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-			  struct inet_diag_req_v2 *r, struct nlattr *bc)
+			  const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
 }
 
 static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-			     struct inet_diag_req_v2 *req)
+			     const struct inet_diag_req_v2 *req)
 {
 	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 4a000f1dd757..2dbfc1f1f7b3 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -18,8 +18,9 @@
 #include <linux/sock_diag.h>
 
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req_v2 *req,
-		struct nlattr *bc)
+			struct netlink_callback *cb,
+			const struct inet_diag_req_v2 *req,
+			struct nlattr *bc)
 {
 	if (!inet_diag_bc_sk(bc, sk))
 		return 0;
@@ -31,7 +32,8 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
 }
 
 static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
-		const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req)
+			const struct nlmsghdr *nlh,
+			const struct inet_diag_req_v2 *req)
 {
 	int err = -EINVAL;
 	struct sock *sk;
@@ -90,8 +92,9 @@ out_nosk:
 	return err;
 }
 
-static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req_v2 *r, struct nlattr *bc)
+static void udp_dump(struct udp_table *table, struct sk_buff *skb,
+		     struct netlink_callback *cb,
+		     const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	int num, s_num, slot, s_slot;
 	struct net *net = sock_net(skb->sk);
@@ -144,13 +147,13 @@ done:
 }
 
 static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req_v2 *r, struct nlattr *bc)
+			  const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	udp_dump(&udp_table, skb, cb, r, bc);
 }
 
 static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req_v2 *req)
+			     const struct inet_diag_req_v2 *req)
 {
 	return udp_dump_one(&udp_table, in_skb, nlh, req);
 }
@@ -170,13 +173,14 @@ static const struct inet_diag_handler udp_diag_handler = {
 };
 
 static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req_v2 *r, struct nlattr *bc)
+			      const struct inet_diag_req_v2 *r,
+			      struct nlattr *bc)
 {
 	udp_dump(&udplite_table, skb, cb, r, bc);
 }
 
 static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req_v2 *req)
+				 const struct inet_diag_req_v2 *req)
 {
 	return udp_dump_one(&udplite_table, in_skb, nlh, req);
 }
-- 
cgit v1.2.3


From 406ef2a67bd0bb13d77d5e5d700e36a2caea09ae Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 10 Mar 2015 20:14:27 +0200
Subject: Bluetooth: Make Fast Connectable available while powered off

To maximize the usability of the Fast Connectable feature we should make
it possible to set (or unset) it at any given moment. This means
removing the dependency on the 'connectable' setting as well as the
'powered' setting. The former makes also sense since page scan may get
enabled through add_device even if 'connectable' is false. To keep the
setting available over power cycles its flag also needs to be removed
from the flags that are cleared upon HCI_Reset.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  2 +-
 net/bluetooth/mgmt.c        | 30 ++++++++++++------------------
 2 files changed, 13 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8e54f825153c..f76f45ae76c3 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -231,7 +231,7 @@ enum {
  * or the HCI device is closed.
  */
 #define HCI_PERSISTENT_MASK (BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ) | \
-			      BIT(HCI_FAST_CONNECTABLE) | BIT(HCI_LE_ADV))
+			     BIT(HCI_LE_ADV))
 
 /* HCI timeouts */
 #define HCI_DISCONN_TIMEOUT	msecs_to_jiffies(2000)	/* 2 seconds */
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d769b428b630..49b8e09ffe67 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1968,15 +1968,6 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
 	}
 
 no_scan_update:
-	/* If we're going from non-connectable to connectable or
-	 * vice-versa when fast connectable is enabled ensure that fast
-	 * connectable gets disabled. write_fast_connectable won't do
-	 * anything if the page scan parameters are already what they
-	 * should be.
-	 */
-	if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
-		write_fast_connectable(&req, false);
-
 	/* Update the advertising parameters if necessary */
 	if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
 		enable_advertising(&req);
@@ -4660,14 +4651,6 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
 				       MGMT_STATUS_INVALID_PARAMS);
 
-	if (!hdev_is_powered(hdev))
-		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
-				       MGMT_STATUS_NOT_POWERED);
-
-	if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
-		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
-				       MGMT_STATUS_REJECTED);
-
 	hci_dev_lock(hdev);
 
 	if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) {
@@ -4682,6 +4665,14 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
 		goto unlock;
 	}
 
+	if (!hdev_is_powered(hdev)) {
+		change_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+		err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE,
+					hdev);
+		new_settings(hdev, sk);
+		goto unlock;
+	}
+
 	cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev,
 			       data, len);
 	if (!cmd) {
@@ -6481,7 +6472,10 @@ static int powered_update_hci(struct hci_dev *hdev)
 			    sizeof(link_sec), &link_sec);
 
 	if (lmp_bredr_capable(hdev)) {
-		write_fast_connectable(&req, false);
+		if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
+			write_fast_connectable(&req, true);
+		else
+			write_fast_connectable(&req, false);
 		__hci_update_page_scan(&req);
 		update_class(&req);
 		update_name(&req);
-- 
cgit v1.2.3


From 55e76b38986a61259f3079afd0f9a865651a34fe Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 10 Mar 2015 22:34:40 +0200
Subject: Bluetooth: Add 'Already Paired' error for Pair Device command

To make the behavior predictable when attempting to pair with a device
for which we already have a Link Key or Long Term Key, this patch adds a
new 'Already Paired' error which gets sent in such a scenario.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  2 ++
 include/net/bluetooth/mgmt.h     |  1 +
 net/bluetooth/hci_core.c         | 27 +++++++++++++++++++++++++++
 net/bluetooth/mgmt.c             |  7 +++++++
 4 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index afc641c5e55c..5cc5a192359d 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -967,6 +967,8 @@ struct smp_irk *hci_add_irk(struct hci_dev *hdev, bdaddr_t *bdaddr,
 void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type);
 void hci_smp_irks_clear(struct hci_dev *hdev);
 
+bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type);
+
 void hci_remote_oob_data_clear(struct hci_dev *hdev);
 struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev,
 					  bdaddr_t *bdaddr, u8 bdaddr_type);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 0c737e4b8f57..5bf6af9cee78 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -43,6 +43,7 @@
 #define MGMT_STATUS_CANCELLED		0x10
 #define MGMT_STATUS_INVALID_INDEX	0x11
 #define MGMT_STATUS_RFKILLED		0x12
+#define MGMT_STATUS_ALREADY_PAIRED	0x13
 
 struct mgmt_hdr {
 	__le16	opcode;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index bba4c344c6e0..a35d8441187a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2516,6 +2516,33 @@ void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
 	}
 }
 
+bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
+{
+	struct smp_ltk *k;
+	u8 addr_type;
+
+	if (type == BDADDR_BREDR) {
+		if (hci_find_link_key(hdev, bdaddr))
+			return true;
+		return false;
+	}
+
+	/* Convert to HCI addr type which struct smp_ltk uses */
+	if (type == BDADDR_LE_PUBLIC)
+		addr_type = ADDR_LE_DEV_PUBLIC;
+	else
+		addr_type = ADDR_LE_DEV_RANDOM;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(k, &hdev->long_term_keys, list) {
+		if (k->bdaddr_type == addr_type && !bacmp(bdaddr, &k->bdaddr))
+			return true;
+	}
+	rcu_read_unlock();
+
+	return false;
+}
+
 /* HCI command timer function */
 static void hci_cmd_timeout(struct work_struct *work)
 {
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 49b8e09ffe67..600636c00d34 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3245,6 +3245,13 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 		goto unlock;
 	}
 
+	if (hci_bdaddr_is_paired(hdev, &cp->addr.bdaddr, cp->addr.type)) {
+		err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE,
+					MGMT_STATUS_ALREADY_PAIRED, &rp,
+					sizeof(rp));
+		goto unlock;
+	}
+
 	sec_level = BT_SECURITY_MEDIUM;
 	auth_type = HCI_AT_DEDICATED_BONDING;
 
-- 
cgit v1.2.3


From 0ddcf43d5d4a03ded1ee3f6b3b72a0cbed4e90b1 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Fri, 6 Mar 2015 13:47:00 -0800
Subject: ipv4: FIB Local/MAIN table collapse

This patch is meant to collapse local and main into one by converting
tb_data from an array to a pointer.  Doing this allows us to point the
local table into the main while maintaining the same variables in the
table.

As such the tb_data was converted from an array to a pointer, and a new
array called data is added in order to still provide an object for tb_data
to point to.

In order to track the origin of the fib aliases a tb_id value was added in
a hole that existed on 64b systems.  Using this we can also reverse the
merge in the event that custom FIB rules are enabled.

With this patch I am seeing an improvement of 20ns to 30ns for routing
lookups as long as custom rules are not enabled, with custom rules enabled
we fall back to split tables and the original behavior.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h |   2 +-
 include/net/ip_fib.h    |  26 +++-----
 net/core/fib_rules.c    |   8 ++-
 net/ipv4/fib_frontend.c |  59 +++++++++++++++--
 net/ipv4/fib_lookup.h   |   1 +
 net/ipv4/fib_rules.c    |  20 ++++--
 net/ipv4/fib_trie.c     | 172 ++++++++++++++++++++++++++++++++++++++++++++++--
 7 files changed, 250 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index e584de16e4c3..88d2ae526961 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -58,7 +58,7 @@ struct fib_rules_ops {
 					     struct sk_buff *,
 					     struct fib_rule_hdr *,
 					     struct nlattr **);
-	void			(*delete)(struct fib_rule *);
+	int			(*delete)(struct fib_rule *);
 	int			(*compare)(struct fib_rule *,
 					   struct fib_rule_hdr *,
 					   struct nlattr **);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 1657604c5dd3..54271ed0ed45 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -186,7 +186,8 @@ struct fib_table {
 	int			tb_default;
 	int			tb_num_default;
 	struct rcu_head		rcu;
-	unsigned long		tb_data[0];
+	unsigned long 		*tb_data;
+	unsigned long		__data[0];
 };
 
 int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
@@ -196,11 +197,10 @@ int fib_table_delete(struct fib_table *, struct fib_config *);
 int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
 		   struct netlink_callback *cb);
 int fib_table_flush(struct fib_table *table);
+struct fib_table *fib_trie_unmerge(struct fib_table *main_tb);
 void fib_table_flush_external(struct fib_table *table);
 void fib_free_table(struct fib_table *tb);
 
-
-
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
 #define TABLE_LOCAL_INDEX	(RT_TABLE_LOCAL & (FIB_TABLE_HASHSZ - 1))
@@ -229,18 +229,13 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
 			     struct fib_result *res)
 {
 	struct fib_table *tb;
-	int err;
+	int err = -ENETUNREACH;
 
 	rcu_read_lock();
 
-	for (err = 0; !err; err = -ENETUNREACH) {
-		tb = fib_get_table(net, RT_TABLE_LOCAL);
-		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
-			break;
-		tb = fib_get_table(net, RT_TABLE_MAIN);
-		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
-			break;
-	}
+	tb = fib_get_table(net, RT_TABLE_MAIN);
+	if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+		err = 0;
 
 	rcu_read_unlock();
 
@@ -270,10 +265,6 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp,
 	res->tclassid = 0;
 
 	for (err = 0; !err; err = -ENETUNREACH) {
-		tb = rcu_dereference_rtnl(net->ipv4.fib_local);
-		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
-			break;
-
 		tb = rcu_dereference_rtnl(net->ipv4.fib_main);
 		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
 			break;
@@ -309,6 +300,7 @@ static inline int fib_num_tclassid_users(struct net *net)
 	return 0;
 }
 #endif
+int fib_unmerge(struct net *net);
 void fib_flush_external(struct net *net);
 
 /* Exported by fib_semantics.c */
@@ -320,7 +312,7 @@ void fib_select_multipath(struct fib_result *res);
 
 /* Exported by fib_trie.c */
 void fib_trie_init(void);
-struct fib_table *fib_trie_table(u32 id);
+struct fib_table *fib_trie_table(u32 id, struct fib_table *alias);
 
 static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
 {
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 44706e81b2e0..b55677fed1c8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -492,6 +492,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 			goto errout;
 		}
 
+		if (ops->delete) {
+			err = ops->delete(rule);
+			if (err)
+				goto errout;
+		}
+
 		list_del_rcu(&rule->list);
 
 		if (rule->action == FR_ACT_GOTO) {
@@ -517,8 +523,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).portid);
-		if (ops->delete)
-			ops->delete(rule);
 		fib_rule_put(rule);
 		flush_route_cache(ops);
 		rules_ops_put(ops);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e067770235bf..7cda3b0521d8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -52,14 +52,14 @@ static int __net_init fib4_rules_init(struct net *net)
 {
 	struct fib_table *local_table, *main_table;
 
-	local_table = fib_trie_table(RT_TABLE_LOCAL);
-	if (local_table == NULL)
-		return -ENOMEM;
-
-	main_table  = fib_trie_table(RT_TABLE_MAIN);
+	main_table  = fib_trie_table(RT_TABLE_MAIN, NULL);
 	if (main_table == NULL)
 		goto fail;
 
+	local_table = fib_trie_table(RT_TABLE_LOCAL, main_table);
+	if (local_table == NULL)
+		return -ENOMEM;
+
 	hlist_add_head_rcu(&local_table->tb_hlist,
 				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
 	hlist_add_head_rcu(&main_table->tb_hlist,
@@ -74,7 +74,7 @@ fail:
 
 struct fib_table *fib_new_table(struct net *net, u32 id)
 {
-	struct fib_table *tb;
+	struct fib_table *tb, *alias = NULL;
 	unsigned int h;
 
 	if (id == 0)
@@ -83,7 +83,10 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
 	if (tb)
 		return tb;
 
-	tb = fib_trie_table(id);
+	if (id == RT_TABLE_LOCAL)
+		alias = fib_new_table(net, RT_TABLE_MAIN);
+
+	tb = fib_trie_table(id, alias);
 	if (!tb)
 		return NULL;
 
@@ -126,6 +129,48 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 }
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
+static void fib_replace_table(struct net *net, struct fib_table *old,
+			      struct fib_table *new)
+{
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	switch (new->tb_id) {
+	case RT_TABLE_LOCAL:
+		rcu_assign_pointer(net->ipv4.fib_local, new);
+		break;
+	case RT_TABLE_MAIN:
+		rcu_assign_pointer(net->ipv4.fib_main, new);
+		break;
+	case RT_TABLE_DEFAULT:
+		rcu_assign_pointer(net->ipv4.fib_default, new);
+		break;
+	default:
+		break;
+	}
+
+#endif
+	/* replace the old table in the hlist */
+	hlist_replace_rcu(&old->tb_hlist, &new->tb_hlist);
+}
+
+int fib_unmerge(struct net *net)
+{
+	struct fib_table *old, *new;
+
+	old = fib_get_table(net, RT_TABLE_LOCAL);
+	new = fib_trie_unmerge(old);
+
+	if (!new)
+		return -ENOMEM;
+
+	/* replace merged table with clean table */
+	if (new != old) {
+		fib_replace_table(net, old, new);
+		fib_free_table(old);
+	}
+
+	return 0;
+}
+
 static void fib_flush(struct net *net)
 {
 	int flushed = 0;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ae2e6eede46e..c6211ed60b03 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -12,6 +12,7 @@ struct fib_alias {
 	u8			fa_type;
 	u8			fa_state;
 	u8			fa_slen;
+	u32			tb_id;
 	struct rcu_head		rcu;
 };
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 190d0d00d744..e9bc5e42cf43 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -174,6 +174,11 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	if (frh->tos & ~IPTOS_TOS_MASK)
 		goto errout;
 
+	/* split local/main if they are not already split */
+	err = fib_unmerge(net);
+	if (err)
+		goto errout;
+
 	if (rule->table == RT_TABLE_UNSPEC) {
 		if (rule->action == FR_ACT_TO_TBL) {
 			struct fib_table *table;
@@ -216,17 +221,24 @@ errout:
 	return err;
 }
 
-static void fib4_rule_delete(struct fib_rule *rule)
+static int fib4_rule_delete(struct fib_rule *rule)
 {
 	struct net *net = rule->fr_net;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+	int err;
 
-	if (rule4->tclassid)
+	/* split local/main if they are not already split */
+	err = fib_unmerge(net);
+	if (err)
+		goto errout;
+
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	if (((struct fib4_rule *)rule)->tclassid)
 		net->ipv4.fib_num_tclassid_users--;
 #endif
 	net->ipv4.fib_has_custom_rules = true;
 	fib_flush_external(rule->fr_net);
+errout:
+	return err;
 }
 
 static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 83290beaf7cf..7b2badd74ad8 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1120,6 +1120,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
+			/* duplicate entry from another table */
+			if (WARN_ON(fa->tb_id != tb->tb_id))
+				continue;
 			if (fa->fa_type == cfg->fc_type &&
 			    fa->fa_info == fi) {
 				fa_match = fa;
@@ -1197,6 +1200,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	new_fa->fa_type = cfg->fc_type;
 	new_fa->fa_state = 0;
 	new_fa->fa_slen = slen;
+	new_fa->tb_id = tb->tb_id;
 
 	/* (Optionally) offload fib entry to switch hardware. */
 	err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
@@ -1217,7 +1221,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 		tb->tb_num_default++;
 
 	rt_cache_flush(cfg->fc_nlinfo.nl_net);
-	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
+	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
 		  &cfg->fc_nlinfo, 0);
 succeeded:
 	return 0;
@@ -1243,7 +1247,7 @@ static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
 int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 		     struct fib_result *res, int fib_flags)
 {
-	struct trie *t = (struct trie *)tb->tb_data;
+	struct trie *t = (struct trie *) tb->tb_data;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	struct trie_use_stats __percpu *stats = t->stats;
 #endif
@@ -1483,6 +1487,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 		if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
 			break;
 
+		if (fa->tb_id != tb->tb_id)
+			continue;
+
 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
 		     fa->fa_info->fib_scope == cfg->fc_scope) &&
@@ -1576,6 +1583,120 @@ found:
 	return n;
 }
 
+static void fib_trie_free(struct fib_table *tb)
+{
+	struct trie *t = (struct trie *)tb->tb_data;
+	struct key_vector *pn = t->kv;
+	unsigned long cindex = 1;
+	struct hlist_node *tmp;
+	struct fib_alias *fa;
+
+	/* walk trie in reverse order and free everything */
+	for (;;) {
+		struct key_vector *n;
+
+		if (!(cindex--)) {
+			t_key pkey = pn->key;
+
+			if (IS_TRIE(pn))
+				break;
+
+			n = pn;
+			pn = node_parent(pn);
+
+			/* drop emptied tnode */
+			put_child_root(pn, n->key, NULL);
+			node_free(n);
+
+			cindex = get_index(pkey, pn);
+
+			continue;
+		}
+
+		/* grab the next available node */
+		n = get_child(pn, cindex);
+		if (!n)
+			continue;
+
+		if (IS_TNODE(n)) {
+			/* record pn and cindex for leaf walking */
+			pn = n;
+			cindex = 1ul << n->bits;
+
+			continue;
+		}
+
+		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
+			hlist_del_rcu(&fa->fa_list);
+			alias_free_mem_rcu(fa);
+		}
+
+		put_child_root(pn, n->key, NULL);
+		node_free(n);
+	}
+
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+	free_percpu(t->stats);
+#endif
+	kfree(tb);
+}
+
+struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
+{
+	struct trie *ot = (struct trie *)oldtb->tb_data;
+	struct key_vector *l, *tp = ot->kv;
+	struct fib_table *local_tb;
+	struct fib_alias *fa;
+	struct trie *lt;
+	t_key key = 0;
+
+	if (oldtb->tb_data == oldtb->__data)
+		return oldtb;
+
+	local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
+	if (!local_tb)
+		return NULL;
+
+	lt = (struct trie *)local_tb->tb_data;
+
+	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
+		struct key_vector *local_l = NULL, *local_tp;
+
+		hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+			struct fib_alias *new_fa;
+
+			if (local_tb->tb_id != fa->tb_id)
+				continue;
+
+			/* clone fa for new local table */
+			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
+			if (!new_fa)
+				goto out;
+
+			memcpy(new_fa, fa, sizeof(*fa));
+
+			/* insert clone into table */
+			if (!local_l)
+				local_l = fib_find_node(lt, &local_tp, l->key);
+
+			if (fib_insert_alias(lt, local_tp, local_l, new_fa,
+					     NULL, l->key))
+				goto out;
+		}
+
+		/* stop loop if key wrapped back to 0 */
+		key = l->key + 1;
+		if (key < l->key)
+			break;
+	}
+
+	return local_tb;
+out:
+	fib_trie_free(local_tb);
+
+	return NULL;
+}
+
 /* Caller must hold RTNL */
 void fib_table_flush_external(struct fib_table *tb)
 {
@@ -1587,6 +1708,7 @@ void fib_table_flush_external(struct fib_table *tb)
 
 	/* walk trie in reverse order */
 	for (;;) {
+		unsigned char slen = 0;
 		struct key_vector *n;
 
 		if (!(cindex--)) {
@@ -1596,8 +1718,8 @@ void fib_table_flush_external(struct fib_table *tb)
 			if (IS_TRIE(pn))
 				break;
 
-			/* no need to resize like in flush below */
-			pn = node_parent(pn);
+			/* resize completed node */
+			pn = resize(t, pn);
 			cindex = get_index(pkey, pn);
 
 			continue;
@@ -1619,6 +1741,18 @@ void fib_table_flush_external(struct fib_table *tb)
 		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 
+			/* if alias was cloned to local then we just
+			 * need to remove the local copy from main
+			 */
+			if (tb->tb_id != fa->tb_id) {
+				hlist_del_rcu(&fa->fa_list);
+				alias_free_mem_rcu(fa);
+				continue;
+			}
+
+			/* record local slen */
+			slen = fa->fa_slen;
+
 			if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL))
 				continue;
 
@@ -1627,6 +1761,16 @@ void fib_table_flush_external(struct fib_table *tb)
 						   fi, fa->fa_tos,
 						   fa->fa_type, tb->tb_id);
 		}
+
+		/* update leaf slen */
+		n->slen = slen;
+
+		if (hlist_empty(&n->leaf)) {
+			put_child_root(pn, n->key, NULL);
+			node_free(n);
+		} else {
+			leaf_pull_suffix(pn, n);
+		}
 	}
 }
 
@@ -1711,7 +1855,8 @@ static void __trie_free_rcu(struct rcu_head *head)
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	struct trie *t = (struct trie *)tb->tb_data;
 
-	free_percpu(t->stats);
+	if (tb->tb_data == tb->__data)
+		free_percpu(t->stats);
 #endif /* CONFIG_IP_FIB_TRIE_STATS */
 	kfree(tb);
 }
@@ -1738,6 +1883,11 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
 			continue;
 		}
 
+		if (tb->tb_id != fa->tb_id) {
+			i++;
+			continue;
+		}
+
 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
 				  cb->nlh->nlmsg_seq,
 				  RTM_NEWROUTE,
@@ -1804,18 +1954,26 @@ void __init fib_trie_init(void)
 					   0, SLAB_PANIC, NULL);
 }
 
-struct fib_table *fib_trie_table(u32 id)
+struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
 {
 	struct fib_table *tb;
 	struct trie *t;
+	size_t sz = sizeof(*tb);
+
+	if (!alias)
+		sz += sizeof(struct trie);
 
-	tb = kzalloc(sizeof(*tb) + sizeof(struct trie), GFP_KERNEL);
+	tb = kzalloc(sz, GFP_KERNEL);
 	if (tb == NULL)
 		return NULL;
 
 	tb->tb_id = id;
 	tb->tb_default = -1;
 	tb->tb_num_default = 0;
+	tb->tb_data = (alias ? alias->__data : tb->__data);
+
+	if (alias)
+		return tb;
 
 	t = (struct trie *) tb->tb_data;
 	t->kv[0].pos = KEYLENGTH;
-- 
cgit v1.2.3


From 988dfbd795cf08b00576c1ced4210281b2bccffc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 10 Mar 2015 09:27:55 +1100
Subject: rhashtable: Move hash_rnd into bucket_table

Currently hash_rnd is a parameter that users can set.  However,
no existing users set this parameter.  It is also something that
people are unlikely to want to set directly since it's just a
random number.

In preparation for allowing the reseeding/rehashing of rhashtable,
this patch moves hash_rnd into bucket_table so that it's now an
internal state rather than a parameter.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  4 ++--
 lib/rhashtable.c           | 24 +++++++++++++++---------
 2 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index d438eeb08bff..5ef8ea551556 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -49,12 +49,14 @@ struct rhash_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
+ * @hash_rnd: Random seed to fold into hash
  * @locks_mask: Mask to apply before accessing locks[]
  * @locks: Array of spinlocks protecting individual buckets
  * @buckets: size * hash buckets
  */
 struct bucket_table {
 	size_t			size;
+	u32			hash_rnd;
 	unsigned int		locks_mask;
 	spinlock_t		*locks;
 
@@ -72,7 +74,6 @@ struct rhashtable;
  * @key_len: Length of key
  * @key_offset: Offset of key in struct to be hashed
  * @head_offset: Offset of rhash_head in struct to be hashed
- * @hash_rnd: Seed to use while hashing
  * @max_shift: Maximum number of shifts while expanding
  * @min_shift: Minimum number of shifts while shrinking
  * @nulls_base: Base value to generate nulls marker
@@ -85,7 +86,6 @@ struct rhashtable_params {
 	size_t			key_len;
 	size_t			key_offset;
 	size_t			head_offset;
-	u32			hash_rnd;
 	size_t			max_shift;
 	size_t			min_shift;
 	u32			nulls_base;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index b5344ef4c684..ba15dceee27f 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -66,25 +66,28 @@ static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
 	return hash & (tbl->size - 1);
 }
 
-static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
+static u32 obj_raw_hashfn(struct rhashtable *ht, const void *ptr)
 {
+	struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
 	u32 hash;
 
 	if (unlikely(!ht->p.key_len))
-		hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd);
+		hash = ht->p.obj_hashfn(ptr, tbl->hash_rnd);
 	else
 		hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len,
-				    ht->p.hash_rnd);
+				    tbl->hash_rnd);
 
 	return hash >> HASH_RESERVED_SPACE;
 }
 
 static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
 {
-	return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE;
+	struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+
+	return ht->p.hashfn(key, len, tbl->hash_rnd) >> HASH_RESERVED_SPACE;
 }
 
-static u32 head_hashfn(const struct rhashtable *ht,
+static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
 		       const struct rhash_head *he)
 {
@@ -92,7 +95,7 @@ static u32 head_hashfn(const struct rhashtable *ht,
 }
 
 #ifdef CONFIG_PROVE_LOCKING
-static void debug_dump_buckets(const struct rhashtable *ht,
+static void debug_dump_buckets(struct rhashtable *ht,
 			       const struct bucket_table *tbl)
 {
 	struct rhash_head *he;
@@ -385,6 +388,8 @@ int rhashtable_expand(struct rhashtable *ht)
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
+	new_tbl->hash_rnd = old_tbl->hash_rnd;
+
 	atomic_inc(&ht->shift);
 
 	/* Make insertions go into the new, empty table right away. Deletions
@@ -476,6 +481,8 @@ int rhashtable_shrink(struct rhashtable *ht)
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
+	new_tbl->hash_rnd = tbl->hash_rnd;
+
 	rcu_assign_pointer(ht->future_tbl, new_tbl);
 	synchronize_rcu();
 
@@ -1099,14 +1106,13 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	if (tbl == NULL)
 		return -ENOMEM;
 
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
 	atomic_set(&ht->nelems, 0);
 	atomic_set(&ht->shift, ilog2(tbl->size));
 	RCU_INIT_POINTER(ht->tbl, tbl);
 	RCU_INIT_POINTER(ht->future_tbl, tbl);
 
-	if (!ht->p.hash_rnd)
-		get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
-
 	INIT_WORK(&ht->run_work, rht_deferred_worker);
 
 	return 0;
-- 
cgit v1.2.3


From 33d6737761ea425d43f3b6e4561573a4020982f2 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 10 Mar 2015 16:57:11 -0700
Subject: of: mdio: export of_mdio_parse_addr

Export of_mdio_parse_addr() which allows parsing a given Ethernet PHY
node MDIO address, verify it is within the allowed range, and return
its value. This is going to be useful for the DSA code which needs to
deal with multiple layers of MDIO buses.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/of_mdio.c    | 3 ++-
 include/linux/of_mdio.h | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 1bd43053b8c7..0c064485d1c2 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -88,7 +88,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *chi
 	return 0;
 }
 
-static int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
+int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
 {
 	u32 addr;
 	int ret;
@@ -108,6 +108,7 @@ static int of_mdio_parse_addr(struct device *dev, const struct device_node *np)
 
 	return addr;
 }
+EXPORT_SYMBOL(of_mdio_parse_addr);
 
 /**
  * of_mdiobus_register - Register mii_bus and create PHYs from the device tree
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index d449018d0726..8f2237eb3485 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -24,6 +24,7 @@ struct phy_device *of_phy_attach(struct net_device *dev,
 				 phy_interface_t iface);
 
 extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
+extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np);
 
 #else /* CONFIG_OF */
 static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
@@ -60,6 +61,12 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
 {
 	return NULL;
 }
+
+static inline int of_mdio_parse_addr(struct device *dev,
+				     const struct device_node *np)
+{
+	return -ENOSYS;
+}
 #endif /* CONFIG_OF */
 
 #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY)
-- 
cgit v1.2.3


From 33cf7c90fe2f97afb1cadaa0cfb782cb9d1b9ee2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 11 Mar 2015 18:53:14 -0700
Subject: net: add real socket cookies

A long standing problem in netlink socket dumps is the use
of kernel socket addresses as cookies.

1) It is a security concern.

2) Sockets can be reused quite quickly, so there is
   no guarantee a cookie is used once and identify
   a flow.

3) request sock, establish sock, and timewait socks
   for a given flow have different cookies.

Part of our effort to bring better TCP statistics requires
to switch to a different allocator.

In this patch, I chose to use a per network namespace 64bit generator,
and to use it only in the case a socket needs to be dumped to netlink.
(This might be refined later if needed)

Note that I tried to carry cookies from request sock, to establish sock,
then timewait sockets.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Salo <salo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h        |  4 ++--
 include/net/inet_sock.h          |  2 ++
 include/net/inet_timewait_sock.h |  1 +
 include/net/net_namespace.h      |  2 ++
 include/net/sock.h               |  3 +++
 net/core/sock.c                  |  1 +
 net/core/sock_diag.c             | 37 +++++++++++++++++++++++++++----------
 net/dccp/ipv4.c                  |  2 ++
 net/ipv4/inet_connection_sock.c  |  2 ++
 net/ipv4/inet_diag.c             | 14 +++++++++-----
 net/ipv4/inet_timewait_sock.c    |  1 +
 net/ipv4/syncookies.c            |  1 +
 net/ipv4/tcp_input.c             |  2 ++
 13 files changed, 55 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index b5ad7d35a636..083ac388098e 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -19,8 +19,8 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
-int sock_diag_check_cookie(void *sk, const __u32 *cookie);
-void sock_diag_save_cookie(void *sk, __u32 *cookie);
+int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
+void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
 
 int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
 int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index eb16c7beed1e..e565afdc14ad 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -77,6 +77,8 @@ struct inet_request_sock {
 #define ir_v6_rmt_addr		req.__req_common.skc_v6_daddr
 #define ir_v6_loc_addr		req.__req_common.skc_v6_rcv_saddr
 #define ir_iif			req.__req_common.skc_bound_dev_if
+#define ir_cookie		req.__req_common.skc_cookie
+#define ireq_net		req.__req_common.skc_net
 
 	kmemcheck_bitfield_begin(flags);
 	u16			snd_wscale : 4,
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 6c566034e26d..b7ce1003c429 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -122,6 +122,7 @@ struct inet_timewait_sock {
 #define tw_v6_rcv_saddr    	__tw_common.skc_v6_rcv_saddr
 #define tw_dport		__tw_common.skc_dport
 #define tw_num			__tw_common.skc_num
+#define tw_cookie		__tw_common.skc_cookie
 
 	int			tw_timeout;
 	volatile unsigned char	tw_substate;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 2cb9acb618e9..e086f4030dd2 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -56,6 +56,8 @@ struct net {
 #endif
 	spinlock_t		rules_mod_lock;
 
+	atomic64_t		cookie_gen;
+
 	struct list_head	list;		/* list of network namespaces */
 	struct list_head	cleanup_list;	/* namespaces on death row */
 	struct list_head	exit_list;	/* Use only net_mutex */
diff --git a/include/net/sock.h b/include/net/sock.h
index 250822cc1e02..d996c633bec2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -199,6 +199,8 @@ struct sock_common {
 	struct in6_addr		skc_v6_rcv_saddr;
 #endif
 
+	atomic64_t		skc_cookie;
+
 	/*
 	 * fields between dontcopy_begin/dontcopy_end
 	 * are not copied in sock_copy()
@@ -329,6 +331,7 @@ struct sock {
 #define sk_net			__sk_common.skc_net
 #define sk_v6_daddr		__sk_common.skc_v6_daddr
 #define sk_v6_rcv_saddr	__sk_common.skc_v6_rcv_saddr
+#define sk_cookie		__sk_common.skc_cookie
 
 	socket_lock_t		sk_lock;
 	struct sk_buff_head	sk_receive_queue;
diff --git a/net/core/sock.c b/net/core/sock.c
index 726e1f99aa8d..a9a9c2ff9260 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1538,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_err	   = 0;
 		newsk->sk_priority = 0;
 		newsk->sk_incoming_cpu = raw_smp_processor_id();
+		atomic64_set(&newsk->sk_cookie, 0);
 		/*
 		 * Before updating sk_refcnt, we must commit prior changes to memory
 		 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 96e70ee05a8d..74dddf84adcd 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 
-int sock_diag_check_cookie(void *sk, const __u32 *cookie)
+static u64 sock_gen_cookie(struct sock *sk)
 {
-	if ((cookie[0] != INET_DIAG_NOCOOKIE ||
-	     cookie[1] != INET_DIAG_NOCOOKIE) &&
-	    ((u32)(unsigned long)sk != cookie[0] ||
-	     (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1]))
-		return -ESTALE;
-	else
+	while (1) {
+		u64 res = atomic64_read(&sk->sk_cookie);
+
+		if (res)
+			return res;
+		res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
+		atomic64_cmpxchg(&sk->sk_cookie, 0, res);
+	}
+}
+
+int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie)
+{
+	u64 res;
+
+	if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE)
 		return 0;
+
+	res = sock_gen_cookie(sk);
+	if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1])
+		return -ESTALE;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
 
-void sock_diag_save_cookie(void *sk, __u32 *cookie)
+void sock_diag_save_cookie(struct sock *sk, __u32 *cookie)
 {
-	cookie[0] = (u32)(unsigned long)sk;
-	cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+	u64 res = sock_gen_cookie(sk);
+
+	cookie[0] = (u32)res;
+	cookie[1] = (u32)(res >> 32);
 }
 EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e45b968613a4..207281ae3536 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -641,6 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+	ireq->ireq_net = sock_net(sk);
+	atomic64_set(&ireq->ir_cookie, 0);
 
 	/*
 	 * Step 3: Process LISTEN state
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 14d02ea905b6..34581f928afa 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -678,6 +678,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		newsk->sk_write_space = sk_stream_write_space;
 
 		newsk->sk_mark = inet_rsk(req)->ir_mark;
+		atomic64_set(&newsk->sk_cookie,
+			     atomic64_read(&inet_rsk(req)->ir_cookie));
 
 		newicsk->icsk_retransmits = 0;
 		newicsk->icsk_backoff	  = 0;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ac3bfb458afd..29317ff4a007 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -221,12 +221,13 @@ static int inet_csk_diag_fill(struct sock *sk,
 				 user_ns, portid, seq, nlmsg_flags, unlh);
 }
 
-static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
+static int inet_twsk_diag_fill(struct sock *sk,
 			       struct sk_buff *skb,
 			       const struct inet_diag_req_v2 *req,
 			       u32 portid, u32 seq, u16 nlmsg_flags,
 			       const struct nlmsghdr *unlh)
 {
+	struct inet_timewait_sock *tw = inet_twsk(sk);
 	struct inet_diag_msg *r;
 	struct nlmsghdr *nlh;
 	s32 tmo;
@@ -247,7 +248,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 	r->idiag_retrans      = 0;
 
 	r->id.idiag_if	      = tw->tw_bound_dev_if;
-	sock_diag_save_cookie(tw, r->id.idiag_cookie);
+	sock_diag_save_cookie(sk, r->id.idiag_cookie);
 
 	r->id.idiag_sport     = tw->tw_sport;
 	r->id.idiag_dport     = tw->tw_dport;
@@ -283,7 +284,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			const struct nlmsghdr *unlh)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
-		return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq,
+		return inet_twsk_diag_fill(sk, skb, r, portid, seq,
 					   nlmsg_flags, unlh);
 
 	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
@@ -675,7 +676,7 @@ static int inet_twsk_diag_dump(struct sock *sk,
 	if (!inet_diag_bc_sk(bc, sk))
 		return 0;
 
-	return inet_twsk_diag_fill(inet_twsk(sk), skb, r,
+	return inet_twsk_diag_fill(sk, skb, r,
 				   NETLINK_CB(cb->skb).portid,
 				   cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
 }
@@ -734,7 +735,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	r->idiag_retrans = req->num_retrans;
 
 	r->id.idiag_if = sk->sk_bound_dev_if;
-	sock_diag_save_cookie(req, r->id.idiag_cookie);
+
+	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
+		     offsetof(struct sock, sk_cookie));
+	sock_diag_save_cookie((struct sock *)ireq, r->id.idiag_cookie);
 
 	tmo = req->expires - jiffies;
 	if (tmo < 0)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 6d592f8555fb..2bd980526631 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -195,6 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 		tw->tw_ipv6only	    = 0;
 		tw->tw_transparent  = inet->transparent;
 		tw->tw_prot	    = sk->sk_prot_creator;
+		atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
 		twsk_net_set(tw, hold_net(sock_net(sk)));
 		/*
 		 * Because we use RCU lookups, we should not set tw_refcnt
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 45fe60c5238e..ece31b426013 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -346,6 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
 	treq->listener		= NULL;
+	ireq->ireq_net		= sock_net(sk);
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fb4cf8b8e121..d7045f5f6ebf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5965,6 +5965,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);
+	inet_rsk(req)->ireq_net = sock_net(sk);
+	atomic64_set(&inet_rsk(req)->ir_cookie, 0);
 
 	af_ops->init_req(req, sk, skb);
 
-- 
cgit v1.2.3


From d299ce149c1aa6a2e2c8db44974a6a1c07d7303b Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Thu, 12 Mar 2015 11:00:10 +0900
Subject: vxlan: Correct path typo in comment

Flags are used in the return path rather than the return patch.

Fixes: af33c1adae1e ("vxlan: Eliminate dependency on UDP socket in transmit path")
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index eabd3a038674..9564b779246f 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -130,7 +130,7 @@ struct vxlan_sock {
 #define VXLAN_F_GBP			0x800
 #define VXLAN_F_REMCSUM_NOPARTIAL	0x1000
 
-/* Flags that are used in the receive patch. These flags must match in
+/* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
  */
 #define VXLAN_F_RCV_FLAGS		(VXLAN_F_GBP |			\
-- 
cgit v1.2.3


From c78ba6d64c78634a875d1e316676667cabfea256 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Wed, 11 Mar 2015 15:39:21 +0100
Subject: ipv6: expose RFC4191 route preference via rtnetlink

This makes it possible to retain the route preference when RAs are handled in
userspace.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Reviewed-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h |  1 +
 net/ipv6/route.c               | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index c3722b024e73..bea910f924dd 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -305,6 +305,7 @@ enum rtattr_type_t {
 	RTA_MFC_STATS,
 	RTA_VIA,
 	RTA_NEWDST,
+	RTA_PREF,
 	__RTA_MAX
 };
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 06fa819c43c9..58c0e6a4d15d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2398,6 +2398,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 	[RTA_PRIORITY]          = { .type = NLA_U32 },
 	[RTA_METRICS]           = { .type = NLA_NESTED },
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
+	[RTA_PREF]              = { .type = NLA_U8 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2405,6 +2406,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
+	unsigned int pref;
 	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
@@ -2480,6 +2482,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
 	}
 
+	if (tb[RTA_PREF]) {
+		pref = nla_get_u8(tb[RTA_PREF]);
+		if (pref != ICMPV6_ROUTER_PREF_LOW &&
+		    pref != ICMPV6_ROUTER_PREF_HIGH)
+			pref = ICMPV6_ROUTER_PREF_MEDIUM;
+		cfg->fc_flags |= RTF_PREF(pref);
+	}
+
 	err = 0;
 errout:
 	return err;
@@ -2583,7 +2593,8 @@ static inline size_t rt6_nlmsg_size(void)
 	       + nla_total_size(4) /* RTA_PRIORITY */
 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
 	       + nla_total_size(sizeof(struct rta_cacheinfo))
-	       + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+	       + nla_total_size(1); /* RTA_PREF */
 }
 
 static int rt6_fill_node(struct net *net,
@@ -2724,6 +2735,9 @@ static int rt6_fill_node(struct net *net,
 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
 		goto nla_put_failure;
 
+	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
-- 
cgit v1.2.3


From efd7ef1c1929d7a0329d4349252863c04d6f1729 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 11 Mar 2015 23:04:08 -0500
Subject: net: Kill hold_net release_net

hold_net and release_net were an idea that turned out to be useless.
The code has been disabled since 2008.  Kill the code it is long past due.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h     |  3 +--
 include/net/fib_rules.h       |  9 +--------
 include/net/net_namespace.h   | 29 -----------------------------
 include/net/sock.h            |  2 +-
 net/core/dev.c                |  2 --
 net/core/fib_rules.c          | 17 +++--------------
 net/core/neighbour.c          |  9 ++-------
 net/core/net_namespace.c      | 11 -----------
 net/core/sock.c               |  1 -
 net/ipv4/fib_semantics.c      |  3 +--
 net/ipv4/inet_hashtables.c    |  3 +--
 net/ipv4/inet_timewait_sock.c |  3 +--
 net/ipv6/addrlabel.c          |  5 +----
 net/ipv6/ip6_flowlabel.c      |  3 +--
 net/openvswitch/datapath.c    |  4 +---
 15 files changed, 14 insertions(+), 90 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1354ae83efc8..cede40d9cac9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1864,8 +1864,7 @@ static inline
 void dev_net_set(struct net_device *dev, struct net *net)
 {
 #ifdef CONFIG_NET_NS
-	release_net(dev->nd_net);
-	dev->nd_net = hold_net(net);
+	dev->nd_net = net;
 #endif
 }
 
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 88d2ae526961..6d67383a5114 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -95,17 +95,10 @@ static inline void fib_rule_get(struct fib_rule *rule)
 	atomic_inc(&rule->refcnt);
 }
 
-static inline void fib_rule_put_rcu(struct rcu_head *head)
-{
-	struct fib_rule *rule = container_of(head, struct fib_rule, rcu);
-	release_net(rule->fr_net);
-	kfree(rule);
-}
-
 static inline void fib_rule_put(struct fib_rule *rule)
 {
 	if (atomic_dec_and_test(&rule->refcnt))
-		call_rcu(&rule->rcu, fib_rule_put_rcu);
+		kfree_rcu(rule, rcu);
 }
 
 static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index e086f4030dd2..fab51ceeabf3 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -49,11 +49,6 @@ struct net {
 	atomic_t		count;		/* To decided when the network
 						 *  namespace should be shut down.
 						 */
-#ifdef NETNS_REFCNT_DEBUG
-	atomic_t		use_count;	/* To track references we
-						 * destroy on demand
-						 */
-#endif
 	spinlock_t		rules_mod_lock;
 
 	atomic64_t		cookie_gen;
@@ -236,30 +231,6 @@ int net_eq(const struct net *net1, const struct net *net2)
 #endif
 
 
-#ifdef NETNS_REFCNT_DEBUG
-static inline struct net *hold_net(struct net *net)
-{
-	if (net)
-		atomic_inc(&net->use_count);
-	return net;
-}
-
-static inline void release_net(struct net *net)
-{
-	if (net)
-		atomic_dec(&net->use_count);
-}
-#else
-static inline struct net *hold_net(struct net *net)
-{
-	return net;
-}
-
-static inline void release_net(struct net *net)
-{
-}
-#endif
-
 #ifdef CONFIG_NET_NS
 
 static inline void write_pnet(struct net **pnet, struct net *net)
diff --git a/include/net/sock.h b/include/net/sock.h
index d996c633bec2..95b2c1c220f9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2204,7 +2204,7 @@ static inline void sk_change_net(struct sock *sk, struct net *net)
 
 	if (!net_eq(current_net, net)) {
 		put_net(current_net);
-		sock_net_set(sk, hold_net(net));
+		sock_net_set(sk, net);
 	}
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 962ee9d71964..39fe369b46ad 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6841,8 +6841,6 @@ void free_netdev(struct net_device *dev)
 {
 	struct napi_struct *p, *n;
 
-	release_net(dev_net(dev));
-
 	netif_free_tx_queues(dev);
 #ifdef CONFIG_SYSFS
 	kvfree(dev->_rx);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index b55677fed1c8..68ea6950cad1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -31,7 +31,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 	r->pref = pref;
 	r->table = table;
 	r->flags = flags;
-	r->fr_net = hold_net(ops->fro_net);
+	r->fr_net = ops->fro_net;
 
 	r->suppress_prefixlen = -1;
 	r->suppress_ifgroup = -1;
@@ -116,7 +116,6 @@ static int __fib_rules_register(struct fib_rules_ops *ops)
 		if (ops->family == o->family)
 			goto errout;
 
-	hold_net(net);
 	list_add_tail_rcu(&ops->list, &net->rules_ops);
 	err = 0;
 errout:
@@ -160,15 +159,6 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 	}
 }
 
-static void fib_rules_put_rcu(struct rcu_head *head)
-{
-	struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu);
-	struct net *net = ops->fro_net;
-
-	release_net(net);
-	kfree(ops);
-}
-
 void fib_rules_unregister(struct fib_rules_ops *ops)
 {
 	struct net *net = ops->fro_net;
@@ -178,7 +168,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
 	fib_rules_cleanup_ops(ops);
 	spin_unlock(&net->rules_mod_lock);
 
-	call_rcu(&ops->rcu, fib_rules_put_rcu);
+	kfree_rcu(ops, rcu);
 }
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
 
@@ -303,7 +293,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 		err = -ENOMEM;
 		goto errout;
 	}
-	rule->fr_net = hold_net(net);
+	rule->fr_net = net;
 
 	if (tb[FRA_PRIORITY])
 		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
@@ -423,7 +413,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 	return 0;
 
 errout_free:
-	release_net(rule->fr_net);
 	kfree(rule);
 errout:
 	rules_ops_put(ops);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ad07990e943d..0e8b32efc031 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -591,7 +591,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
 	if (!n)
 		goto out;
 
-	write_pnet(&n->net, hold_net(net));
+	write_pnet(&n->net, net);
 	memcpy(n->key, pkey, key_len);
 	n->dev = dev;
 	if (dev)
@@ -600,7 +600,6 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
 	if (tbl->pconstructor && tbl->pconstructor(n)) {
 		if (dev)
 			dev_put(dev);
-		release_net(net);
 		kfree(n);
 		n = NULL;
 		goto out;
@@ -634,7 +633,6 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
 				tbl->pdestructor(n);
 			if (n->dev)
 				dev_put(n->dev);
-			release_net(pneigh_net(n));
 			kfree(n);
 			return 0;
 		}
@@ -657,7 +655,6 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 					tbl->pdestructor(n);
 				if (n->dev)
 					dev_put(n->dev);
-				release_net(pneigh_net(n));
 				kfree(n);
 				continue;
 			}
@@ -1428,11 +1425,10 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
 		dev_hold(dev);
 		p->dev = dev;
-		write_pnet(&p->net, hold_net(net));
+		write_pnet(&p->net, net);
 		p->sysctl_table = NULL;
 
 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
-			release_net(net);
 			dev_put(dev);
 			kfree(p);
 			return NULL;
@@ -1472,7 +1468,6 @@ EXPORT_SYMBOL(neigh_parms_release);
 
 static void neigh_parms_destroy(struct neigh_parms *parms)
 {
-	release_net(neigh_parms_net(parms));
 	kfree(parms);
 }
 
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index cb5290b8c428..e5e96b0f6717 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -236,10 +236,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	net->user_ns = user_ns;
 	idr_init(&net->netns_ids);
 
-#ifdef NETNS_REFCNT_DEBUG
-	atomic_set(&net->use_count, 0);
-#endif
-
 	list_for_each_entry(ops, &pernet_list, list) {
 		error = ops_init(ops, net);
 		if (error < 0)
@@ -294,13 +290,6 @@ out_free:
 
 static void net_free(struct net *net)
 {
-#ifdef NETNS_REFCNT_DEBUG
-	if (unlikely(atomic_read(&net->use_count) != 0)) {
-		pr_emerg("network namespace not free! Usage: %d\n",
-			 atomic_read(&net->use_count));
-		return;
-	}
-#endif
 	kfree(rcu_access_pointer(net->gen));
 	kmem_cache_free(net_cachep, net);
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index a9a9c2ff9260..c8842f279f7a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1455,7 +1455,6 @@ void sk_release_kernel(struct sock *sk)
 
 	sock_hold(sk);
 	sock_release(sk->sk_socket);
-	release_net(sock_net(sk));
 	sock_net_set(sk, get_net(&init_net));
 	sock_put(sk);
 }
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c6d267442dac..66c1e4fbf884 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -213,7 +213,6 @@ static void free_fib_info_rcu(struct rcu_head *head)
 		rt_fibinfo_free(&nexthop_nh->nh_rth_input);
 	} endfor_nexthops(fi);
 
-	release_net(fi->fib_net);
 	if (fi->fib_metrics != (u32 *) dst_default_metrics)
 		kfree(fi->fib_metrics);
 	kfree(fi);
@@ -814,7 +813,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	} else
 		fi->fib_metrics = (u32 *) dst_default_metrics;
 
-	fi->fib_net = hold_net(net);
+	fi->fib_net = net;
 	fi->fib_protocol = cfg->fc_protocol;
 	fi->fib_scope = cfg->fc_scope;
 	fi->fib_flags = cfg->fc_flags;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 9111a4e22155..f6a12b97d12b 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -61,7 +61,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
 
 	if (tb != NULL) {
-		write_pnet(&tb->ib_net, hold_net(net));
+		write_pnet(&tb->ib_net, net);
 		tb->port      = snum;
 		tb->fastreuse = 0;
 		tb->fastreuseport = 0;
@@ -79,7 +79,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
 {
 	if (hlist_empty(&tb->owners)) {
 		__hlist_del(&tb->node);
-		release_net(ib_net(tb));
 		kmem_cache_free(cachep, tb);
 	}
 }
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 2bd980526631..86ebf020925b 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -98,7 +98,6 @@ void inet_twsk_free(struct inet_timewait_sock *tw)
 #ifdef SOCK_REFCNT_DEBUG
 	pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw);
 #endif
-	release_net(twsk_net(tw));
 	kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
 	module_put(owner);
 }
@@ -196,7 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 		tw->tw_transparent  = inet->transparent;
 		tw->tw_prot	    = sk->sk_prot_creator;
 		atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
-		twsk_net_set(tw, hold_net(sock_net(sk)));
+		twsk_net_set(tw, sock_net(sk));
 		/*
 		 * Because we use RCU lookups, we should not set tw_refcnt
 		 * to a non null value before everything is setup for this
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index e43e79d0a612..59c793040498 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -129,9 +129,6 @@ static const __net_initconst struct ip6addrlbl_init_table
 /* Object management */
 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
 {
-#ifdef CONFIG_NET_NS
-	release_net(p->lbl_net);
-#endif
 	kfree(p);
 }
 
@@ -241,7 +238,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
 	newp->label = label;
 	INIT_HLIST_NODE(&newp->list);
 #ifdef CONFIG_NET_NS
-	newp->lbl_net = hold_net(net);
+	newp->lbl_net = net;
 #endif
 	atomic_set(&newp->refcnt, 1);
 	return newp;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index f45d6db50a45..457303886fd4 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -100,7 +100,6 @@ static void fl_free(struct ip6_flowlabel *fl)
 	if (fl) {
 		if (fl->share == IPV6_FL_S_PROCESS)
 			put_pid(fl->owner.pid);
-		release_net(fl->fl_net);
 		kfree(fl->opt);
 		kfree_rcu(fl, rcu);
 	}
@@ -403,7 +402,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 		}
 	}
 
-	fl->fl_net = hold_net(net);
+	fl->fl_net = net;
 	fl->expires = jiffies;
 	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
 	if (err)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 5bae7243c577..096c6276e6b9 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -203,7 +203,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
 
 	ovs_flow_tbl_destroy(&dp->table);
 	free_percpu(dp->stats_percpu);
-	release_net(ovs_dp_get_net(dp));
 	kfree(dp->ports);
 	kfree(dp);
 }
@@ -1501,7 +1500,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (dp == NULL)
 		goto err_free_reply;
 
-	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
+	ovs_dp_set_net(dp, sock_net(skb->sk));
 
 	/* Allocate table. */
 	err = ovs_flow_tbl_init(&dp->table);
@@ -1575,7 +1574,6 @@ err_destroy_percpu:
 err_destroy_table:
 	ovs_flow_tbl_destroy(&dp->table);
 err_free_dp:
-	release_net(ovs_dp_get_net(dp));
 	kfree(dp);
 err_free_reply:
 	kfree_skb(reply);
-- 
cgit v1.2.3


From 0c5c9fb55106333e773de8c9dd321fa8240caeb3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 11 Mar 2015 23:06:44 -0500
Subject: net: Introduce possible_net_t

Having to say
> #ifdef CONFIG_NET_NS
> 	struct net *net;
> #endif

in structures is a little bit wordy and a little bit error prone.

Instead it is possible to say:
> typedef struct {
> #ifdef CONFIG_NET_NS
>       struct net *net;
> #endif
> } possible_net_t;

And then in a header say:

> 	possible_net_t net;

Which is cleaner and easier to use and easier to test, as the
possible_net_t is always there no matter what the compile options.

Further this allows read_pnet and write_pnet to be functions in all
cases which is better at catching typos.

This change adds possible_net_t, updates the definitions of read_pnet
and write_pnet, updates optional struct net * variables that
write_pnet uses on to have the type possible_net_t, and finally fixes
up the b0rked users of read_pnet and write_pnet.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h            |  8 ++------
 include/net/cfg80211.h               |  4 +---
 include/net/genetlink.h              |  4 +---
 include/net/inet_hashtables.h        |  4 +---
 include/net/ip_vs.h                  |  8 ++++----
 include/net/neighbour.h              |  8 ++------
 include/net/net_namespace.h          | 23 +++++++++++++----------
 include/net/netfilter/nf_conntrack.h |  5 ++---
 include/net/sock.h                   |  4 +---
 include/net/xfrm.h                   |  8 ++------
 net/9p/trans_fd.c                    |  4 ++--
 net/ipv4/ipmr.c                      |  4 +---
 net/ipv6/addrlabel.c                 |  8 ++------
 net/ipv6/ip6mr.c                     |  4 +---
 net/openvswitch/datapath.h           |  4 +---
 net/packet/internal.h                |  4 +---
 16 files changed, 37 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cede40d9cac9..ddab1a2a07a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1721,9 +1721,7 @@ struct net_device {
 	struct netpoll_info __rcu	*npinfo;
 #endif
 
-#ifdef CONFIG_NET_NS
-	struct net		*nd_net;
-#endif
+	possible_net_t			nd_net;
 
 	/* mid-layer private */
 	union {
@@ -1863,9 +1861,7 @@ struct net *dev_net(const struct net_device *dev)
 static inline
 void dev_net_set(struct net_device *dev, struct net *net)
 {
-#ifdef CONFIG_NET_NS
-	dev->nd_net = net;
-#endif
+	write_pnet(&dev->nd_net, net);
 }
 
 static inline bool netdev_uses_dsa(struct net_device *dev)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 64e09e1e8099..f977abec07f6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3183,10 +3183,8 @@ struct wiphy {
 	const struct ieee80211_ht_cap *ht_capa_mod_mask;
 	const struct ieee80211_vht_cap *vht_capa_mod_mask;
 
-#ifdef CONFIG_NET_NS
 	/* the network namespace this phy lives in currently */
-	struct net *_net;
-#endif
+	possible_net_t _net;
 
 #ifdef CONFIG_CFG80211_WEXT
 	const struct iw_handler_def *wext;
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 0574abd3db86..a9af1cc8c1bc 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -92,9 +92,7 @@ struct genl_info {
 	struct genlmsghdr *	genlhdr;
 	void *			userhdr;
 	struct nlattr **	attrs;
-#ifdef CONFIG_NET_NS
-	struct net *		_net;
-#endif
+	possible_net_t		_net;
 	void *			user_ptr[2];
 	struct sock *		dst_sk;
 };
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index dd1950a7e273..bcd64756e5fe 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -76,9 +76,7 @@ struct inet_ehash_bucket {
  * ports are created in O(1) time?  I thought so. ;-)	-DaveM
  */
 struct inet_bind_bucket {
-#ifdef CONFIG_NET_NS
-	struct net		*ib_net;
-#endif
+	possible_net_t		ib_net;
 	unsigned short		port;
 	signed char		fastreuse;
 	signed char		fastreuseport;
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 20fd23398537..4e3731ee4eac 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -47,13 +47,13 @@ static inline struct net *skb_net(const struct sk_buff *skb)
 	 * Start with the most likely hit
 	 * End with BUG
 	 */
-	if (likely(skb->dev && skb->dev->nd_net))
+	if (likely(skb->dev && dev_net(skb->dev)))
 		return dev_net(skb->dev);
 	if (skb_dst(skb) && skb_dst(skb)->dev)
 		return dev_net(skb_dst(skb)->dev);
 	WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
 		      __func__, __LINE__);
-	if (likely(skb->sk && skb->sk->sk_net))
+	if (likely(skb->sk && sock_net(skb->sk)))
 		return sock_net(skb->sk);
 	pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
 		__func__, __LINE__);
@@ -71,11 +71,11 @@ static inline struct net *skb_sknet(const struct sk_buff *skb)
 #ifdef CONFIG_NET_NS
 #ifdef CONFIG_IP_VS_DEBUG
 	/* Start with the most likely hit */
-	if (likely(skb->sk && skb->sk->sk_net))
+	if (likely(skb->sk && sock_net(skb->sk)))
 		return sock_net(skb->sk);
 	WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
 		       __func__, __LINE__);
-	if (likely(skb->dev && skb->dev->nd_net))
+	if (likely(skb->dev && dev_net(skb->dev)))
 		return dev_net(skb->dev);
 	pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
 		__func__, __LINE__);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index d48b8ec8b5f4..e7bdf5170802 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -65,9 +65,7 @@ enum {
 };
 
 struct neigh_parms {
-#ifdef CONFIG_NET_NS
-	struct net *net;
-#endif
+	possible_net_t net;
 	struct net_device *dev;
 	struct list_head list;
 	int	(*neigh_setup)(struct neighbour *);
@@ -167,9 +165,7 @@ struct neigh_ops {
 
 struct pneigh_entry {
 	struct pneigh_entry	*next;
-#ifdef CONFIG_NET_NS
-	struct net		*net;
-#endif
+	possible_net_t		net;
 	struct net_device	*dev;
 	u8			flags;
 	u8			key[0];
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fab51ceeabf3..f733656404de 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -231,24 +231,27 @@ int net_eq(const struct net *net1, const struct net *net2)
 #endif
 
 
+typedef struct {
 #ifdef CONFIG_NET_NS
+	struct net *net;
+#endif
+} possible_net_t;
 
-static inline void write_pnet(struct net **pnet, struct net *net)
+static inline void write_pnet(possible_net_t *pnet, struct net *net)
 {
-	*pnet = net;
+#ifdef CONFIG_NET_NS
+	pnet->net = net;
+#endif
 }
 
-static inline struct net *read_pnet(struct net * const *pnet)
+static inline struct net *read_pnet(const possible_net_t *pnet)
 {
-	return *pnet;
-}
-
+#ifdef CONFIG_NET_NS
+	return pnet->net;
 #else
-
-#define write_pnet(pnet, net)	do { (void)(net);} while (0)
-#define read_pnet(pnet)		(&init_net)
-
+	return &init_net;
 #endif
+}
 
 #define for_each_net(VAR)				\
 	list_for_each_entry(VAR, &net_namespace_list, list)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 74f271a172dd..095433b8a8b0 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -95,9 +95,8 @@ struct nf_conn {
 	/* Timer function; drops refcnt when it goes off. */
 	struct timer_list timeout;
 
-#ifdef CONFIG_NET_NS
-	struct net *ct_net;
-#endif
+	possible_net_t ct_net;
+
 	/* all members below initialized via memset */
 	u8 __nfct_init_offset[0];
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 95b2c1c220f9..9411c3421dd3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -190,9 +190,7 @@ struct sock_common {
 		struct hlist_nulls_node skc_portaddr_node;
 	};
 	struct proto		*skc_prot;
-#ifdef CONFIG_NET_NS
-	struct net	 	*skc_net;
-#endif
+	possible_net_t		skc_net;
 
 #if IS_ENABLED(CONFIG_IPV6)
 	struct in6_addr		skc_v6_daddr;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dc4865e90fe4..d0ac7d7be8a7 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -126,9 +126,7 @@ struct xfrm_state_walk {
 
 /* Full description of state of transformer. */
 struct xfrm_state {
-#ifdef CONFIG_NET_NS
-	struct net		*xs_net;
-#endif
+	possible_net_t		xs_net;
 	union {
 		struct hlist_node	gclist;
 		struct hlist_node	bydst;
@@ -522,9 +520,7 @@ struct xfrm_policy_queue {
 };
 
 struct xfrm_policy {
-#ifdef CONFIG_NET_NS
-	struct net		*xp_net;
-#endif
+	possible_net_t		xp_net;
 	struct hlist_node	bydst;
 	struct hlist_node	byidx;
 
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 80d08f6664cb..3e3d82d8ff70 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -940,7 +940,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
 	sin_server.sin_family = AF_INET;
 	sin_server.sin_addr.s_addr = in_aton(addr);
 	sin_server.sin_port = htons(opts.port);
-	err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_INET,
+	err = __sock_create(current->nsproxy->net_ns, PF_INET,
 			    SOCK_STREAM, IPPROTO_TCP, &csocket, 1);
 	if (err) {
 		pr_err("%s (%d): problem creating socket\n",
@@ -988,7 +988,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
 
 	sun_server.sun_family = PF_UNIX;
 	strcpy(sun_server.sun_path, addr);
-	err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_UNIX,
+	err = __sock_create(current->nsproxy->net_ns, PF_UNIX,
 			    SOCK_STREAM, 0, &csocket, 1);
 	if (err < 0) {
 		pr_err("%s (%d): problem creating socket\n",
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d78427652d2..5b188832800f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -73,9 +73,7 @@
 
 struct mr_table {
 	struct list_head	list;
-#ifdef CONFIG_NET_NS
-	struct net		*net;
-#endif
+	possible_net_t		net;
 	u32			id;
 	struct sock __rcu	*mroute_sk;
 	struct timer_list	ipmr_expire_timer;
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 59c793040498..3cc50e2d3bf5 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -29,9 +29,7 @@
  * Policy Table
  */
 struct ip6addrlbl_entry {
-#ifdef CONFIG_NET_NS
-	struct net *lbl_net;
-#endif
+	possible_net_t lbl_net;
 	struct in6_addr prefix;
 	int prefixlen;
 	int ifindex;
@@ -237,9 +235,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
 	newp->addrtype = addrtype;
 	newp->label = label;
 	INIT_HLIST_NODE(&newp->list);
-#ifdef CONFIG_NET_NS
-	newp->lbl_net = net;
-#endif
+	write_pnet(&newp->lbl_net, net);
 	atomic_set(&newp->refcnt, 1);
 	return newp;
 }
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 34b682617f50..4b9315aa273e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -56,9 +56,7 @@
 
 struct mr6_table {
 	struct list_head	list;
-#ifdef CONFIG_NET_NS
-	struct net		*net;
-#endif
+	possible_net_t		net;
 	u32			id;
 	struct sock		*mroute6_sk;
 	struct timer_list	ipmr_expire_timer;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 3ece94563079..4ec4a480b147 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -84,10 +84,8 @@ struct datapath {
 	/* Stats. */
 	struct dp_stats_percpu __percpu *stats_percpu;
 
-#ifdef CONFIG_NET_NS
 	/* Network namespace ref. */
-	struct net *net;
-#endif
+	possible_net_t net;
 
 	u32 user_features;
 };
diff --git a/net/packet/internal.h b/net/packet/internal.h
index cdddf6a30399..fe6e20caea1d 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -74,9 +74,7 @@ extern struct mutex fanout_mutex;
 #define PACKET_FANOUT_MAX	256
 
 struct packet_fanout {
-#ifdef CONFIG_NET_NS
-	struct net		*net;
-#endif
+	possible_net_t		net;
 	unsigned int		num_members;
 	u16			id;
 	u8			type;
-- 
cgit v1.2.3


From 80f1d68ccba70b1060c9c7360ca83da430f66bed Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 12 Mar 2015 17:21:42 +0100
Subject: ebpf: verifier: check that call reg with ARG_ANYTHING is initialized

I noticed that a helper function with argument type ARG_ANYTHING does
not need to have an initialized value (register).

This can worst case lead to unintented stack memory leakage in future
helper functions if they are not carefully designed, or unintended
application behaviour in case the application developer was not careful
enough to match a correct helper function signature in the API.

The underlying issue is that ARG_ANYTHING should actually be split
into two different semantics:

  1) ARG_DONTCARE for function arguments that the helper function
     does not care about (in other words: the default for unused
     function arguments), and

  2) ARG_ANYTHING that is an argument actually being used by a
     helper function and *guaranteed* to be an initialized register.

The current risk is low: ARG_ANYTHING is only used for the 'flags'
argument (r4) in bpf_map_update_elem() that internally does strict
checking.

Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h   | 4 +++-
 kernel/bpf/verifier.c | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a884f5a2c503..80f2e0fc3d02 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -44,7 +44,7 @@ struct bpf_map_type_list {
 
 /* function argument constraints */
 enum bpf_arg_type {
-	ARG_ANYTHING = 0,	/* any argument is ok */
+	ARG_DONTCARE = 0,	/* unused argument in helper function */
 
 	/* the following constraints used to prototype
 	 * bpf_map_lookup/update/delete_elem() functions
@@ -58,6 +58,8 @@ enum bpf_arg_type {
 	 */
 	ARG_PTR_TO_STACK,	/* any pointer to eBPF program stack */
 	ARG_CONST_STACK_SIZE,	/* number of bytes accessed from stack */
+
+	ARG_ANYTHING,		/* any (initialized) argument is ok */
 };
 
 /* type of values returned from helper functions */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bdf4192a889b..e6b522496250 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -755,7 +755,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 	enum bpf_reg_type expected_type;
 	int err = 0;
 
-	if (arg_type == ARG_ANYTHING)
+	if (arg_type == ARG_DONTCARE)
 		return 0;
 
 	if (reg->type == NOT_INIT) {
@@ -763,6 +763,9 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 		return -EACCES;
 	}
 
+	if (arg_type == ARG_ANYTHING)
+		return 0;
+
 	if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY ||
 	    arg_type == ARG_PTR_TO_MAP_VALUE) {
 		expected_type = PTR_TO_STACK;
-- 
cgit v1.2.3


From bd337c581b2b0d933d37f664bf55b342577fed3a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Mar 2015 16:44:03 -0700
Subject: ipv6: add missing ireq_net & ir_cookie initializations

I forgot to update dccp_v6_conn_request() & cookie_v6_check().
They both need to set ireq->ireq_net and ireq->ir_cookie

Lets clear ireq->ir_cookie in inet_reqsk_alloc()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 33cf7c90fe2f ("net: add real socket cookies")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 1 +
 net/dccp/ipv4.c         | 1 -
 net/dccp/ipv6.c         | 1 +
 net/ipv4/tcp_input.c    | 1 -
 net/ipv6/syncookies.c   | 1 +
 5 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index e565afdc14ad..30f7170abbf3 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -249,6 +249,7 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops
 	if (req != NULL) {
 		kmemcheck_annotate_bitfield(ireq, flags);
 		ireq->opt = NULL;
+		atomic64_set(&ireq->ir_cookie, 0);
 	}
 
 	return req;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a78e0b999f96..f695874b5ade 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -642,7 +642,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
 	write_pnet(&ireq->ireq_net, sock_net(sk));
-	atomic64_set(&ireq->ir_cookie, 0);
 
 	/*
 	 * Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6bcaa33cd804..703a21acf434 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -403,6 +403,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+	write_pnet(&ireq->ireq_net, sock_net(sk));
 
 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 26f24995bd3d..da61a8e75f68 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5966,7 +5966,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);
 	write_pnet(&inet_rsk(req)->ireq_net, sock_net(sk));
-	atomic64_set(&inet_rsk(req)->ir_cookie, 0);
 
 	af_ops->init_req(req, sk, skb);
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7337fc7947e2..66bba6a84e47 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -196,6 +196,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
 	treq->listener = NULL;
+	write_pnet(&ireq->ireq_net, sock_net(sk));
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto out_free;
-- 
cgit v1.2.3


From 10feb428a5045d5eb18a5d755fbb8f0cc9645626 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Mar 2015 16:44:04 -0700
Subject: inet: add TCP_NEW_SYN_RECV state

TCP_SYN_RECV state is currently used by fast open sockets.

Initial TCP requests (the pseudo sockets created when a SYN is received)
are not yet associated to a state. They are attached to their parent,
and the parent is in TCP_LISTEN state.

This commit adds TCP_NEW_SYN_RECV state, so that we can convert
TCP stack to a different schem gradually.

This state is not exported to user space.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp_states.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index b0b645988bd8..50e78a74d0df 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -25,6 +25,7 @@ enum {
 	TCP_LAST_ACK,
 	TCP_LISTEN,
 	TCP_CLOSING,	/* Now a valid state */
+	TCP_NEW_SYN_RECV,
 
 	TCP_MAX_STATES	/* Leave at the end! */
 };
@@ -44,7 +45,8 @@ enum {
 	TCPF_CLOSE_WAIT	 = (1 << 8),
 	TCPF_LAST_ACK	 = (1 << 9),
 	TCPF_LISTEN	 = (1 << 10),
-	TCPF_CLOSING	 = (1 << 11) 
+	TCPF_CLOSING	 = (1 << 11),
+	TCPF_NEW_SYN_RECV = (1 << 12),
 };
 
 #endif	/* _LINUX_TCP_STATES_H */
-- 
cgit v1.2.3


From d34ac51b76e8c7de6094cfb11780ef9c2b93469f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Mar 2015 16:44:05 -0700
Subject: inet: add ireq_state field to inet_request_sock

We need to identify request sock when they'll be visible in
global ehash table.

ireq_state is an alias to req.__req_common.skc_state.

Its value is set to TCP_NEW_SYN_RECV

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 30f7170abbf3..b31f01de5cd6 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -27,6 +27,7 @@
 #include <net/sock.h>
 #include <net/request_sock.h>
 #include <net/netns/hash.h>
+#include <net/tcp_states.h>
 
 /** struct ip_options - IP Options
  *
@@ -79,6 +80,7 @@ struct inet_request_sock {
 #define ir_iif			req.__req_common.skc_bound_dev_if
 #define ir_cookie		req.__req_common.skc_cookie
 #define ireq_net		req.__req_common.skc_net
+#define ireq_state		req.__req_common.skc_state
 
 	kmemcheck_bitfield_begin(flags);
 	u16			snd_wscale : 4,
@@ -250,6 +252,7 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops
 		kmemcheck_annotate_bitfield(ireq, flags);
 		ireq->opt = NULL;
 		atomic64_set(&ireq->ir_cookie, 0);
+		ireq->ireq_state = TCP_NEW_SYN_RECV;
 	}
 
 	return req;
-- 
cgit v1.2.3


From 1e2e01172fd11b4dbfee746c0c8fbcaa9dbf22a0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Mar 2015 16:44:06 -0700
Subject: inet: add rsk_refcnt/ireq_refcnt to request socks

When request socks will be in ehash, they'll need to be refcounted.

This patch adds rsk_refcnt/ireq_refcnt macros, and adds
reqsk_put() function, but nothing yet use them.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h    | 1 +
 include/net/request_sock.h | 8 ++++++++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index b31f01de5cd6..9d6470c16a27 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -81,6 +81,7 @@ struct inet_request_sock {
 #define ir_cookie		req.__req_common.skc_cookie
 #define ireq_net		req.__req_common.skc_net
 #define ireq_state		req.__req_common.skc_state
+#define ireq_refcnt		req.__req_common.skc_refcnt
 
 	kmemcheck_bitfield_begin(flags);
 	u16			snd_wscale : 4,
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 7f830ff67f08..e255ecf8bb40 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -49,6 +49,8 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req);
  */
 struct request_sock {
 	struct sock_common		__req_common;
+#define rsk_refcnt			__req_common.skc_refcnt
+
 	struct request_sock		*dl_next;
 	u16				mss;
 	u8				num_retrans; /* number of retransmits */
@@ -86,6 +88,12 @@ static inline void reqsk_free(struct request_sock *req)
 	__reqsk_free(req);
 }
 
+static inline void reqsk_put(struct request_sock *req)
+{
+	if (atomic_dec_and_test(&req->rsk_refcnt))
+		reqsk_free(req);
+}
+
 extern int sysctl_max_syn_backlog;
 
 /** struct listen_sock - listen state
-- 
cgit v1.2.3


From 41b822c59e21414d829bcfd00df0c8f7f13b1b95 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Mar 2015 16:44:08 -0700
Subject: inet: prepare sock_edemux() & sock_gen_put() for new SYN_RECV state

sock_edemux() & sock_gen_put() should be ready to cope with request socks.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h | 5 +++++
 include/net/sock.h         | 2 +-
 net/core/sock.c            | 2 ++
 net/ipv4/inet_hashtables.c | 2 ++
 4 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index e255ecf8bb40..3275cf31f731 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -77,6 +77,11 @@ static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *op
 	return req;
 }
 
+static inline struct request_sock *inet_reqsk(struct sock *sk)
+{
+	return (struct request_sock *)sk;
+}
+
 static inline void __reqsk_free(struct request_sock *req)
 {
 	kmem_cache_free(req->rsk_ops->slab, req);
diff --git a/include/net/sock.h b/include/net/sock.h
index 9411c3421dd3..f10832ca2e90 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1625,7 +1625,7 @@ static inline void sock_put(struct sock *sk)
 		sk_free(sk);
 }
 /* Generic version of sock_put(), dealing with all sockets
- * (TCP_TIMEWAIT, ESTABLISHED...)
+ * (TCP_TIMEWAIT, TCP_NEW_SYN_RECV, ESTABLISHED...)
  */
 void sock_gen_put(struct sock *sk);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 63d871a91b5c..4bc42efb3e40 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1668,6 +1668,8 @@ void sock_edemux(struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_TIME_WAIT)
 		inet_twsk_put(inet_twsk(sk));
+	else if (sk->sk_state == TCP_NEW_SYN_RECV)
+		reqsk_put(inet_reqsk(sk));
 	else
 		sock_put(sk);
 }
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f6a12b97d12b..64401a2fdd33 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -262,6 +262,8 @@ void sock_gen_put(struct sock *sk)
 
 	if (sk->sk_state == TCP_TIME_WAIT)
 		inet_twsk_free(inet_twsk(sk));
+	else if (sk->sk_state == TCP_NEW_SYN_RECV)
+		reqsk_free(inet_reqsk(sk));
 	else
 		sk_free(sk);
 }
-- 
cgit v1.2.3


From 3f66b083a5b7f1a63540c24df3679c24f2e935a9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 12 Mar 2015 16:44:10 -0700
Subject: inet: introduce ireq_family

Before inserting request socks into general hash table,
fill their socket family.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 1 +
 net/dccp/ipv4.c         | 1 +
 net/dccp/ipv6.c         | 1 +
 net/ipv4/inet_diag.c    | 2 +-
 net/ipv4/syncookies.c   | 1 +
 net/ipv4/tcp_ipv4.c     | 1 +
 net/ipv6/syncookies.c   | 1 +
 net/ipv6/tcp_ipv6.c     | 1 +
 8 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 9d6470c16a27..b3053fdd871e 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -82,6 +82,7 @@ struct inet_request_sock {
 #define ireq_net		req.__req_common.skc_net
 #define ireq_state		req.__req_common.skc_state
 #define ireq_refcnt		req.__req_common.skc_refcnt
+#define ireq_family		req.__req_common.skc_family
 
 	kmemcheck_bitfield_begin(flags);
 	u16			snd_wscale : 4,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f695874b5ade..8f6f4004daac 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -642,6 +642,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
 	write_pnet(&ireq->ireq_net, sock_net(sk));
+	ireq->ireq_family = AF_INET;
 
 	/*
 	 * Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 703a21acf434..5166b0043f95 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -404,6 +404,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 	write_pnet(&ireq->ireq_net, sock_net(sk));
+	ireq->ireq_family = AF_INET6;
 
 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c55a6fa3162d..43789c99031f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -728,7 +728,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 		return -EMSGSIZE;
 
 	r = nlmsg_data(nlh);
-	r->idiag_family = sk->sk_family;
+	r->idiag_family = ireq->ireq_family;
 	r->idiag_state = TCP_SYN_RECV;
 	r->idiag_timer = 1;
 	r->idiag_retrans = req->num_retrans;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 18e5a67fda81..0c432730c7b4 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -347,6 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
 	treq->listener		= NULL;
 	write_pnet(&ireq->ireq_net, sock_net(sk));
+	ireq->ireq_family = AF_INET;
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 70b0f701bbdb..1f514a0c5e60 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1228,6 +1228,7 @@ static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
 	ireq->no_srccheck = inet_sk(sk)->transparent;
 	ireq->opt = tcp_v4_save_options(skb);
+	ireq->ireq_family = AF_INET;
 }
 
 static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 66bba6a84e47..58875ce8e178 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -197,6 +197,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	treq = tcp_rsk(req);
 	treq->listener = NULL;
 	write_pnet(&ireq->ireq_net, sock_net(sk));
+	ireq->ireq_family = AF_INET6;
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto out_free;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1ccfede7d55f..c5fc6a5e4adc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -749,6 +749,7 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
 		atomic_inc(&skb->users);
 		ireq->pktopts = skb;
 	}
+	ireq->ireq_family = AF_INET6;
 }
 
 static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl,
-- 
cgit v1.2.3


From a5b6846f9e1a080493210013385c28faecee36f0 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 12 Mar 2015 15:28:40 +0100
Subject: rhashtable: kill ht->shift atomic operations

Commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker
queue") changed ht->shift to be atomic, which is actually unnecessary.

Instead of leaving the current shift in the core rhashtable structure,
it can be cached inside the individual bucket tables.

There, it will only be initialized once during a new table allocation
in the shrink/expansion slow path, and from then onward it stays immutable
for the rest of the bucket table liftime.

That allows shift to be non-atomic. The patch also moves hash_rnd
management into the table setup. The rhashtable structure now consumes
3 instead of 4 cachelines.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Ying Xue <ying.xue@windriver.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  6 ++---
 lib/rhashtable.c           | 55 +++++++++++++++++++++-------------------------
 2 files changed, 28 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 5ef8ea551556..c93ff8ac474a 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -50,6 +50,7 @@ struct rhash_head {
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
  * @hash_rnd: Random seed to fold into hash
+ * @shift: Current size (1 << shift)
  * @locks_mask: Mask to apply before accessing locks[]
  * @locks: Array of spinlocks protecting individual buckets
  * @buckets: size * hash buckets
@@ -57,6 +58,7 @@ struct rhash_head {
 struct bucket_table {
 	size_t			size;
 	u32			hash_rnd;
+	u32			shift;
 	unsigned int		locks_mask;
 	spinlock_t		*locks;
 
@@ -99,7 +101,6 @@ struct rhashtable_params {
  * @tbl: Bucket table
  * @future_tbl: Table under construction during expansion/shrinking
  * @nelems: Number of elements in table
- * @shift: Current size (1 << shift)
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
@@ -110,12 +111,11 @@ struct rhashtable {
 	struct bucket_table __rcu	*tbl;
 	struct bucket_table __rcu       *future_tbl;
 	atomic_t			nelems;
-	atomic_t			shift;
+	bool                            being_destroyed;
 	struct rhashtable_params	p;
 	struct work_struct		run_work;
 	struct mutex                    mutex;
 	struct list_head		walkers;
-	bool                            being_destroyed;
 };
 
 /**
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 68210cc2bab8..adea791ea3ab 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -147,7 +147,7 @@ static void bucket_table_free(const struct bucket_table *tbl)
 }
 
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
-					       size_t nbuckets)
+					       size_t nbuckets, u32 hash_rnd)
 {
 	struct bucket_table *tbl = NULL;
 	size_t size;
@@ -162,6 +162,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 		return NULL;
 
 	tbl->size = nbuckets;
+	tbl->shift = ilog2(nbuckets);
+	tbl->hash_rnd = hash_rnd;
 
 	if (alloc_bucket_locks(ht, tbl) < 0) {
 		bucket_table_free(tbl);
@@ -177,25 +179,27 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 /**
  * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
  * @ht:		hash table
- * @new_size:	new table size
+ * @tbl:	current table
  */
-static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
+static bool rht_grow_above_75(const struct rhashtable *ht,
+			      const struct bucket_table *tbl)
 {
 	/* Expand table when exceeding 75% load */
-	return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
-	       (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift);
+	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
+	       (!ht->p.max_shift || tbl->shift < ht->p.max_shift);
 }
 
 /**
  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
  * @ht:		hash table
- * @new_size:	new table size
+ * @tbl:	current table
  */
-static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
+static bool rht_shrink_below_30(const struct rhashtable *ht,
+				const struct bucket_table *tbl)
 {
 	/* Shrink table beneath 30% load */
-	return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
-	       (atomic_read(&ht->shift) > ht->p.min_shift);
+	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
+	       tbl->shift > ht->p.min_shift;
 }
 
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
@@ -310,16 +314,11 @@ int rhashtable_expand(struct rhashtable *ht)
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
+	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, old_tbl->hash_rnd);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
-	new_tbl->hash_rnd = old_tbl->hash_rnd;
-
-	atomic_inc(&ht->shift);
-
 	rhashtable_rehash(ht, new_tbl);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(rhashtable_expand);
@@ -342,20 +341,15 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
  */
 int rhashtable_shrink(struct rhashtable *ht)
 {
-	struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, tbl->size / 2);
+	new_tbl = bucket_table_alloc(ht, old_tbl->size / 2, old_tbl->hash_rnd);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
-	new_tbl->hash_rnd = tbl->hash_rnd;
-
-	atomic_dec(&ht->shift);
-
 	rhashtable_rehash(ht, new_tbl);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(rhashtable_shrink);
@@ -376,9 +370,9 @@ static void rht_deferred_worker(struct work_struct *work)
 	list_for_each_entry(walker, &ht->walkers, list)
 		walker->resize = true;
 
-	if (rht_grow_above_75(ht, tbl->size))
+	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
-	else if (rht_shrink_below_30(ht, tbl->size))
+	else if (rht_shrink_below_30(ht, tbl))
 		rhashtable_shrink(ht);
 unlock:
 	mutex_unlock(&ht->mutex);
@@ -431,7 +425,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	rcu_assign_pointer(tbl->buckets[hash], obj);
 
 	atomic_inc(&ht->nelems);
-	if (no_resize_running && rht_grow_above_75(ht, tbl->size))
+	if (no_resize_running && rht_grow_above_75(ht, tbl))
 		schedule_work(&ht->run_work);
 
 exit:
@@ -539,7 +533,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 		bool no_resize_running = tbl == old_tbl;
 
 		atomic_dec(&ht->nelems);
-		if (no_resize_running && rht_shrink_below_30(ht, tbl->size))
+		if (no_resize_running && rht_shrink_below_30(ht, tbl))
 			schedule_work(&ht->run_work);
 	}
 
@@ -913,6 +907,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 {
 	struct bucket_table *tbl;
 	size_t size;
+	u32 hash_rnd;
 
 	size = HASH_DEFAULT_SIZE;
 
@@ -939,14 +934,14 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	else
 		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
 
-	tbl = bucket_table_alloc(ht, size);
+	get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+
+	tbl = bucket_table_alloc(ht, size, hash_rnd);
 	if (tbl == NULL)
 		return -ENOMEM;
 
-	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
-
 	atomic_set(&ht->nelems, 0);
-	atomic_set(&ht->shift, ilog2(tbl->size));
+
 	RCU_INIT_POINTER(ht->tbl, tbl);
 	RCU_INIT_POINTER(ht->future_tbl, tbl);
 
-- 
cgit v1.2.3


From 098a697b497e3154a1a583c1d34c67568acaadcc Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 13 Mar 2015 00:07:44 -0500
Subject: tcp_metrics: Use a single hash table for all network namespaces.

Now that all of the operations are safe on a single hash table
accross network namespaces, allocate a single global hash table
and update the code to use it.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h |  2 --
 net/ipv4/tcp_metrics.c   | 66 ++++++++++++++++++++++--------------------------
 2 files changed, 30 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 8f3a1a1a5a94..614a49be68a9 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -54,8 +54,6 @@ struct netns_ipv4 {
 	struct sock		*mc_autojoin_sk;
 
 	struct inet_peer_base	*peers;
-	struct tcpm_hash_bucket	*tcp_metrics_hash;
-	unsigned int		tcp_metrics_hash_log;
 	struct sock  * __percpu	*tcp_sk;
 	struct netns_frags	frags;
 #ifdef CONFIG_NETFILTER
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index baccb070427d..366728cbee4a 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -97,6 +97,9 @@ struct tcpm_hash_bucket {
 	struct tcp_metrics_block __rcu	*chain;
 };
 
+static struct tcpm_hash_bucket	*tcp_metrics_hash __read_mostly;
+static unsigned int		tcp_metrics_hash_log __read_mostly;
+
 static DEFINE_SPINLOCK(tcp_metrics_lock);
 
 static void tcpm_suck_dst(struct tcp_metrics_block *tm,
@@ -177,7 +180,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
 	if (unlikely(reclaim)) {
 		struct tcp_metrics_block *oldest;
 
-		oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain);
+		oldest = rcu_dereference(tcp_metrics_hash[hash].chain);
 		for (tm = rcu_dereference(oldest->tcpm_next); tm;
 		     tm = rcu_dereference(tm->tcpm_next)) {
 			if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
@@ -196,8 +199,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
 	tcpm_suck_dst(tm, dst, true);
 
 	if (likely(!reclaim)) {
-		tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain;
-		rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm);
+		tm->tcpm_next = tcp_metrics_hash[hash].chain;
+		rcu_assign_pointer(tcp_metrics_hash[hash].chain, tm);
 	}
 
 out_unlock:
@@ -221,7 +224,7 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s
 	struct tcp_metrics_block *tm;
 	int depth = 0;
 
-	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
+	for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
 	     tm = rcu_dereference(tm->tcpm_next)) {
 		if (addr_same(&tm->tcpm_saddr, saddr) &&
 		    addr_same(&tm->tcpm_daddr, daddr) &&
@@ -261,9 +264,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
 
 	net = dev_net(dst->dev);
 	hash ^= net_hash_mix(net);
-	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+	hash = hash_32(hash, tcp_metrics_hash_log);
 
-	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
+	for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
 	     tm = rcu_dereference(tm->tcpm_next)) {
 		if (addr_same(&tm->tcpm_saddr, &saddr) &&
 		    addr_same(&tm->tcpm_daddr, &daddr) &&
@@ -310,9 +313,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock
 
 	net = twsk_net(tw);
 	hash ^= net_hash_mix(net);
-	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+	hash = hash_32(hash, tcp_metrics_hash_log);
 
-	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
+	for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
 	     tm = rcu_dereference(tm->tcpm_next)) {
 		if (addr_same(&tm->tcpm_saddr, &saddr) &&
 		    addr_same(&tm->tcpm_daddr, &daddr) &&
@@ -360,7 +363,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
 
 	net = dev_net(dst->dev);
 	hash ^= net_hash_mix(net);
-	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+	hash = hash_32(hash, tcp_metrics_hash_log);
 
 	tm = __tcp_get_metrics(&saddr, &daddr, net, hash);
 	if (tm == TCP_METRICS_RECLAIM_PTR)
@@ -911,13 +914,13 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb,
 			       struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
+	unsigned int max_rows = 1U << tcp_metrics_hash_log;
 	unsigned int row, s_row = cb->args[0];
 	int s_col = cb->args[1], col = s_col;
 
 	for (row = s_row; row < max_rows; row++, s_col = 0) {
 		struct tcp_metrics_block *tm;
-		struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row;
+		struct tcpm_hash_bucket *hb = tcp_metrics_hash + row;
 
 		rcu_read_lock();
 		for (col = 0, tm = rcu_dereference(hb->chain); tm;
@@ -1010,10 +1013,10 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		goto nla_put_failure;
 
 	hash ^= net_hash_mix(net);
-	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+	hash = hash_32(hash, tcp_metrics_hash_log);
 	ret = -ESRCH;
 	rcu_read_lock();
-	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
+	for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
 	     tm = rcu_dereference(tm->tcpm_next)) {
 		if (addr_same(&tm->tcpm_daddr, &daddr) &&
 		    (!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
@@ -1045,8 +1048,8 @@ out_free:
 
 static void tcp_metrics_flush_all(struct net *net)
 {
-	unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
-	struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash;
+	unsigned int max_rows = 1U << tcp_metrics_hash_log;
+	struct tcpm_hash_bucket *hb = tcp_metrics_hash;
 	struct tcp_metrics_block *tm;
 	unsigned int row;
 
@@ -1090,8 +1093,8 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		src = false;
 
 	hash ^= net_hash_mix(net);
-	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
-	hb = net->ipv4.tcp_metrics_hash + hash;
+	hash = hash_32(hash, tcp_metrics_hash_log);
+	hb = tcp_metrics_hash + hash;
 	pp = &hb->chain;
 	spin_lock_bh(&tcp_metrics_lock);
 	for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) {
@@ -1147,6 +1150,9 @@ static int __net_init tcp_net_metrics_init(struct net *net)
 	size_t size;
 	unsigned int slots;
 
+	if (!net_eq(net, &init_net))
+		return 0;
+
 	slots = tcpmhash_entries;
 	if (!slots) {
 		if (totalram_pages >= 128 * 1024)
@@ -1155,14 +1161,14 @@ static int __net_init tcp_net_metrics_init(struct net *net)
 			slots = 8 * 1024;
 	}
 
-	net->ipv4.tcp_metrics_hash_log = order_base_2(slots);
-	size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log;
+	tcp_metrics_hash_log = order_base_2(slots);
+	size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log;
 
-	net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
-	if (!net->ipv4.tcp_metrics_hash)
-		net->ipv4.tcp_metrics_hash = vzalloc(size);
+	tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+	if (!tcp_metrics_hash)
+		tcp_metrics_hash = vzalloc(size);
 
-	if (!net->ipv4.tcp_metrics_hash)
+	if (!tcp_metrics_hash)
 		return -ENOMEM;
 
 	return 0;
@@ -1170,19 +1176,7 @@ static int __net_init tcp_net_metrics_init(struct net *net)
 
 static void __net_exit tcp_net_metrics_exit(struct net *net)
 {
-	unsigned int i;
-
-	for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) {
-		struct tcp_metrics_block *tm, *next;
-
-		tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
-		while (tm) {
-			next = rcu_dereference_protected(tm->tcpm_next, 1);
-			kfree(tm);
-			tm = next;
-		}
-	}
-	kvfree(net->ipv4.tcp_metrics_hash);
+	tcp_metrics_flush_all(net);
 }
 
 static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
-- 
cgit v1.2.3


From cc91cb042ce5dab66fedeb94260b18f193ed33d9 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Thu, 12 Mar 2015 22:30:58 -0700
Subject: Bluetooth: Add support connectable advertising setting

The patch adds a second advertising setting that allows switching of the
controller into connectable mode independent of the global connectable
setting.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h |  1 +
 net/bluetooth/mgmt.c        | 39 ++++++++++++++++++++++++++++++---------
 2 files changed, 31 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index f76f45ae76c3..0f3413b285a5 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -217,6 +217,7 @@ enum {
 	HCI_HS_ENABLED,
 	HCI_LE_ENABLED,
 	HCI_ADVERTISING,
+	HCI_ADVERTISING_CONNECTABLE,
 	HCI_CONNECTABLE,
 	HCI_DISCOVERABLE,
 	HCI_LIMITED_DISCOVERABLE,
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 025f29bf1f1a..0761f2e0fefa 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1110,7 +1110,10 @@ static void enable_advertising(struct hci_request *req)
 	 */
 	clear_bit(HCI_LE_ADV, &hdev->dev_flags);
 
-	connectable = get_connectable(hdev);
+	if (test_bit(HCI_ADVERTISING_CONNECTABLE, &hdev->dev_flags))
+		connectable = true;
+	else
+		connectable = get_connectable(hdev);
 
 	/* Set require_privacy to true only when non-connectable
 	 * advertising is used. In that case it is fine to use a
@@ -4430,7 +4433,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct mgmt_mode *cp = data;
 	struct mgmt_pending_cmd *cmd;
 	struct hci_request req;
-	u8 val, enabled, status;
+	u8 val, status;
 	int err;
 
 	BT_DBG("request for %s", hdev->name);
@@ -4440,29 +4443,42 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
 				       status);
 
-	if (cp->val != 0x00 && cp->val != 0x01)
+	if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02)
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
 				       MGMT_STATUS_INVALID_PARAMS);
 
 	hci_dev_lock(hdev);
 
 	val = !!cp->val;
-	enabled = test_bit(HCI_ADVERTISING, &hdev->dev_flags);
 
 	/* The following conditions are ones which mean that we should
 	 * not do any HCI communication but directly send a mgmt
 	 * response to user space (after toggling the flag if
 	 * necessary).
 	 */
-	if (!hdev_is_powered(hdev) || val == enabled ||
+	if (!hdev_is_powered(hdev) ||
+	    (val == test_bit(HCI_ADVERTISING, &hdev->dev_flags) &&
+	     (cp->val == 0x02) == test_bit(HCI_ADVERTISING_CONNECTABLE,
+					   &hdev->dev_flags)) ||
 	    hci_conn_num(hdev, LE_LINK) > 0 ||
 	    (test_bit(HCI_LE_SCAN, &hdev->dev_flags) &&
 	     hdev->le_scan_type == LE_SCAN_ACTIVE)) {
-		bool changed = false;
+		bool changed;
 
-		if (val != test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
-			change_bit(HCI_ADVERTISING, &hdev->dev_flags);
-			changed = true;
+		if (cp->val) {
+			changed = !test_and_set_bit(HCI_ADVERTISING,
+						    &hdev->dev_flags);
+			if (cp->val == 0x02)
+				set_bit(HCI_ADVERTISING_CONNECTABLE,
+					&hdev->dev_flags);
+			else
+				clear_bit(HCI_ADVERTISING_CONNECTABLE,
+					  &hdev->dev_flags);
+		} else {
+			changed = test_and_clear_bit(HCI_ADVERTISING,
+						     &hdev->dev_flags);
+			clear_bit(HCI_ADVERTISING_CONNECTABLE,
+				  &hdev->dev_flags);
 		}
 
 		err = send_settings_rsp(sk, MGMT_OP_SET_ADVERTISING, hdev);
@@ -4490,6 +4506,11 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	hci_req_init(&req, hdev);
 
+	if (cp->val == 0x02)
+		set_bit(HCI_ADVERTISING_CONNECTABLE, &hdev->dev_flags);
+	else
+		clear_bit(HCI_ADVERTISING_CONNECTABLE, &hdev->dev_flags);
+
 	if (val)
 		enable_advertising(&req);
 	else
-- 
cgit v1.2.3


From d7a5a11d7fa80beb43d5f7cb421c86f9b4d21200 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 13 Mar 2015 02:11:00 -0700
Subject: Bluetooth: Introduce hci_dev_test_flag helper macro

Instead of manually coding test_bit on hdev->dev_flags all the time,
use hci_dev_test_flag helper macro.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  14 +--
 net/bluetooth/hci_conn.c         |  14 +--
 net/bluetooth/hci_core.c         | 120 ++++++++++-----------
 net/bluetooth/hci_debugfs.c      |   4 +-
 net/bluetooth/hci_event.c        |  92 ++++++++--------
 net/bluetooth/hci_request.c      |  28 ++---
 net/bluetooth/hci_sock.c         |   8 +-
 net/bluetooth/l2cap_core.c       |   8 +-
 net/bluetooth/mgmt.c             | 224 +++++++++++++++++++--------------------
 net/bluetooth/smp.c              |  44 ++++----
 10 files changed, 278 insertions(+), 278 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 5cc5a192359d..05e95a75aba5 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -502,6 +502,8 @@ extern struct list_head hci_cb_list;
 extern rwlock_t hci_dev_list_lock;
 extern struct mutex hci_cb_list_lock;
 
+#define hci_dev_test_flag(hdev, nr)   test_bit((nr), &(hdev)->dev_flags)
+
 /* ----- HCI interface to upper protocols ----- */
 int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
 int l2cap_disconn_ind(struct hci_conn *hcon);
@@ -598,14 +600,14 @@ enum {
 static inline bool hci_conn_ssp_enabled(struct hci_conn *conn)
 {
 	struct hci_dev *hdev = conn->hdev;
-	return test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
+	return hci_dev_test_flag(hdev, HCI_SSP_ENABLED) &&
 	       test_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
 }
 
 static inline bool hci_conn_sc_enabled(struct hci_conn *conn)
 {
 	struct hci_dev *hdev = conn->hdev;
-	return test_bit(HCI_SC_ENABLED, &hdev->dev_flags) &&
+	return hci_dev_test_flag(hdev, HCI_SC_ENABLED) &&
 	       test_bit(HCI_CONN_SC_ENABLED, &conn->flags);
 }
 
@@ -1025,10 +1027,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define lmp_host_le_capable(dev)   (!!((dev)->features[1][0] & LMP_HOST_LE))
 #define lmp_host_le_br_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE_BREDR))
 
-#define hdev_is_powered(hdev) (test_bit(HCI_UP, &hdev->flags) && \
-				!test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
-#define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \
-			       test_bit(HCI_SC_ENABLED, &(dev)->dev_flags))
+#define hdev_is_powered(dev)   (test_bit(HCI_UP, &(dev)->flags) && \
+				!hci_dev_test_flag(dev, HCI_AUTO_OFF))
+#define bredr_sc_enabled(dev)  (lmp_sc_capable(dev) && \
+				hci_dev_test_flag(dev, HCI_SC_ENABLED))
 
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 91ebb9cb31de..5444e194eb4e 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -571,7 +571,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
 
 	list_for_each_entry(d, &hci_dev_list, list) {
 		if (!test_bit(HCI_UP, &d->flags) ||
-		    test_bit(HCI_USER_CHANNEL, &d->dev_flags) ||
+		    hci_dev_test_flag(d, HCI_USER_CHANNEL) ||
 		    d->dev_type != HCI_BREDR)
 			continue;
 
@@ -734,7 +734,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 	int err;
 
 	/* Let's make sure that le is enabled.*/
-	if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
 		if (lmp_le_capable(hdev))
 			return ERR_PTR(-ECONNREFUSED);
 
@@ -799,7 +799,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 	 * anyway have to disable it in order to start directed
 	 * advertising.
 	 */
-	if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_LE_ADV)) {
 		u8 enable = 0x00;
 		hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable),
 			    &enable);
@@ -810,7 +810,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 		/* If we're active scanning most controllers are unable
 		 * to initiate advertising. Simply reject the attempt.
 		 */
-		if (test_bit(HCI_LE_SCAN, &hdev->dev_flags) &&
+		if (hci_dev_test_flag(hdev, HCI_LE_SCAN) &&
 		    hdev->le_scan_type == LE_SCAN_ACTIVE) {
 			skb_queue_purge(&req.cmd_q);
 			hci_conn_del(conn);
@@ -840,7 +840,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 	 * handler for scan disabling knows to set the correct discovery
 	 * state.
 	 */
-	if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
 		hci_req_add_le_scan_disable(&req);
 		set_bit(HCI_LE_SCAN_INTERRUPTED, &hdev->dev_flags);
 	}
@@ -864,7 +864,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
 {
 	struct hci_conn *acl;
 
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
 		if (lmp_bredr_capable(hdev))
 			return ERR_PTR(-ECONNREFUSED);
 
@@ -942,7 +942,7 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
 	 * Connections is used and the link is encrypted with AES-CCM
 	 * using a P-256 authenticated combination key.
 	 */
-	if (test_bit(HCI_SC_ONLY, &conn->hdev->flags)) {
+	if (hci_dev_test_flag(conn->hdev, HCI_SC_ONLY)) {
 		if (!hci_conn_sc_enabled(conn) ||
 		    !test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
 		    conn->key_type != HCI_LK_AUTH_COMBINATION_P256)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e3bbdd537b90..20fe5ef6abc5 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -617,7 +617,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt)
 		 */
 		hdev->max_page = 0x01;
 
-		if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
+		if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) {
 			u8 mode = 0x01;
 
 			hci_req_add(req, HCI_OP_WRITE_SSP_MODE,
@@ -656,7 +656,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt)
 			    sizeof(cp), &cp);
 	}
 
-	if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) {
 		u8 enable = 1;
 		hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
 			    &enable);
@@ -693,7 +693,7 @@ static void hci_set_le_support(struct hci_request *req)
 
 	memset(&cp, 0, sizeof(cp));
 
-	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
 		cp.le = 0x01;
 		cp.simul = 0x00;
 	}
@@ -881,7 +881,7 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt)
 		hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL);
 
 	/* Enable Secure Connections if supported and configured */
-	if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
+	if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) &&
 	    bredr_sc_enabled(hdev)) {
 		u8 support = 0x01;
 
@@ -901,7 +901,7 @@ static int __hci_init(struct hci_dev *hdev)
 	/* The Device Under Test (DUT) mode is special and available for
 	 * all controller types. So just create it early on.
 	 */
-	if (test_bit(HCI_SETUP, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_SETUP)) {
 		debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev,
 				    &dut_mode_fops);
 	}
@@ -937,8 +937,8 @@ static int __hci_init(struct hci_dev *hdev)
 	 * So only when in setup phase or config phase, create the debugfs
 	 * entries and register the SMP channels.
 	 */
-	if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
-	    !test_bit(HCI_CONFIG, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
+	    !hci_dev_test_flag(hdev, HCI_CONFIG))
 		return 0;
 
 	hci_debugfs_create_common(hdev);
@@ -1300,12 +1300,12 @@ int hci_inquiry(void __user *arg)
 	if (!hdev)
 		return -ENODEV;
 
-	if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 		err = -EBUSY;
 		goto done;
 	}
 
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
 		err = -EOPNOTSUPP;
 		goto done;
 	}
@@ -1315,7 +1315,7 @@ int hci_inquiry(void __user *arg)
 		goto done;
 	}
 
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
 		err = -EOPNOTSUPP;
 		goto done;
 	}
@@ -1387,17 +1387,17 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 
 	hci_req_lock(hdev);
 
-	if (test_bit(HCI_UNREGISTER, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
 		ret = -ENODEV;
 		goto done;
 	}
 
-	if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
-	    !test_bit(HCI_CONFIG, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
+	    !hci_dev_test_flag(hdev, HCI_CONFIG)) {
 		/* Check for rfkill but allow the HCI setup stage to
 		 * proceed (which in itself doesn't cause any RF activity).
 		 */
-		if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
+		if (hci_dev_test_flag(hdev, HCI_RFKILLED)) {
 			ret = -ERFKILL;
 			goto done;
 		}
@@ -1414,7 +1414,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 		 * This check is only valid for BR/EDR controllers
 		 * since AMP controllers do not have an address.
 		 */
-		if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) &&
+		if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
 		    hdev->dev_type == HCI_BREDR &&
 		    !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
 		    !bacmp(&hdev->static_addr, BDADDR_ANY)) {
@@ -1436,7 +1436,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 	atomic_set(&hdev->cmd_cnt, 1);
 	set_bit(HCI_INIT, &hdev->flags);
 
-	if (test_bit(HCI_SETUP, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_SETUP)) {
 		if (hdev->setup)
 			ret = hdev->setup(hdev);
 
@@ -1458,11 +1458,11 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 		 * also the original Bluetooth public device address
 		 * will be read using the Read BD Address command.
 		 */
-		if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
 			ret = __hci_unconf_init(hdev);
 	}
 
-	if (test_bit(HCI_CONFIG, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_CONFIG)) {
 		/* If public address change is configured, ensure that
 		 * the address gets programmed. If the driver does not
 		 * support changing the public address, fail the power
@@ -1476,8 +1476,8 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 	}
 
 	if (!ret) {
-		if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
-		    !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags))
+		if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
+		    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
 			ret = __hci_init(hdev);
 	}
 
@@ -1488,10 +1488,10 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 		set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
 		set_bit(HCI_UP, &hdev->flags);
 		hci_notify(hdev, HCI_DEV_UP);
-		if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
-		    !test_bit(HCI_CONFIG, &hdev->dev_flags) &&
-		    !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
-		    !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) &&
+		if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
+		    !hci_dev_test_flag(hdev, HCI_CONFIG) &&
+		    !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
+		    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
 		    hdev->dev_type == HCI_BREDR) {
 			hci_dev_lock(hdev);
 			mgmt_powered(hdev, 1);
@@ -1543,8 +1543,8 @@ int hci_dev_open(__u16 dev)
 	 * HCI_USER_CHANNEL will be set first before attempting to
 	 * open the device.
 	 */
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
-	    !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
+	    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 		err = -EOPNOTSUPP;
 		goto done;
 	}
@@ -1569,8 +1569,8 @@ int hci_dev_open(__u16 dev)
 	 * is in use this bit will be cleared again and userspace has
 	 * to explicitly enable it.
 	 */
-	if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) &&
-	    !test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+	    !hci_dev_test_flag(hdev, HCI_MGMT))
 		set_bit(HCI_BONDABLE, &hdev->dev_flags);
 
 	err = hci_dev_do_open(hdev);
@@ -1601,7 +1601,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 {
 	BT_DBG("%s %p", hdev->name, hdev);
 
-	if (!test_bit(HCI_UNREGISTER, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
 		/* Execute vendor specific shutdown routine */
 		if (hdev->shutdown)
 			hdev->shutdown(hdev);
@@ -1635,7 +1635,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	cancel_delayed_work_sync(&hdev->le_scan_disable);
 	cancel_delayed_work_sync(&hdev->le_scan_restart);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		cancel_delayed_work_sync(&hdev->rpa_expired);
 
 	/* Avoid potential lockdep warnings from the *_flush() calls by
@@ -1667,8 +1667,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	/* Reset device */
 	skb_queue_purge(&hdev->cmd_q);
 	atomic_set(&hdev->cmd_cnt, 1);
-	if (!test_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
-	    !test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
+	if (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) &&
+	    !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
 	    test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
 		set_bit(HCI_INIT, &hdev->flags);
 		__hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
@@ -1723,7 +1723,7 @@ int hci_dev_close(__u16 dev)
 	if (!hdev)
 		return -ENODEV;
 
-	if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 		err = -EBUSY;
 		goto done;
 	}
@@ -1786,12 +1786,12 @@ int hci_dev_reset(__u16 dev)
 		goto done;
 	}
 
-	if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 		err = -EBUSY;
 		goto done;
 	}
 
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
 		err = -EOPNOTSUPP;
 		goto done;
 	}
@@ -1812,12 +1812,12 @@ int hci_dev_reset_stat(__u16 dev)
 	if (!hdev)
 		return -ENODEV;
 
-	if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 		ret = -EBUSY;
 		goto done;
 	}
 
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
 		ret = -EOPNOTSUPP;
 		goto done;
 	}
@@ -1851,14 +1851,14 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan)
 						    &hdev->dev_flags);
 	}
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_MGMT))
 		return;
 
 	if (conn_changed || discov_changed) {
 		/* In case this was disabled through mgmt */
 		set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
 
-		if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 			mgmt_update_adv_data(hdev);
 
 		mgmt_new_settings(hdev);
@@ -1878,12 +1878,12 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
 	if (!hdev)
 		return -ENODEV;
 
-	if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 		err = -EBUSY;
 		goto done;
 	}
 
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
 		err = -EOPNOTSUPP;
 		goto done;
 	}
@@ -1893,7 +1893,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
 		goto done;
 	}
 
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
 		err = -EOPNOTSUPP;
 		goto done;
 	}
@@ -1997,7 +1997,7 @@ int hci_get_dev_list(void __user *arg)
 		 * is running, but in that case still indicate that the
 		 * device is actually down.
 		 */
-		if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_AUTO_OFF))
 			flags &= ~BIT(HCI_UP);
 
 		(dr + n)->dev_id  = hdev->id;
@@ -2035,7 +2035,7 @@ int hci_get_dev_info(void __user *arg)
 	 * is running, but in that case still indicate that the
 	 * device is actually down.
 	 */
-	if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_AUTO_OFF))
 		flags = hdev->flags & ~BIT(HCI_UP);
 	else
 		flags = hdev->flags;
@@ -2078,13 +2078,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
 
 	BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
 
-	if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
 		return -EBUSY;
 
 	if (blocked) {
 		set_bit(HCI_RFKILLED, &hdev->dev_flags);
-		if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
-		    !test_bit(HCI_CONFIG, &hdev->dev_flags))
+		if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
+		    !hci_dev_test_flag(hdev, HCI_CONFIG))
 			hci_dev_do_close(hdev);
 	} else {
 		clear_bit(HCI_RFKILLED, &hdev->dev_flags);
@@ -2116,14 +2116,14 @@ static void hci_power_on(struct work_struct *work)
 	 * ignored and they need to be checked now. If they are still
 	 * valid, it is important to turn the device back off.
 	 */
-	if (test_bit(HCI_RFKILLED, &hdev->dev_flags) ||
-	    test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) ||
+	if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
+	    hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
 	    (hdev->dev_type == HCI_BREDR &&
 	     !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
 	     !bacmp(&hdev->static_addr, BDADDR_ANY))) {
 		clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
 		hci_dev_do_close(hdev);
-	} else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+	} else if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) {
 		queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
 				   HCI_AUTO_OFF_TIMEOUT);
 	}
@@ -2132,7 +2132,7 @@ static void hci_power_on(struct work_struct *work)
 		/* For unconfigured devices, set the HCI_RAW flag
 		 * so that userspace can easily identify them.
 		 */
-		if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
 			set_bit(HCI_RAW, &hdev->flags);
 
 		/* For fully configured devices, this will send
@@ -2147,7 +2147,7 @@ static void hci_power_on(struct work_struct *work)
 		/* When the controller is now configured, then it
 		 * is important to clear the HCI_RAW flag.
 		 */
-		if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+		if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
 			clear_bit(HCI_RAW, &hdev->flags);
 
 		/* Powering on the controller with HCI_CONFIG set only
@@ -2986,7 +2986,7 @@ static void le_scan_restart_work(struct work_struct *work)
 	BT_DBG("%s", hdev->name);
 
 	/* If controller is not scanning we are done. */
-	if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
 		return;
 
 	hci_req_init(&req, hdev);
@@ -3021,7 +3021,7 @@ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr,
 {
 	if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
 	    !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
-	    (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) &&
+	    (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
 	     bacmp(&hdev->static_addr, BDADDR_ANY))) {
 		bacpy(bdaddr, &hdev->static_addr);
 		*bdaddr_type = ADDR_LE_DEV_RANDOM;
@@ -3251,8 +3251,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
 	cancel_work_sync(&hdev->power_on);
 
 	if (!test_bit(HCI_INIT, &hdev->flags) &&
-	    !test_bit(HCI_SETUP, &hdev->dev_flags) &&
-	    !test_bit(HCI_CONFIG, &hdev->dev_flags)) {
+	    !hci_dev_test_flag(hdev, HCI_SETUP) &&
+	    !hci_dev_test_flag(hdev, HCI_CONFIG)) {
 		hci_dev_lock(hdev);
 		mgmt_index_removed(hdev);
 		hci_dev_unlock(hdev);
@@ -3926,7 +3926,7 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb)
 
 static void __check_timeout(struct hci_dev *hdev, unsigned int cnt)
 {
-	if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
 		/* ACL tx timeout must be longer than maximum
 		 * link supervision timeout (40.9 seconds) */
 		if (!cnt && time_after(jiffies, hdev->acl_last_tx +
@@ -4109,7 +4109,7 @@ static void hci_sched_le(struct hci_dev *hdev)
 	if (!hci_conn_num(hdev, LE_LINK))
 		return;
 
-	if (!test_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
 		/* LE tx timeout must be longer than maximum
 		 * link supervision timeout (40.9 seconds) */
 		if (!hdev->le_cnt && hdev->le_pkts &&
@@ -4157,7 +4157,7 @@ static void hci_tx_work(struct work_struct *work)
 	BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt,
 	       hdev->sco_cnt, hdev->le_cnt);
 
-	if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 		/* Schedule queues and send stuff to HCI driver */
 		hci_sched_acl(hdev);
 		hci_sched_sco(hdev);
@@ -4354,7 +4354,7 @@ static void hci_rx_work(struct work_struct *work)
 			hci_send_to_sock(hdev, skb);
 		}
 
-		if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+		if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 			kfree_skb(skb);
 			continue;
 		}
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 65261e5d4b84..3c025ee5572c 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -247,7 +247,7 @@ static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf,
 	struct hci_dev *hdev = file->private_data;
 	char buf[3];
 
-	buf[0] = test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags) ? 'Y': 'N';
+	buf[0] = hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS) ? 'Y': 'N';
 	buf[1] = '\n';
 	buf[2] = '\0';
 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -265,7 +265,7 @@ static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf,
 	struct hci_dev *hdev = file->private_data;
 	char buf[3];
 
-	buf[0] = test_bit(HCI_SC_ONLY, &hdev->dev_flags) ? 'Y': 'N';
+	buf[0] = hci_dev_test_flag(hdev, HCI_SC_ONLY) ? 'Y': 'N';
 	buf[1] = '\n';
 	buf[2] = '\0';
 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 39653d46932b..f1ed3fe9e0df 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -265,7 +265,7 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hci_dev_lock(hdev);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_set_local_name_complete(hdev, sent, status);
 	else if (!status)
 		memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH);
@@ -282,8 +282,8 @@ static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
 	if (rp->status)
 		return;
 
-	if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
-	    test_bit(HCI_CONFIG, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_SETUP) ||
+	    hci_dev_test_flag(hdev, HCI_CONFIG))
 		memcpy(hdev->dev_name, rp->name, HCI_MAX_NAME_LENGTH);
 }
 
@@ -309,7 +309,7 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
 			clear_bit(HCI_AUTH, &hdev->flags);
 	}
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_auth_enable_complete(hdev, status);
 
 	hci_dev_unlock(hdev);
@@ -404,7 +404,7 @@ static void hci_cc_write_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb)
 	if (status == 0)
 		memcpy(hdev->dev_class, sent, 3);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_set_class_of_dev_complete(hdev, sent, status);
 
 	hci_dev_unlock(hdev);
@@ -497,7 +497,7 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
 			hdev->features[1][0] &= ~LMP_HOST_SSP;
 	}
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_ssp_enable_complete(hdev, sent->mode, status);
 	else if (!status) {
 		if (sent->mode)
@@ -529,7 +529,7 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
 			hdev->features[1][0] &= ~LMP_HOST_SC;
 	}
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags) && !status) {
+	if (!hci_dev_test_flag(hdev, HCI_MGMT) && !status) {
 		if (sent->support)
 			set_bit(HCI_SC_ENABLED, &hdev->dev_flags);
 		else
@@ -548,8 +548,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
 	if (rp->status)
 		return;
 
-	if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
-	    test_bit(HCI_CONFIG, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_SETUP) ||
+	    hci_dev_test_flag(hdev, HCI_CONFIG)) {
 		hdev->hci_ver = rp->hci_ver;
 		hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
 		hdev->lmp_ver = rp->lmp_ver;
@@ -568,8 +568,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev,
 	if (rp->status)
 		return;
 
-	if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
-	    test_bit(HCI_CONFIG, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_SETUP) ||
+	    hci_dev_test_flag(hdev, HCI_CONFIG))
 		memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
 }
 
@@ -691,7 +691,7 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb)
 	if (test_bit(HCI_INIT, &hdev->flags))
 		bacpy(&hdev->bdaddr, &rp->bdaddr);
 
-	if (test_bit(HCI_SETUP, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_SETUP))
 		bacpy(&hdev->setup_addr, &rp->bdaddr);
 }
 
@@ -900,7 +900,7 @@ static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hci_dev_lock(hdev);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_pin_code_reply_complete(hdev, &rp->bdaddr, rp->status);
 
 	if (rp->status)
@@ -926,7 +926,7 @@ static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hci_dev_lock(hdev);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_pin_code_neg_reply_complete(hdev, &rp->bdaddr,
 						 rp->status);
 
@@ -985,7 +985,7 @@ static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hci_dev_lock(hdev);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_user_confirm_reply_complete(hdev, &rp->bdaddr, ACL_LINK, 0,
 						 rp->status);
 
@@ -1001,7 +1001,7 @@ static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev,
 
 	hci_dev_lock(hdev);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_user_confirm_neg_reply_complete(hdev, &rp->bdaddr,
 						     ACL_LINK, 0, rp->status);
 
@@ -1016,7 +1016,7 @@ static void hci_cc_user_passkey_reply(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hci_dev_lock(hdev);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_user_passkey_reply_complete(hdev, &rp->bdaddr, ACL_LINK,
 						 0, rp->status);
 
@@ -1032,7 +1032,7 @@ static void hci_cc_user_passkey_neg_reply(struct hci_dev *hdev,
 
 	hci_dev_lock(hdev);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_user_passkey_neg_reply_complete(hdev, &rp->bdaddr,
 						     ACL_LINK, 0, rp->status);
 
@@ -1229,7 +1229,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 		if (test_and_clear_bit(HCI_LE_SCAN_INTERRUPTED,
 				       &hdev->dev_flags))
 			hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
-		else if (!test_bit(HCI_LE_ADV, &hdev->dev_flags) &&
+		else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) &&
 			 hdev->discovery.state == DISCOVERY_FINDING)
 			mgmt_reenable_advertising(hdev);
 
@@ -1769,7 +1769,7 @@ static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status)
 
 	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		hci_check_pending_name(hdev, conn, &cp->bdaddr, NULL, 0);
 
 	if (!conn)
@@ -2118,7 +2118,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */
 	wake_up_bit(&hdev->flags, HCI_INQUIRY);
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_MGMT))
 		return;
 
 	hci_dev_lock(hdev);
@@ -2154,7 +2154,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	if (!num_rsp)
 		return;
 
-	if (test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ))
 		return;
 
 	hci_dev_lock(hdev);
@@ -2304,8 +2304,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	 * connection. These features are only touched through mgmt so
 	 * only do the checks if HCI_MGMT is set.
 	 */
-	if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
-	    !test_bit(HCI_CONNECTABLE, &hdev->dev_flags) &&
+	if (hci_dev_test_flag(hdev, HCI_MGMT) &&
+	    !hci_dev_test_flag(hdev, HCI_CONNECTABLE) &&
 	    !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr,
 				    BDADDR_BREDR)) {
 		    hci_reject_conn(hdev, &ev->bdaddr);
@@ -2542,7 +2542,7 @@ static void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_MGMT))
 		goto check_auth;
 
 	if (ev->status == 0)
@@ -2626,7 +2626,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		 * connections that are not encrypted with AES-CCM
 		 * using a P-256 authenticated combination key.
 		 */
-		if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) &&
+		if (hci_dev_test_flag(hdev, HCI_SC_ONLY) &&
 		    (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
 		     conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
 			hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
@@ -3331,11 +3331,11 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_conn_drop(conn);
 	}
 
-	if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) &&
+	if (!hci_dev_test_flag(hdev, HCI_BONDABLE) &&
 	    !test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags)) {
 		hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY,
 			     sizeof(ev->bdaddr), &ev->bdaddr);
-	} else if (test_bit(HCI_MGMT, &hdev->dev_flags)) {
+	} else if (hci_dev_test_flag(hdev, HCI_MGMT)) {
 		u8 secure;
 
 		if (conn->pending_sec_level == BT_SECURITY_HIGH)
@@ -3391,7 +3391,7 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 	BT_DBG("%s", hdev->name);
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_MGMT))
 		return;
 
 	hci_dev_lock(hdev);
@@ -3465,7 +3465,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	set_bit(HCI_CONN_NEW_LINK_KEY, &conn->flags);
 	conn_set_key(conn, ev->key_type, conn->pin_length);
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_MGMT))
 		goto unlock;
 
 	key = hci_add_link_key(hdev, conn, &ev->bdaddr, ev->link_key,
@@ -3487,7 +3487,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	 * store_hint being 0).
 	 */
 	if (key->type == HCI_LK_DEBUG_COMBINATION &&
-	    !test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags)) {
+	    !hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS)) {
 		list_del_rcu(&key->list);
 		kfree_rcu(key, rcu);
 		goto unlock;
@@ -3570,7 +3570,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
 	if (!num_rsp)
 		return;
 
-	if (test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ))
 		return;
 
 	hci_dev_lock(hdev);
@@ -3776,7 +3776,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
 	if (!num_rsp)
 		return;
 
-	if (test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ))
 		return;
 
 	hci_dev_lock(hdev);
@@ -3794,7 +3794,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
 		data.rssi		= info->rssi;
 		data.ssp_mode		= 0x01;
 
-		if (test_bit(HCI_MGMT, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_MGMT))
 			name_known = eir_has_data_type(info->data,
 						       sizeof(info->data),
 						       EIR_NAME_COMPLETE);
@@ -3898,7 +3898,7 @@ static u8 bredr_oob_data_present(struct hci_conn *conn)
 			 * information. However it can only be trusted when
 			 * not in Secure Connection Only mode.
 			 */
-			if (!test_bit(HCI_SC_ONLY, &hdev->dev_flags))
+			if (!hci_dev_test_flag(hdev, HCI_SC_ONLY))
 				return data->present;
 
 			/* When Secure Connections Only mode is enabled, then
@@ -3942,13 +3942,13 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hci_conn_hold(conn);
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_MGMT))
 		goto unlock;
 
 	/* Allow pairing if we're pairable, the initiators of the
 	 * pairing or if the remote is not requesting bonding.
 	 */
-	if (test_bit(HCI_BONDABLE, &hdev->dev_flags) ||
+	if (hci_dev_test_flag(hdev, HCI_BONDABLE) ||
 	    test_bit(HCI_CONN_AUTH_INITIATOR, &conn->flags) ||
 	    (conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) {
 		struct hci_cp_io_capability_reply cp;
@@ -3974,7 +3974,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		/* If we're not bondable, force one of the non-bondable
 		 * authentication requirement values.
 		 */
-		if (!test_bit(HCI_BONDABLE, &hdev->dev_flags))
+		if (!hci_dev_test_flag(hdev, HCI_BONDABLE))
 			conn->auth_type &= HCI_AT_NO_BONDING_MITM;
 
 		cp.authentication = conn->auth_type;
@@ -4029,7 +4029,7 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
 
 	hci_dev_lock(hdev);
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_MGMT))
 		goto unlock;
 
 	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
@@ -4100,7 +4100,7 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev,
 
 	BT_DBG("%s", hdev->name);
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0);
 }
 
@@ -4119,7 +4119,7 @@ static void hci_user_passkey_notify_evt(struct hci_dev *hdev,
 	conn->passkey_notify = __le32_to_cpu(ev->passkey);
 	conn->passkey_entered = 0;
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_user_passkey_notify(hdev, &conn->dst, conn->type,
 					 conn->dst_type, conn->passkey_notify,
 					 conn->passkey_entered);
@@ -4157,7 +4157,7 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		return;
 	}
 
-	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_MGMT))
 		mgmt_user_passkey_notify(hdev, &conn->dst, conn->type,
 					 conn->dst_type, conn->passkey_notify,
 					 conn->passkey_entered);
@@ -4226,7 +4226,7 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev,
 
 	hci_dev_lock(hdev);
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_MGMT))
 		goto unlock;
 
 	data = hci_find_remote_oob_data(hdev, &ev->bdaddr, BDADDR_BREDR);
@@ -4243,7 +4243,7 @@ static void hci_remote_oob_data_request_evt(struct hci_dev *hdev,
 		struct hci_cp_remote_oob_ext_data_reply cp;
 
 		bacpy(&cp.bdaddr, &ev->bdaddr);
-		if (test_bit(HCI_SC_ONLY, &hdev->dev_flags)) {
+		if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) {
 			memset(cp.hash192, 0, sizeof(cp.hash192));
 			memset(cp.rand192, 0, sizeof(cp.rand192));
 		} else {
@@ -4432,7 +4432,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		if (conn->out) {
 			conn->resp_addr_type = ev->bdaddr_type;
 			bacpy(&conn->resp_addr, &ev->bdaddr);
-			if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) {
+			if (hci_dev_test_flag(hdev, HCI_PRIVACY)) {
 				conn->init_addr_type = ADDR_LE_DEV_RANDOM;
 				bacpy(&conn->init_addr, &hdev->rpa);
 			} else {
@@ -4658,7 +4658,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 		/* If the controller is not using resolvable random
 		 * addresses, then this report can be ignored.
 		 */
-		if (!test_bit(HCI_PRIVACY, &hdev->dev_flags))
+		if (!hci_dev_test_flag(hdev, HCI_PRIVACY))
 			return;
 
 		/* If the local IRK of the controller does not match
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index f857e765e081..42fa10522e89 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -270,7 +270,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
 	 * and 0x01 (whitelist enabled) use the new filter policies
 	 * 0x02 (no whitelist) and 0x03 (whitelist enabled).
 	 */
-	if (test_bit(HCI_PRIVACY, &hdev->dev_flags) &&
+	if (hci_dev_test_flag(hdev, HCI_PRIVACY) &&
 	    (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY))
 		filter_policy |= 0x02;
 
@@ -304,7 +304,7 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
 	 * In this kind of scenario skip the update and let the random
 	 * address be updated at the next cycle.
 	 */
-	if (test_bit(HCI_LE_ADV, &hdev->dev_flags) ||
+	if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
 	    hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) {
 		BT_DBG("Deferring random address update");
 		set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
@@ -324,7 +324,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 	 * current RPA has expired or there is something else than
 	 * the current RPA in use, then generate a new one.
 	 */
-	if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_PRIVACY)) {
 		int to;
 
 		*own_addr_type = ADDR_LE_DEV_RANDOM;
@@ -385,7 +385,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 	 */
 	if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
 	    !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
-	    (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) &&
+	    (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
 	     bacmp(&hdev->static_addr, BDADDR_ANY))) {
 		*own_addr_type = ADDR_LE_DEV_RANDOM;
 		if (bacmp(&hdev->static_addr, &hdev->random_addr))
@@ -425,7 +425,7 @@ void __hci_update_page_scan(struct hci_request *req)
 	struct hci_dev *hdev = req->hdev;
 	u8 scan;
 
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		return;
 
 	if (!hdev_is_powered(hdev))
@@ -434,7 +434,7 @@ void __hci_update_page_scan(struct hci_request *req)
 	if (mgmt_powering_down(hdev))
 		return;
 
-	if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags) ||
+	if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) ||
 	    disconnected_whitelist_entries(hdev))
 		scan = SCAN_PAGE;
 	else
@@ -443,7 +443,7 @@ void __hci_update_page_scan(struct hci_request *req)
 	if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE))
 		return;
 
-	if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
 		scan |= SCAN_INQUIRY;
 
 	hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
@@ -471,14 +471,14 @@ void __hci_update_background_scan(struct hci_request *req)
 
 	if (!test_bit(HCI_UP, &hdev->flags) ||
 	    test_bit(HCI_INIT, &hdev->flags) ||
-	    test_bit(HCI_SETUP, &hdev->dev_flags) ||
-	    test_bit(HCI_CONFIG, &hdev->dev_flags) ||
-	    test_bit(HCI_AUTO_OFF, &hdev->dev_flags) ||
-	    test_bit(HCI_UNREGISTER, &hdev->dev_flags))
+	    hci_dev_test_flag(hdev, HCI_SETUP) ||
+	    hci_dev_test_flag(hdev, HCI_CONFIG) ||
+	    hci_dev_test_flag(hdev, HCI_AUTO_OFF) ||
+	    hci_dev_test_flag(hdev, HCI_UNREGISTER))
 		return;
 
 	/* No point in doing scanning if LE support hasn't been enabled */
-	if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		return;
 
 	/* If discovery is active don't interfere with it */
@@ -502,7 +502,7 @@ void __hci_update_background_scan(struct hci_request *req)
 		 */
 
 		/* If controller is not scanning we are done. */
-		if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+		if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
 			return;
 
 		hci_req_add_le_scan_disable(req);
@@ -524,7 +524,7 @@ void __hci_update_background_scan(struct hci_request *req)
 		/* If controller is currently scanning, we stop it to ensure we
 		 * don't miss any advertising (due to duplicates filter).
 		 */
-		if (test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
 			hci_req_add_le_scan_disable(req);
 
 		hci_req_add_le_passive_scan(req);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index aa9ffcb9481f..b297709d82bf 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -534,10 +534,10 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
 	if (!hdev)
 		return -EBADFD;
 
-	if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
 		return -EBUSY;
 
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
 		return -EOPNOTSUPP;
 
 	if (hdev->dev_type != HCI_BREDR)
@@ -713,8 +713,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 
 		if (test_bit(HCI_UP, &hdev->flags) ||
 		    test_bit(HCI_INIT, &hdev->flags) ||
-		    test_bit(HCI_SETUP, &hdev->dev_flags) ||
-		    test_bit(HCI_CONFIG, &hdev->dev_flags)) {
+		    hci_dev_test_flag(hdev, HCI_SETUP) ||
+		    hci_dev_test_flag(hdev, HCI_CONFIG)) {
 			err = -EBUSY;
 			hci_dev_put(hdev);
 			goto done;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 91c682846bcf..af30d8240c80 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3900,7 +3900,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
 		return -EPROTO;
 
 	hci_dev_lock(hdev);
-	if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
+	if (hci_dev_test_flag(hdev, HCI_MGMT) &&
 	    !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
 		mgmt_device_connected(hdev, hcon, 0, NULL, 0);
 	hci_dev_unlock(hdev);
@@ -6987,10 +6987,10 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
 	conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS;
 
 	if (hcon->type == ACL_LINK &&
-	    test_bit(HCI_HS_ENABLED, &hcon->hdev->dev_flags))
+	    hci_dev_test_flag(hcon->hdev, HCI_HS_ENABLED))
 		conn->local_fixed_chan |= L2CAP_FC_A2MP;
 
-	if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) &&
+	if (hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED) &&
 	    (bredr_sc_enabled(hcon->hdev) ||
 	     test_bit(HCI_FORCE_BREDR_SMP, &hcon->hdev->dbg_flags)))
 		conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR;
@@ -7112,7 +7112,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
 		else
 			dst_type = ADDR_LE_DEV_RANDOM;
 
-		if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
 			role = HCI_ROLE_SLAVE;
 		else
 			role = HCI_ROLE_MASTER;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 0761f2e0fefa..596b36111e64 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -385,7 +385,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 	count = 0;
 	list_for_each_entry(d, &hci_dev_list, list) {
 		if (d->dev_type == HCI_BREDR &&
-		    !test_bit(HCI_UNCONFIGURED, &d->dev_flags))
+		    !hci_dev_test_flag(d, HCI_UNCONFIGURED))
 			count++;
 	}
 
@@ -398,9 +398,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	count = 0;
 	list_for_each_entry(d, &hci_dev_list, list) {
-		if (test_bit(HCI_SETUP, &d->dev_flags) ||
-		    test_bit(HCI_CONFIG, &d->dev_flags) ||
-		    test_bit(HCI_USER_CHANNEL, &d->dev_flags))
+		if (hci_dev_test_flag(d, HCI_SETUP) ||
+		    hci_dev_test_flag(d, HCI_CONFIG) ||
+		    hci_dev_test_flag(d, HCI_USER_CHANNEL))
 			continue;
 
 		/* Devices marked as raw-only are neither configured
@@ -410,7 +410,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 			continue;
 
 		if (d->dev_type == HCI_BREDR &&
-		    !test_bit(HCI_UNCONFIGURED, &d->dev_flags)) {
+		    !hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
 			rp->index[count++] = cpu_to_le16(d->id);
 			BT_DBG("Added hci%u", d->id);
 		}
@@ -445,7 +445,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
 	count = 0;
 	list_for_each_entry(d, &hci_dev_list, list) {
 		if (d->dev_type == HCI_BREDR &&
-		    test_bit(HCI_UNCONFIGURED, &d->dev_flags))
+		    hci_dev_test_flag(d, HCI_UNCONFIGURED))
 			count++;
 	}
 
@@ -458,9 +458,9 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
 
 	count = 0;
 	list_for_each_entry(d, &hci_dev_list, list) {
-		if (test_bit(HCI_SETUP, &d->dev_flags) ||
-		    test_bit(HCI_CONFIG, &d->dev_flags) ||
-		    test_bit(HCI_USER_CHANNEL, &d->dev_flags))
+		if (hci_dev_test_flag(d, HCI_SETUP) ||
+		    hci_dev_test_flag(d, HCI_CONFIG) ||
+		    hci_dev_test_flag(d, HCI_USER_CHANNEL))
 			continue;
 
 		/* Devices marked as raw-only are neither configured
@@ -470,7 +470,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
 			continue;
 
 		if (d->dev_type == HCI_BREDR &&
-		    test_bit(HCI_UNCONFIGURED, &d->dev_flags)) {
+		    hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
 			rp->index[count++] = cpu_to_le16(d->id);
 			BT_DBG("Added hci%u", d->id);
 		}
@@ -492,7 +492,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
 static bool is_configured(struct hci_dev *hdev)
 {
 	if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) &&
-	    !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags))
+	    !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
 		return false;
 
 	if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) &&
@@ -507,7 +507,7 @@ static __le32 get_missing_options(struct hci_dev *hdev)
 	u32 options = 0;
 
 	if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) &&
-	    !test_bit(HCI_EXT_CONFIGURED, &hdev->dev_flags))
+	    !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
 		options |= MGMT_OPTION_EXTERNAL_CONFIG;
 
 	if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) &&
@@ -608,43 +608,43 @@ static u32 get_current_settings(struct hci_dev *hdev)
 	if (hdev_is_powered(hdev))
 		settings |= MGMT_SETTING_POWERED;
 
-	if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_CONNECTABLE))
 		settings |= MGMT_SETTING_CONNECTABLE;
 
-	if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE))
 		settings |= MGMT_SETTING_FAST_CONNECTABLE;
 
-	if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
 		settings |= MGMT_SETTING_DISCOVERABLE;
 
-	if (test_bit(HCI_BONDABLE, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_BONDABLE))
 		settings |= MGMT_SETTING_BONDABLE;
 
-	if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		settings |= MGMT_SETTING_BREDR;
 
-	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		settings |= MGMT_SETTING_LE;
 
-	if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_LINK_SECURITY))
 		settings |= MGMT_SETTING_LINK_SECURITY;
 
-	if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
 		settings |= MGMT_SETTING_SSP;
 
-	if (test_bit(HCI_HS_ENABLED, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_HS_ENABLED))
 		settings |= MGMT_SETTING_HS;
 
-	if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
 		settings |= MGMT_SETTING_ADVERTISING;
 
-	if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_SC_ENABLED))
 		settings |= MGMT_SETTING_SECURE_CONN;
 
-	if (test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS))
 		settings |= MGMT_SETTING_DEBUG_KEYS;
 
-	if (test_bit(HCI_PRIVACY, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_PRIVACY))
 		settings |= MGMT_SETTING_PRIVACY;
 
 	/* The current setting for static address has two purposes. The
@@ -660,7 +660,7 @@ static u32 get_current_settings(struct hci_dev *hdev)
 	 * be evaluated.
 	 */
 	if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
-	    !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) ||
+	    !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) ||
 	    !bacmp(&hdev->bdaddr, BDADDR_ANY)) {
 		if (bacmp(&hdev->static_addr, BDADDR_ANY))
 			settings |= MGMT_SETTING_STATIC_ADDRESS;
@@ -840,7 +840,7 @@ static void update_scan_rsp_data(struct hci_request *req)
 	struct hci_cp_le_set_scan_rsp_data cp;
 	u8 len;
 
-	if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		return;
 
 	memset(&cp, 0, sizeof(cp));
@@ -874,9 +874,9 @@ static u8 get_adv_discov_flags(struct hci_dev *hdev)
 		else if (cp->val == 0x02)
 			return LE_AD_LIMITED;
 	} else {
-		if (test_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
 			return LE_AD_LIMITED;
-		else if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
+		else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
 			return LE_AD_GENERAL;
 	}
 
@@ -889,7 +889,7 @@ static u8 create_adv_data(struct hci_dev *hdev, u8 *ptr)
 
 	flags |= get_adv_discov_flags(hdev);
 
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		flags |= LE_AD_NO_BREDR;
 
 	if (flags) {
@@ -921,7 +921,7 @@ static void update_adv_data(struct hci_request *req)
 	struct hci_cp_le_set_adv_data cp;
 	u8 len;
 
-	if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		return;
 
 	memset(&cp, 0, sizeof(cp));
@@ -1009,10 +1009,10 @@ static void update_eir(struct hci_request *req)
 	if (!lmp_ext_inq_capable(hdev))
 		return;
 
-	if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
 		return;
 
-	if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE))
 		return;
 
 	memset(&cp, 0, sizeof(cp));
@@ -1048,17 +1048,17 @@ static void update_class(struct hci_request *req)
 	if (!hdev_is_powered(hdev))
 		return;
 
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		return;
 
-	if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE))
 		return;
 
 	cod[0] = hdev->minor_class;
 	cod[1] = hdev->major_class;
 	cod[2] = get_service_classes(hdev);
 
-	if (test_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
 		cod[1] |= 0x20;
 
 	if (memcmp(cod, hdev->dev_class, 3) == 0)
@@ -1080,7 +1080,7 @@ static bool get_connectable(struct hci_dev *hdev)
 		return cp->val;
 	}
 
-	return test_bit(HCI_CONNECTABLE, &hdev->dev_flags);
+	return hci_dev_test_flag(hdev, HCI_CONNECTABLE);
 }
 
 static void disable_advertising(struct hci_request *req)
@@ -1100,7 +1100,7 @@ static void enable_advertising(struct hci_request *req)
 	if (hci_conn_num(hdev, LE_LINK) > 0)
 		return;
 
-	if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 		disable_advertising(req);
 
 	/* Clear the HCI_LE_ADV bit temporarily so that the
@@ -1110,7 +1110,7 @@ static void enable_advertising(struct hci_request *req)
 	 */
 	clear_bit(HCI_LE_ADV, &hdev->dev_flags);
 
-	if (test_bit(HCI_ADVERTISING_CONNECTABLE, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
 		connectable = true;
 	else
 		connectable = get_connectable(hdev);
@@ -1165,7 +1165,7 @@ static void rpa_expired(struct work_struct *work)
 
 	set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
 
-	if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_ADVERTISING))
 		return;
 
 	/* The generation of a new RPA and programming it into the
@@ -1328,7 +1328,7 @@ static bool hci_stop_discovery(struct hci_request *req)
 
 	default:
 		/* Passive scanning */
-		if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) {
+		if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
 			hci_req_add_le_scan_disable(req);
 			return true;
 		}
@@ -1354,7 +1354,7 @@ static int clean_up_hci_state(struct hci_dev *hdev)
 		hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
 	}
 
-	if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 		disable_advertising(&req);
 
 	discov_stopped = hci_stop_discovery(&req);
@@ -1538,7 +1538,7 @@ static u8 mgmt_bredr_support(struct hci_dev *hdev)
 {
 	if (!lmp_bredr_capable(hdev))
 		return MGMT_STATUS_NOT_SUPPORTED;
-	else if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	else if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		return MGMT_STATUS_REJECTED;
 	else
 		return MGMT_STATUS_SUCCESS;
@@ -1548,7 +1548,7 @@ static u8 mgmt_le_support(struct hci_dev *hdev)
 {
 	if (!lmp_le_capable(hdev))
 		return MGMT_STATUS_NOT_SUPPORTED;
-	else if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+	else if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		return MGMT_STATUS_REJECTED;
 	else
 		return MGMT_STATUS_SUCCESS;
@@ -1626,8 +1626,8 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	BT_DBG("request for %s", hdev->name);
 
-	if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) &&
-	    !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
+	    !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE,
 				       MGMT_STATUS_REJECTED);
 
@@ -1660,7 +1660,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 		goto failed;
 	}
 
-	if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_CONNECTABLE)) {
 		err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE,
 				      MGMT_STATUS_REJECTED);
 		goto failed;
@@ -1673,7 +1673,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 		 * not a valid operation since it requires a timeout
 		 * and so no need to check HCI_LIMITED_DISCOVERABLE.
 		 */
-		if (!!cp->val != test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) {
+		if (!!cp->val != hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) {
 			change_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
 			changed = true;
 		}
@@ -1692,9 +1692,9 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 	 * value with the new value. And if only the timeout gets updated,
 	 * then no need for any HCI transactions.
 	 */
-	if (!!cp->val == test_bit(HCI_DISCOVERABLE, &hdev->dev_flags) &&
-	    (cp->val == 0x02) == test_bit(HCI_LIMITED_DISCOVERABLE,
-					  &hdev->dev_flags)) {
+	if (!!cp->val == hci_dev_test_flag(hdev, HCI_DISCOVERABLE) &&
+	    (cp->val == 0x02) == hci_dev_test_flag(hdev,
+						   HCI_LIMITED_DISCOVERABLE)) {
 		cancel_delayed_work(&hdev->discov_off);
 		hdev->discov_timeout = timeout;
 
@@ -1732,7 +1732,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 	/* The procedure for LE-only controllers is much simpler - just
 	 * update the advertising data.
 	 */
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		goto update_ad;
 
 	scan = SCAN_PAGE;
@@ -1785,7 +1785,7 @@ static void write_fast_connectable(struct hci_request *req, bool enable)
 	struct hci_cp_write_page_scan_activity acp;
 	u8 type;
 
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		return;
 
 	if (hdev->hci_ver < BLUETOOTH_VER_1_2)
@@ -1870,7 +1870,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev,
 	bool changed = false;
 	int err;
 
-	if (!!val != test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
+	if (!!val != hci_dev_test_flag(hdev, HCI_CONNECTABLE))
 		changed = true;
 
 	if (val) {
@@ -1904,8 +1904,8 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	BT_DBG("request for %s", hdev->name);
 
-	if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) &&
-	    !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
+	    !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE,
 				       MGMT_STATUS_REJECTED);
 
@@ -1939,7 +1939,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
 	 * by-product of disabling connectable, we need to update the
 	 * advertising flags.
 	 */
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
 		if (!cp->val) {
 			clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
 			clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
@@ -1972,7 +1972,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
 
 no_scan_update:
 	/* Update the advertising parameters if necessary */
-	if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
 		enable_advertising(&req);
 
 	err = hci_req_run(&req, set_connectable_complete);
@@ -2045,8 +2045,7 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data,
 	if (!hdev_is_powered(hdev)) {
 		bool changed = false;
 
-		if (!!cp->val != test_bit(HCI_LINK_SECURITY,
-					  &hdev->dev_flags)) {
+		if (!!cp->val != hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) {
 			change_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
 			changed = true;
 		}
@@ -2146,7 +2145,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		goto failed;
 	}
 
-	if (!!cp->val == test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
+	if (!!cp->val == hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) {
 		err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev);
 		goto failed;
 	}
@@ -2157,7 +2156,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		goto failed;
 	}
 
-	if (!cp->val && test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags))
+	if (!cp->val && hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS))
 		hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE,
 			     sizeof(cp->val), &cp->val);
 
@@ -2189,7 +2188,7 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
 				       MGMT_STATUS_NOT_SUPPORTED);
 
-	if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
 				       MGMT_STATUS_REJECTED);
 
@@ -2255,7 +2254,7 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 	 * has actually been enabled. During power on, the
 	 * update in powered_update_hci will take care of it.
 	 */
-	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
 		struct hci_request req;
 
 		hci_req_init(&req, hdev);
@@ -2289,7 +2288,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 				       MGMT_STATUS_INVALID_PARAMS);
 
 	/* LE-only devices do not allow toggling LE on/off */
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE,
 				       MGMT_STATUS_REJECTED);
 
@@ -2301,12 +2300,12 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	if (!hdev_is_powered(hdev) || val == enabled) {
 		bool changed = false;
 
-		if (val != test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+		if (val != hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
 			change_bit(HCI_LE_ENABLED, &hdev->dev_flags);
 			changed = true;
 		}
 
-		if (!val && test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
+		if (!val && hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
 			clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
 			changed = true;
 		}
@@ -2342,7 +2341,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		hci_cp.le = val;
 		hci_cp.simul = 0x00;
 	} else {
-		if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 			disable_advertising(&req);
 	}
 
@@ -3860,12 +3859,12 @@ static bool trigger_discovery(struct hci_request *req, u8 *status)
 			return false;
 
 		if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED &&
-		    !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
+		    !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
 			*status = MGMT_STATUS_NOT_SUPPORTED;
 			return false;
 		}
 
-		if (test_bit(HCI_LE_ADV, &hdev->dev_flags)) {
+		if (hci_dev_test_flag(hdev, HCI_LE_ADV)) {
 			/* Don't let discovery abort an outgoing
 			 * connection attempt that's using directed
 			 * advertising.
@@ -3883,7 +3882,7 @@ static bool trigger_discovery(struct hci_request *req, u8 *status)
 		 * is running. Thus, we should temporarily stop it in order to
 		 * set the discovery scanning parameters.
 		 */
-		if (test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_LE_SCAN))
 			hci_req_add_le_scan_disable(req);
 
 		memset(&param_cp, 0, sizeof(param_cp));
@@ -4007,7 +4006,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
 	}
 
 	if (hdev->discovery.state != DISCOVERY_STOPPED ||
-	    test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) {
+	    hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) {
 		err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY,
 					MGMT_STATUS_BUSY, &cp->type,
 					sizeof(cp->type));
@@ -4083,7 +4082,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
 	}
 
 	if (hdev->discovery.state != DISCOVERY_STOPPED ||
-	    test_bit(HCI_PERIODIC_INQ, &hdev->dev_flags)) {
+	    hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) {
 		err = mgmt_cmd_complete(sk, hdev->id,
 					MGMT_OP_START_SERVICE_DISCOVERY,
 					MGMT_STATUS_BUSY, &cp->type,
@@ -4410,7 +4409,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
 		goto unlock;
 	}
 
-	if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 		set_bit(HCI_ADVERTISING, &hdev->dev_flags);
 	else
 		clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
@@ -4457,11 +4456,10 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 	 * necessary).
 	 */
 	if (!hdev_is_powered(hdev) ||
-	    (val == test_bit(HCI_ADVERTISING, &hdev->dev_flags) &&
-	     (cp->val == 0x02) == test_bit(HCI_ADVERTISING_CONNECTABLE,
-					   &hdev->dev_flags)) ||
+	    (val == hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
+	     (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) ||
 	    hci_conn_num(hdev, LE_LINK) > 0 ||
-	    (test_bit(HCI_LE_SCAN, &hdev->dev_flags) &&
+	    (hci_dev_test_flag(hdev, HCI_LE_SCAN) &&
 	     hdev->le_scan_type == LE_SCAN_ACTIVE)) {
 		bool changed;
 
@@ -4609,7 +4607,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev,
 	/* If background scan is running, restart it so new parameters are
 	 * loaded.
 	 */
-	if (test_bit(HCI_LE_SCAN, &hdev->dev_flags) &&
+	if (hci_dev_test_flag(hdev, HCI_LE_SCAN) &&
 	    hdev->discovery.state == DISCOVERY_STOPPED) {
 		struct hci_request req;
 
@@ -4670,7 +4668,7 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
 
 	BT_DBG("%s", hdev->name);
 
-	if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) ||
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) ||
 	    hdev->hci_ver < BLUETOOTH_VER_1_2)
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
 				       MGMT_STATUS_NOT_SUPPORTED);
@@ -4687,7 +4685,7 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
 		goto unlock;
 	}
 
-	if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) {
+	if (!!cp->val == hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) {
 		err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE,
 					hdev);
 		goto unlock;
@@ -4770,7 +4768,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR,
 				       MGMT_STATUS_NOT_SUPPORTED);
 
-	if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR,
 				       MGMT_STATUS_REJECTED);
 
@@ -4780,7 +4778,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 
 	hci_dev_lock(hdev);
 
-	if (cp->val == test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
+	if (cp->val == hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
 		err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev);
 		goto unlock;
 	}
@@ -4824,9 +4822,9 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		 * switching BR/EDR back on when secure connections has been
 		 * enabled is not a supported transaction.
 		 */
-		if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) &&
+		if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
 		    (bacmp(&hdev->static_addr, BDADDR_ANY) ||
-		     test_bit(HCI_SC_ENABLED, &hdev->dev_flags))) {
+		     hci_dev_test_flag(hdev, HCI_SC_ENABLED))) {
 			err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR,
 					      MGMT_STATUS_REJECTED);
 			goto unlock;
@@ -4926,13 +4924,13 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
 	BT_DBG("request for %s", hdev->name);
 
 	if (!lmp_sc_capable(hdev) &&
-	    !test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+	    !hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN,
 				       MGMT_STATUS_NOT_SUPPORTED);
 
-	if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) &&
+	if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
 	    lmp_sc_capable(hdev) &&
-	    !test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
+	    !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN,
 				       MGMT_STATUS_REJECTED);
 
@@ -4943,7 +4941,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
 	hci_dev_lock(hdev);
 
 	if (!hdev_is_powered(hdev) || !lmp_sc_capable(hdev) ||
-	    !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
+	    !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
 		bool changed;
 
 		if (cp->val) {
@@ -4977,8 +4975,8 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
 
 	val = !!cp->val;
 
-	if (val == test_bit(HCI_SC_ENABLED, &hdev->dev_flags) &&
-	    (cp->val == 0x02) == test_bit(HCI_SC_ONLY, &hdev->dev_flags)) {
+	if (val == hci_dev_test_flag(hdev, HCI_SC_ENABLED) &&
+	    (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_SC_ONLY)) {
 		err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev);
 		goto failed;
 	}
@@ -5032,7 +5030,7 @@ static int set_debug_keys(struct sock *sk, struct hci_dev *hdev,
 						 &hdev->dev_flags);
 
 	if (hdev_is_powered(hdev) && use_changed &&
-	    test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
+	    hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) {
 		u8 mode = (cp->val == 0x02) ? 0x01 : 0x00;
 		hci_send_cmd(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE,
 			     sizeof(mode), &mode);
@@ -6104,7 +6102,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev,
 
 	err = new_options(hdev, sk);
 
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) == is_configured(hdev)) {
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) == is_configured(hdev)) {
 		mgmt_index_removed(hdev);
 
 		if (test_and_change_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
@@ -6156,7 +6154,7 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev,
 	if (!changed)
 		goto unlock;
 
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
 		err = new_options(hdev, sk);
 
 	if (is_configured(hdev)) {
@@ -6304,15 +6302,15 @@ int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
 			goto done;
 		}
 
-		if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
-		    test_bit(HCI_CONFIG, &hdev->dev_flags) ||
-		    test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+		if (hci_dev_test_flag(hdev, HCI_SETUP) ||
+		    hci_dev_test_flag(hdev, HCI_CONFIG) ||
+		    hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 			err = mgmt_cmd_status(sk, index, opcode,
 					      MGMT_STATUS_INVALID_INDEX);
 			goto done;
 		}
 
-		if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags) &&
+		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
 		    !(handler->flags & HCI_MGMT_UNCONFIGURED)) {
 			err = mgmt_cmd_status(sk, index, opcode,
 					      MGMT_STATUS_INVALID_INDEX);
@@ -6362,7 +6360,7 @@ void mgmt_index_added(struct hci_dev *hdev)
 	if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
 		return;
 
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
 		mgmt_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, NULL);
 	else
 		mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL);
@@ -6380,7 +6378,7 @@ void mgmt_index_removed(struct hci_dev *hdev)
 
 	mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
 
-	if (test_bit(HCI_UNCONFIGURED, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
 		mgmt_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, NULL);
 	else
 		mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL);
@@ -6448,7 +6446,7 @@ static int powered_update_hci(struct hci_dev *hdev)
 
 	hci_req_init(&req, hdev);
 
-	if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
+	if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) &&
 	    !lmp_host_ssp_capable(hdev)) {
 		u8 mode = 0x01;
 
@@ -6462,7 +6460,7 @@ static int powered_update_hci(struct hci_dev *hdev)
 		}
 	}
 
-	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) &&
+	if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
 	    lmp_bredr_capable(hdev)) {
 		struct hci_cp_write_le_host_supported cp;
 
@@ -6483,24 +6481,24 @@ static int powered_update_hci(struct hci_dev *hdev)
 		 * advertising data. This also applies to the case
 		 * where BR/EDR was toggled during the AUTO_OFF phase.
 		 */
-		if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+		if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
 			update_adv_data(&req);
 			update_scan_rsp_data(&req);
 		}
 
-		if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
 			enable_advertising(&req);
 
 		restart_le_actions(&req);
 	}
 
-	link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
+	link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY);
 	if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
 		hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE,
 			    sizeof(link_sec), &link_sec);
 
 	if (lmp_bredr_capable(hdev)) {
-		if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
+		if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE))
 			write_fast_connectable(&req, true);
 		else
 			write_fast_connectable(&req, false);
@@ -6519,7 +6517,7 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered)
 	u8 status, zero_cod[] = { 0, 0, 0 };
 	int err;
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_MGMT))
 		return 0;
 
 	if (powered) {
@@ -6540,7 +6538,7 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered)
 	 * been triggered, potentially causing misleading DISCONNECTED
 	 * status responses.
 	 */
-	if (test_bit(HCI_UNREGISTER, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
 		status = MGMT_STATUS_INVALID_INDEX;
 	else
 		status = MGMT_STATUS_NOT_POWERED;
@@ -6594,7 +6592,7 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev)
 	clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
 
 	hci_req_init(&req, hdev);
-	if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
 		u8 scan = SCAN_PAGE;
 		hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE,
 			    sizeof(scan), &scan);
@@ -7170,8 +7168,8 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
 
 	hci_req_init(&req, hdev);
 
-	if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
-		if (test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) {
+		if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS))
 			hci_req_add(&req, HCI_OP_WRITE_SSP_DEBUG_MODE,
 				    sizeof(enable), &enable);
 		update_eir(&req);
@@ -7343,7 +7341,7 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16])
 static void restart_le_scan(struct hci_dev *hdev)
 {
 	/* If controller is not scanning we are done. */
-	if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
 		return;
 
 	if (time_after(jiffies + DISCOV_LE_RESTART_DELAY,
@@ -7514,7 +7512,7 @@ void mgmt_reenable_advertising(struct hci_dev *hdev)
 {
 	struct hci_request req;
 
-	if (!test_bit(HCI_ADVERTISING, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_ADVERTISING))
 		return;
 
 	hci_req_init(&req, hdev);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index d6ef7e48c2c3..81975f274c2b 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -52,7 +52,7 @@
 
 #define SMP_TIMEOUT	msecs_to_jiffies(30000)
 
-#define AUTH_REQ_MASK(dev)	(test_bit(HCI_SC_ENABLED, &(dev)->dev_flags) ? \
+#define AUTH_REQ_MASK(dev)	(hci_dev_test_flag(dev, HCI_SC_ENABLED) ? \
 				 0x1f : 0x07)
 #define KEY_DIST_MASK		0x07
 
@@ -589,7 +589,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
 	struct hci_dev *hdev = hcon->hdev;
 	u8 local_dist = 0, remote_dist = 0, oob_flag = SMP_OOB_NOT_PRESENT;
 
-	if (test_bit(HCI_BONDABLE, &conn->hcon->hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_BONDABLE)) {
 		local_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN;
 		remote_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN;
 		authreq |= SMP_AUTH_BONDING;
@@ -597,18 +597,18 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
 		authreq &= ~SMP_AUTH_BONDING;
 	}
 
-	if (test_bit(HCI_RPA_RESOLVING, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_RPA_RESOLVING))
 		remote_dist |= SMP_DIST_ID_KEY;
 
-	if (test_bit(HCI_PRIVACY, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_PRIVACY))
 		local_dist |= SMP_DIST_ID_KEY;
 
-	if (test_bit(HCI_SC_ENABLED, &hdev->dev_flags) &&
+	if (hci_dev_test_flag(hdev, HCI_SC_ENABLED) &&
 	    (authreq & SMP_AUTH_SC)) {
 		struct oob_data *oob_data;
 		u8 bdaddr_type;
 
-		if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
+		if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) {
 			local_dist |= SMP_DIST_LINK_KEY;
 			remote_dist |= SMP_DIST_LINK_KEY;
 		}
@@ -692,7 +692,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
 	 * support hasn't been explicitly enabled.
 	 */
 	if (smp->ltk && smp->ltk->type == SMP_LTK_P256_DEBUG &&
-	    !test_bit(HCI_KEEP_DEBUG_KEYS, &hcon->hdev->dev_flags)) {
+	    !hci_dev_test_flag(hcon->hdev, HCI_KEEP_DEBUG_KEYS)) {
 		list_del_rcu(&smp->ltk->list);
 		kfree_rcu(smp->ltk, rcu);
 		smp->ltk = NULL;
@@ -1052,7 +1052,7 @@ static void smp_notify_keys(struct l2cap_conn *conn)
 			/* Don't keep debug keys around if the relevant
 			 * flag is not set.
 			 */
-			if (!test_bit(HCI_KEEP_DEBUG_KEYS, &hdev->dev_flags) &&
+			if (!hci_dev_test_flag(hdev, HCI_KEEP_DEBUG_KEYS) &&
 			    key->type == HCI_LK_DEBUG_COMBINATION) {
 				list_del_rcu(&key->list);
 				kfree_rcu(key, rcu);
@@ -1604,15 +1604,15 @@ static void build_bredr_pairing_cmd(struct smp_chan *smp,
 	struct hci_dev *hdev = conn->hcon->hdev;
 	u8 local_dist = 0, remote_dist = 0;
 
-	if (test_bit(HCI_BONDABLE, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_BONDABLE)) {
 		local_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN;
 		remote_dist = SMP_DIST_ENC_KEY | SMP_DIST_SIGN;
 	}
 
-	if (test_bit(HCI_RPA_RESOLVING, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_RPA_RESOLVING))
 		remote_dist |= SMP_DIST_ID_KEY;
 
-	if (test_bit(HCI_PRIVACY, &hdev->dev_flags))
+	if (hci_dev_test_flag(hdev, HCI_PRIVACY))
 		local_dist |= SMP_DIST_ID_KEY;
 
 	if (!rsp) {
@@ -1664,11 +1664,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
 	/* We didn't start the pairing, so match remote */
 	auth = req->auth_req & AUTH_REQ_MASK(hdev);
 
-	if (!test_bit(HCI_BONDABLE, &hdev->dev_flags) &&
+	if (!hci_dev_test_flag(hdev, HCI_BONDABLE) &&
 	    (auth & SMP_AUTH_BONDING))
 		return SMP_PAIRING_NOTSUPP;
 
-	if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && !(auth & SMP_AUTH_SC))
+	if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && !(auth & SMP_AUTH_SC))
 		return SMP_AUTH_REQUIREMENTS;
 
 	smp->preq[0] = SMP_CMD_PAIRING_REQ;
@@ -1761,7 +1761,7 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 
 	BT_DBG("");
 
-	if (test_bit(HCI_USE_DEBUG_KEYS, &hdev->dev_flags)) {
+	if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
 		BT_DBG("Using debug keys");
 		memcpy(smp->local_pk, debug_pk, 64);
 		memcpy(smp->local_sk, debug_sk, 32);
@@ -1816,7 +1816,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
 
 	auth = rsp->auth_req & AUTH_REQ_MASK(hdev);
 
-	if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && !(auth & SMP_AUTH_SC))
+	if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && !(auth & SMP_AUTH_SC))
 		return SMP_AUTH_REQUIREMENTS;
 
 	smp->prsp[0] = SMP_CMD_PAIRING_RSP;
@@ -2086,7 +2086,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
 
 	auth = rp->auth_req & AUTH_REQ_MASK(hdev);
 
-	if (test_bit(HCI_SC_ONLY, &hdev->dev_flags) && !(auth & SMP_AUTH_SC))
+	if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && !(auth & SMP_AUTH_SC))
 		return SMP_AUTH_REQUIREMENTS;
 
 	if (hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT)
@@ -2107,7 +2107,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
 	if (!smp)
 		return SMP_UNSPECIFIED;
 
-	if (!test_bit(HCI_BONDABLE, &hcon->hdev->dev_flags) &&
+	if (!hci_dev_test_flag(hdev, HCI_BONDABLE) &&
 	    (auth & SMP_AUTH_BONDING))
 		return SMP_PAIRING_NOTSUPP;
 
@@ -2141,7 +2141,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
 
 	chan = conn->smp;
 
-	if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags))
+	if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED))
 		return 1;
 
 	if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK))
@@ -2170,7 +2170,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
 
 	authreq = seclevel_to_authreq(sec_level);
 
-	if (test_bit(HCI_SC_ENABLED, &hcon->hdev->dev_flags))
+	if (hci_dev_test_flag(hcon->hdev, HCI_SC_ENABLED))
 		authreq |= SMP_AUTH_SC;
 
 	/* Require MITM if IO Capability allows or the security level
@@ -2606,7 +2606,7 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb)
 	if (skb->len < 1)
 		return -EILSEQ;
 
-	if (!test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags)) {
+	if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) {
 		reason = SMP_PAIRING_NOTSUPP;
 		goto done;
 	}
@@ -2744,7 +2744,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
 		return;
 
 	/* Secure Connections support must be enabled */
-	if (!test_bit(HCI_SC_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_SC_ENABLED))
 		return;
 
 	/* BR/EDR must use Secure Connections for SMP */
@@ -2753,7 +2753,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
 		return;
 
 	/* If our LE support is not enabled don't do anything */
-	if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
+	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		return;
 
 	/* Don't bother if remote LE support is not enabled */
-- 
cgit v1.2.3


From a1536da255f16f42b8f069b2769134b32558b265 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 13 Mar 2015 02:11:01 -0700
Subject: Bluetooth: Introduce hci_dev_set_flag helper macro

Instead of manually coding set_bit on hdev->dev_flags all the time,
use hci_dev_set_flag helper macro.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_conn.c         |  2 +-
 net/bluetooth/hci_core.c         | 24 ++++++++++++------------
 net/bluetooth/hci_event.c        | 14 +++++++-------
 net/bluetooth/hci_request.c      |  2 +-
 net/bluetooth/mgmt.c             | 39 +++++++++++++++++++--------------------
 6 files changed, 41 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 05e95a75aba5..bc2a7e918da7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -502,6 +502,7 @@ extern struct list_head hci_cb_list;
 extern rwlock_t hci_dev_list_lock;
 extern struct mutex hci_cb_list_lock;
 
+#define hci_dev_set_flag(hdev, nr)    set_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_test_flag(hdev, nr)   test_bit((nr), &(hdev)->dev_flags)
 
 /* ----- HCI interface to upper protocols ----- */
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 5444e194eb4e..c0fd42ceff61 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -842,7 +842,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 	 */
 	if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
 		hci_req_add_le_scan_disable(&req);
-		set_bit(HCI_LE_SCAN_INTERRUPTED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED);
 	}
 
 	hci_req_add_le_create_conn(&req, conn);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 20fe5ef6abc5..252b597362eb 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -501,7 +501,7 @@ static void le_setup(struct hci_request *req)
 
 	/* LE-only controllers have LE implicitly enabled */
 	if (!lmp_bredr_capable(hdev))
-		set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_LE_ENABLED);
 }
 
 static void hci_setup_event_mask(struct hci_request *req)
@@ -1448,7 +1448,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 		 */
 		if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
 		    test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks))
-			set_bit(HCI_UNCONFIGURED, &hdev->dev_flags);
+			hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
 
 		/* For an unconfigured controller it is required to
 		 * read at least the version information provided by
@@ -1485,7 +1485,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 
 	if (!ret) {
 		hci_dev_hold(hdev);
-		set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
 		set_bit(HCI_UP, &hdev->flags);
 		hci_notify(hdev, HCI_DEV_UP);
 		if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
@@ -1571,7 +1571,7 @@ int hci_dev_open(__u16 dev)
 	 */
 	if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
 	    !hci_dev_test_flag(hdev, HCI_MGMT))
-		set_bit(HCI_BONDABLE, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_BONDABLE);
 
 	err = hci_dev_do_open(hdev);
 
@@ -1856,7 +1856,7 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan)
 
 	if (conn_changed || discov_changed) {
 		/* In case this was disabled through mgmt */
-		set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
 
 		if (hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 			mgmt_update_adv_data(hdev);
@@ -2082,7 +2082,7 @@ static int hci_rfkill_set_block(void *data, bool blocked)
 		return -EBUSY;
 
 	if (blocked) {
-		set_bit(HCI_RFKILLED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_RFKILLED);
 		if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
 		    !hci_dev_test_flag(hdev, HCI_CONFIG))
 			hci_dev_do_close(hdev);
@@ -3189,16 +3189,16 @@ int hci_register_dev(struct hci_dev *hdev)
 	}
 
 	if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
-		set_bit(HCI_RFKILLED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_RFKILLED);
 
-	set_bit(HCI_SETUP, &hdev->dev_flags);
-	set_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+	hci_dev_set_flag(hdev, HCI_SETUP);
+	hci_dev_set_flag(hdev, HCI_AUTO_OFF);
 
 	if (hdev->dev_type == HCI_BREDR) {
 		/* Assume BR/EDR support until proven otherwise (such as
 		 * through reading supported features during init.
 		 */
-		set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
 	}
 
 	write_lock(&hci_dev_list_lock);
@@ -3209,7 +3209,7 @@ int hci_register_dev(struct hci_dev *hdev)
 	 * and should not be included in normal operation.
 	 */
 	if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
-		set_bit(HCI_UNCONFIGURED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
 
 	hci_notify(hdev, HCI_DEV_REG);
 	hci_dev_hold(hdev);
@@ -3235,7 +3235,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
 
 	BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
 
-	set_bit(HCI_UNREGISTER, &hdev->dev_flags);
+	hci_dev_set_flag(hdev, HCI_UNREGISTER);
 
 	id = hdev->id;
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f1ed3fe9e0df..b01a93efada8 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -70,7 +70,7 @@ static void hci_cc_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb)
 	if (status)
 		return;
 
-	set_bit(HCI_PERIODIC_INQ, &hdev->dev_flags);
+	hci_dev_set_flag(hdev, HCI_PERIODIC_INQ);
 }
 
 static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb)
@@ -501,7 +501,7 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
 		mgmt_ssp_enable_complete(hdev, sent->mode, status);
 	else if (!status) {
 		if (sent->mode)
-			set_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
+			hci_dev_set_flag(hdev, HCI_SSP_ENABLED);
 		else
 			clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
 	}
@@ -531,7 +531,7 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
 
 	if (!hci_dev_test_flag(hdev, HCI_MGMT) && !status) {
 		if (sent->support)
-			set_bit(HCI_SC_ENABLED, &hdev->dev_flags);
+			hci_dev_set_flag(hdev, HCI_SC_ENABLED);
 		else
 			clear_bit(HCI_SC_ENABLED, &hdev->dev_flags);
 	}
@@ -1109,7 +1109,7 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
 	if (*sent) {
 		struct hci_conn *conn;
 
-		set_bit(HCI_LE_ADV, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_LE_ADV);
 
 		conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
 		if (conn)
@@ -1192,7 +1192,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 
 	switch (cp->enable) {
 	case LE_SCAN_ENABLE:
-		set_bit(HCI_LE_SCAN, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_LE_SCAN);
 		if (hdev->le_scan_type == LE_SCAN_ACTIVE)
 			clear_pending_adv_report(hdev);
 		break;
@@ -1388,7 +1388,7 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
 
 	if (sent->le) {
 		hdev->features[1][0] |= LMP_HOST_LE;
-		set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_LE_ENABLED);
 	} else {
 		hdev->features[1][0] &= ~LMP_HOST_LE;
 		clear_bit(HCI_LE_ENABLED, &hdev->dev_flags);
@@ -2608,7 +2608,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	 * whenever the encryption procedure fails.
 	 */
 	if (ev->status && conn->type == LE_LINK)
-		set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
 
 	clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
 
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 42fa10522e89..fd7b2a97740b 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -307,7 +307,7 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
 	if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
 	    hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) {
 		BT_DBG("Deferring random address update");
-		set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
 		return;
 	}
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 596b36111e64..924bf3ee6261 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1163,7 +1163,7 @@ static void rpa_expired(struct work_struct *work)
 
 	BT_DBG("");
 
-	set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+	hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
 
 	if (!hci_dev_test_flag(hdev, HCI_ADVERTISING))
 		return;
@@ -1723,7 +1723,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	/* Limited discoverable mode */
 	if (cp->val == 0x02)
-		set_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_LIMITED_DISCOVERABLE);
 	else
 		clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
 
@@ -1874,7 +1874,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev,
 		changed = true;
 
 	if (val) {
-		set_bit(HCI_CONNECTABLE, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_CONNECTABLE);
 	} else {
 		clear_bit(HCI_CONNECTABLE, &hdev->dev_flags);
 		clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
@@ -4410,7 +4410,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
 	}
 
 	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
-		set_bit(HCI_ADVERTISING, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_ADVERTISING);
 	else
 		clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
 
@@ -4467,8 +4467,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 			changed = !test_and_set_bit(HCI_ADVERTISING,
 						    &hdev->dev_flags);
 			if (cp->val == 0x02)
-				set_bit(HCI_ADVERTISING_CONNECTABLE,
-					&hdev->dev_flags);
+				hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 			else
 				clear_bit(HCI_ADVERTISING_CONNECTABLE,
 					  &hdev->dev_flags);
@@ -4505,7 +4504,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 	hci_req_init(&req, hdev);
 
 	if (cp->val == 0x02)
-		set_bit(HCI_ADVERTISING_CONNECTABLE, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 	else
 		clear_bit(HCI_ADVERTISING_CONNECTABLE, &hdev->dev_flags);
 
@@ -4644,7 +4643,7 @@ static void fast_connectable_complete(struct hci_dev *hdev, u8 status,
 		struct mgmt_mode *cp = cmd->param;
 
 		if (cp->val)
-			set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+			hci_dev_set_flag(hdev, HCI_FAST_CONNECTABLE);
 		else
 			clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
 
@@ -4846,7 +4845,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	/* We need to flip the bit already here so that update_adv_data
 	 * generates the correct flags.
 	 */
-	set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
+	hci_dev_set_flag(hdev, HCI_BREDR_ENABLED);
 
 	hci_req_init(&req, hdev);
 
@@ -4894,12 +4893,12 @@ static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 		clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
 		break;
 	case 0x01:
-		set_bit(HCI_SC_ENABLED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_SC_ENABLED);
 		clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
 		break;
 	case 0x02:
-		set_bit(HCI_SC_ENABLED, &hdev->dev_flags);
-		set_bit(HCI_SC_ONLY, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_SC_ENABLED);
+		hci_dev_set_flag(hdev, HCI_SC_ONLY);
 		break;
 	}
 
@@ -4948,7 +4947,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
 			changed = !test_and_set_bit(HCI_SC_ENABLED,
 						    &hdev->dev_flags);
 			if (cp->val == 0x02)
-				set_bit(HCI_SC_ONLY, &hdev->dev_flags);
+				hci_dev_set_flag(hdev, HCI_SC_ONLY);
 			else
 				clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
 		} else {
@@ -5074,12 +5073,12 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 	/* If user space supports this command it is also expected to
 	 * handle IRKs. Therefore, set the HCI_RPA_RESOLVING flag.
 	 */
-	set_bit(HCI_RPA_RESOLVING, &hdev->dev_flags);
+	hci_dev_set_flag(hdev, HCI_RPA_RESOLVING);
 
 	if (cp->privacy) {
 		changed = !test_and_set_bit(HCI_PRIVACY, &hdev->dev_flags);
 		memcpy(hdev->irk, cp->irk, sizeof(hdev->irk));
-		set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
 	} else {
 		changed = test_and_clear_bit(HCI_PRIVACY, &hdev->dev_flags);
 		memset(hdev->irk, 0, sizeof(hdev->irk));
@@ -5172,7 +5171,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 			    BDADDR_ANY);
 	}
 
-	set_bit(HCI_RPA_RESOLVING, &hdev->dev_flags);
+	hci_dev_set_flag(hdev, HCI_RPA_RESOLVING);
 
 	err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_IRKS, 0, NULL, 0);
 
@@ -6106,8 +6105,8 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev,
 		mgmt_index_removed(hdev);
 
 		if (test_and_change_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
-			set_bit(HCI_CONFIG, &hdev->dev_flags);
-			set_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+			hci_dev_set_flag(hdev, HCI_CONFIG);
+			hci_dev_set_flag(hdev, HCI_AUTO_OFF);
 
 			queue_work(hdev->req_workqueue, &hdev->power_on);
 		} else {
@@ -6162,8 +6161,8 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev,
 
 		clear_bit(HCI_UNCONFIGURED, &hdev->dev_flags);
 
-		set_bit(HCI_CONFIG, &hdev->dev_flags);
-		set_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+		hci_dev_set_flag(hdev, HCI_CONFIG);
+		hci_dev_set_flag(hdev, HCI_AUTO_OFF);
 
 		queue_work(hdev->req_workqueue, &hdev->power_on);
 	}
-- 
cgit v1.2.3


From a358dc11d80ecaca443aa1fd2fd9d4f3425922e7 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 13 Mar 2015 02:11:02 -0700
Subject: Bluetooth: Introduce hci_dev_clear_flag helper macro

Instead of manually coding clear_bit on hdev->dev_flags all the time,
use hci_dev_clear_flag helper macro.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_conn.c         |  2 +-
 net/bluetooth/hci_core.c         | 12 +++----
 net/bluetooth/hci_event.c        | 16 +++++-----
 net/bluetooth/hci_sock.c         |  4 +--
 net/bluetooth/mgmt.c             | 68 +++++++++++++++++++---------------------
 6 files changed, 51 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index bc2a7e918da7..535b23c73c92 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -503,6 +503,7 @@ extern rwlock_t hci_dev_list_lock;
 extern struct mutex hci_cb_list_lock;
 
 #define hci_dev_set_flag(hdev, nr)    set_bit((nr), &(hdev)->dev_flags)
+#define hci_dev_clear_flag(hdev, nr)  clear_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_test_flag(hdev, nr)   test_bit((nr), &(hdev)->dev_flags)
 
 /* ----- HCI interface to upper protocols ----- */
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index c0fd42ceff61..ee5e59839b02 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -700,7 +700,7 @@ static void hci_req_directed_advertising(struct hci_request *req,
 	 * and write a new random address. The flag will be set back on
 	 * as soon as the SET_ADV_ENABLE HCI command completes.
 	 */
-	clear_bit(HCI_LE_ADV, &hdev->dev_flags);
+	hci_dev_clear_flag(hdev, HCI_LE_ADV);
 
 	/* Set require_privacy to false so that the remote device has a
 	 * chance of identifying us.
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 252b597362eb..01710698e547 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -591,7 +591,7 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt)
 	if (lmp_bredr_capable(hdev))
 		bredr_setup(req);
 	else
-		clear_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED);
 
 	if (lmp_le_capable(hdev))
 		le_setup(req);
@@ -1625,8 +1625,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	if (hdev->discov_timeout > 0) {
 		cancel_delayed_work(&hdev->discov_off);
 		hdev->discov_timeout = 0;
-		clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
-		clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
+		hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
 	}
 
 	if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
@@ -1846,7 +1846,7 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan)
 		discov_changed = !test_and_set_bit(HCI_DISCOVERABLE,
 						   &hdev->dev_flags);
 	} else {
-		clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
 		discov_changed = test_and_clear_bit(HCI_DISCOVERABLE,
 						    &hdev->dev_flags);
 	}
@@ -2087,7 +2087,7 @@ static int hci_rfkill_set_block(void *data, bool blocked)
 		    !hci_dev_test_flag(hdev, HCI_CONFIG))
 			hci_dev_do_close(hdev);
 	} else {
-		clear_bit(HCI_RFKILLED, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_RFKILLED);
 	}
 
 	return 0;
@@ -2121,7 +2121,7 @@ static void hci_power_on(struct work_struct *work)
 	    (hdev->dev_type == HCI_BREDR &&
 	     !bacmp(&hdev->bdaddr, BDADDR_ANY) &&
 	     !bacmp(&hdev->static_addr, BDADDR_ANY))) {
-		clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
 		hci_dev_do_close(hdev);
 	} else if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) {
 		queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index b01a93efada8..808b78cf8ad0 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -82,7 +82,7 @@ static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb)
 	if (status)
 		return;
 
-	clear_bit(HCI_PERIODIC_INQ, &hdev->dev_flags);
+	hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ);
 
 	hci_conn_check_pending(hdev);
 }
@@ -503,7 +503,7 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
 		if (sent->mode)
 			hci_dev_set_flag(hdev, HCI_SSP_ENABLED);
 		else
-			clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_SSP_ENABLED);
 	}
 
 	hci_dev_unlock(hdev);
@@ -533,7 +533,7 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
 		if (sent->support)
 			hci_dev_set_flag(hdev, HCI_SC_ENABLED);
 		else
-			clear_bit(HCI_SC_ENABLED, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_SC_ENABLED);
 	}
 
 	hci_dev_unlock(hdev);
@@ -1117,7 +1117,7 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
 					   &conn->le_conn_timeout,
 					   conn->conn_timeout);
 	} else {
-		clear_bit(HCI_LE_ADV, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_LE_ADV);
 	}
 
 	hci_dev_unlock(hdev);
@@ -1217,7 +1217,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 		 */
 		cancel_delayed_work(&hdev->le_scan_disable);
 
-		clear_bit(HCI_LE_SCAN, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_LE_SCAN);
 
 		/* The HCI_LE_SCAN_INTERRUPTED flag indicates that we
 		 * interrupted scanning due to a connect request. Mark
@@ -1391,8 +1391,8 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
 		hci_dev_set_flag(hdev, HCI_LE_ENABLED);
 	} else {
 		hdev->features[1][0] &= ~LMP_HOST_LE;
-		clear_bit(HCI_LE_ENABLED, &hdev->dev_flags);
-		clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_LE_ENABLED);
+		hci_dev_clear_flag(hdev, HCI_ADVERTISING);
 	}
 
 	if (sent->simul)
@@ -4409,7 +4409,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	/* All controllers implicitly stop advertising in the event of a
 	 * connection, so ensure that the state bit is cleared.
 	 */
-	clear_bit(HCI_LE_ADV, &hdev->dev_flags);
+	hci_dev_clear_flag(hdev, HCI_LE_ADV);
 
 	conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
 	if (!conn) {
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index b297709d82bf..ca402a0a6740 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -474,7 +474,7 @@ static int hci_sock_release(struct socket *sock)
 	if (hdev) {
 		if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
 			mgmt_index_added(hdev);
-			clear_bit(HCI_USER_CHANNEL, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
 			hci_dev_close(hdev->id);
 		}
 
@@ -730,7 +730,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 
 		err = hci_dev_open(hdev->id);
 		if (err) {
-			clear_bit(HCI_USER_CHANNEL, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
 			mgmt_index_added(hdev);
 			hci_dev_put(hdev);
 			goto done;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 924bf3ee6261..aa233e37fc93 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1108,7 +1108,7 @@ static void enable_advertising(struct hci_request *req)
 	 * and write a new random address. The flag will be set back on
 	 * as soon as the SET_ADV_ENABLE HCI command completes.
 	 */
-	clear_bit(HCI_LE_ADV, &hdev->dev_flags);
+	hci_dev_clear_flag(hdev, HCI_LE_ADV);
 
 	if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
 		connectable = true;
@@ -1189,7 +1189,7 @@ static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
 	 * for mgmt we require user-space to explicitly enable
 	 * it
 	 */
-	clear_bit(HCI_BONDABLE, &hdev->dev_flags);
+	hci_dev_clear_flag(hdev, HCI_BONDABLE);
 }
 
 static int read_controller_info(struct sock *sk, struct hci_dev *hdev,
@@ -1573,7 +1573,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status,
 	if (status) {
 		u8 mgmt_err = mgmt_status(status);
 		mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
-		clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
 		goto remove_cmd;
 	}
 
@@ -1725,7 +1725,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 	if (cp->val == 0x02)
 		hci_dev_set_flag(hdev, HCI_LIMITED_DISCOVERABLE);
 	else
-		clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
 
 	hci_req_init(&req, hdev);
 
@@ -1762,7 +1762,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 
 		scan |= SCAN_INQUIRY;
 	} else {
-		clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
 	}
 
 	hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan);
@@ -1876,8 +1876,8 @@ static int set_connectable_update_settings(struct hci_dev *hdev,
 	if (val) {
 		hci_dev_set_flag(hdev, HCI_CONNECTABLE);
 	} else {
-		clear_bit(HCI_CONNECTABLE, &hdev->dev_flags);
-		clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_CONNECTABLE);
+		hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
 	}
 
 	err = send_settings_rsp(sk, MGMT_OP_SET_CONNECTABLE, hdev);
@@ -1941,8 +1941,8 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
 	 */
 	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
 		if (!cp->val) {
-			clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
-			clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
+			hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
 		}
 		update_adv_data(&req);
 	} else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) {
@@ -2126,7 +2126,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 				changed = test_and_clear_bit(HCI_HS_ENABLED,
 							     &hdev->dev_flags);
 			else
-				clear_bit(HCI_HS_ENABLED, &hdev->dev_flags);
+				hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
 		}
 
 		err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev);
@@ -2306,7 +2306,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		}
 
 		if (!val && hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
-			clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_ADVERTISING);
 			changed = true;
 		}
 
@@ -4412,7 +4412,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
 	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 		hci_dev_set_flag(hdev, HCI_ADVERTISING);
 	else
-		clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_ADVERTISING);
 
 	mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp,
 			     &match);
@@ -4469,13 +4469,11 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 			if (cp->val == 0x02)
 				hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 			else
-				clear_bit(HCI_ADVERTISING_CONNECTABLE,
-					  &hdev->dev_flags);
+				hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 		} else {
 			changed = test_and_clear_bit(HCI_ADVERTISING,
 						     &hdev->dev_flags);
-			clear_bit(HCI_ADVERTISING_CONNECTABLE,
-				  &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 		}
 
 		err = send_settings_rsp(sk, MGMT_OP_SET_ADVERTISING, hdev);
@@ -4506,7 +4504,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 	if (cp->val == 0x02)
 		hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 	else
-		clear_bit(HCI_ADVERTISING_CONNECTABLE, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 
 	if (val)
 		enable_advertising(&req);
@@ -4645,7 +4643,7 @@ static void fast_connectable_complete(struct hci_dev *hdev, u8 status,
 		if (cp->val)
 			hci_dev_set_flag(hdev, HCI_FAST_CONNECTABLE);
 		else
-			clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE);
 
 		send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev);
 		new_settings(hdev, cmd->sk);
@@ -4740,7 +4738,7 @@ static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 		/* We need to restore the flag if related HCI commands
 		 * failed.
 		 */
-		clear_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED);
 
 		mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
 	} else {
@@ -4784,11 +4782,11 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 
 	if (!hdev_is_powered(hdev)) {
 		if (!cp->val) {
-			clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
-			clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
-			clear_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
-			clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
-			clear_bit(HCI_HS_ENABLED, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
+			hci_dev_clear_flag(hdev, HCI_SSP_ENABLED);
+			hci_dev_clear_flag(hdev, HCI_LINK_SECURITY);
+			hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE);
+			hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
 		}
 
 		change_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
@@ -4889,12 +4887,12 @@ static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 
 	switch (cp->val) {
 	case 0x00:
-		clear_bit(HCI_SC_ENABLED, &hdev->dev_flags);
-		clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_SC_ENABLED);
+		hci_dev_clear_flag(hdev, HCI_SC_ONLY);
 		break;
 	case 0x01:
 		hci_dev_set_flag(hdev, HCI_SC_ENABLED);
-		clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_SC_ONLY);
 		break;
 	case 0x02:
 		hci_dev_set_flag(hdev, HCI_SC_ENABLED);
@@ -4949,11 +4947,11 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
 			if (cp->val == 0x02)
 				hci_dev_set_flag(hdev, HCI_SC_ONLY);
 			else
-				clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
+				hci_dev_clear_flag(hdev, HCI_SC_ONLY);
 		} else {
 			changed = test_and_clear_bit(HCI_SC_ENABLED,
 						     &hdev->dev_flags);
-			clear_bit(HCI_SC_ONLY, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_SC_ONLY);
 		}
 
 		err = send_settings_rsp(sk, MGMT_OP_SET_SECURE_CONN, hdev);
@@ -5082,7 +5080,7 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 	} else {
 		changed = test_and_clear_bit(HCI_PRIVACY, &hdev->dev_flags);
 		memset(hdev->irk, 0, sizeof(hdev->irk));
-		clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED);
 	}
 
 	err = send_settings_rsp(sk, MGMT_OP_SET_PRIVACY, hdev);
@@ -6159,7 +6157,7 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev,
 	if (is_configured(hdev)) {
 		mgmt_index_removed(hdev);
 
-		clear_bit(HCI_UNCONFIGURED, &hdev->dev_flags);
+		hci_dev_clear_flag(hdev, HCI_UNCONFIGURED);
 
 		hci_dev_set_flag(hdev, HCI_CONFIG);
 		hci_dev_set_flag(hdev, HCI_AUTO_OFF);
@@ -6587,8 +6585,8 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev)
 	 * of a timeout triggered from general discoverable, it is
 	 * safe to unconditionally clear the flag.
 	 */
-	clear_bit(HCI_LIMITED_DISCOVERABLE, &hdev->dev_flags);
-	clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
+	hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
+	hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
 
 	hci_req_init(&req, hdev);
 	if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
@@ -7137,7 +7135,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
 
 		if (enable && test_and_clear_bit(HCI_SSP_ENABLED,
 						 &hdev->dev_flags)) {
-			clear_bit(HCI_HS_ENABLED, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
 			new_settings(hdev, NULL);
 		}
 
@@ -7154,7 +7152,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
 			changed = test_and_clear_bit(HCI_HS_ENABLED,
 						     &hdev->dev_flags);
 		else
-			clear_bit(HCI_HS_ENABLED, &hdev->dev_flags);
+			hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
 	}
 
 	mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match);
-- 
cgit v1.2.3


From ce05d603af7c9b5be66a1f3358443e20e2a2ae7a Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 13 Mar 2015 02:11:03 -0700
Subject: Bluetooth: Introduce hci_dev_change_flag helper macro

Instead of manually coding change_bit on hdev->dev_flags all the time,
use hci_dev_change_flag helper macro.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/mgmt.c             | 10 +++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 535b23c73c92..92b2148702e6 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -504,6 +504,7 @@ extern struct mutex hci_cb_list_lock;
 
 #define hci_dev_set_flag(hdev, nr)    set_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_clear_flag(hdev, nr)  clear_bit((nr), &(hdev)->dev_flags)
+#define hci_dev_change_flag(hdev, nr) change_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_test_flag(hdev, nr)   test_bit((nr), &(hdev)->dev_flags)
 
 /* ----- HCI interface to upper protocols ----- */
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index aa233e37fc93..c3af3b87dbb5 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1674,7 +1674,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 		 * and so no need to check HCI_LIMITED_DISCOVERABLE.
 		 */
 		if (!!cp->val != hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) {
-			change_bit(HCI_DISCOVERABLE, &hdev->dev_flags);
+			hci_dev_change_flag(hdev, HCI_DISCOVERABLE);
 			changed = true;
 		}
 
@@ -2046,7 +2046,7 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data,
 		bool changed = false;
 
 		if (!!cp->val != hci_dev_test_flag(hdev, HCI_LINK_SECURITY)) {
-			change_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
+			hci_dev_change_flag(hdev, HCI_LINK_SECURITY);
 			changed = true;
 		}
 
@@ -2301,7 +2301,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		bool changed = false;
 
 		if (val != hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
-			change_bit(HCI_LE_ENABLED, &hdev->dev_flags);
+			hci_dev_change_flag(hdev, HCI_LE_ENABLED);
 			changed = true;
 		}
 
@@ -4689,7 +4689,7 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
 	}
 
 	if (!hdev_is_powered(hdev)) {
-		change_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+		hci_dev_change_flag(hdev, HCI_FAST_CONNECTABLE);
 		err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE,
 					hdev);
 		new_settings(hdev, sk);
@@ -4789,7 +4789,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 			hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
 		}
 
-		change_bit(HCI_BREDR_ENABLED, &hdev->dev_flags);
+		hci_dev_change_flag(hdev, HCI_BREDR_ENABLED);
 
 		err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev);
 		if (err < 0)
-- 
cgit v1.2.3


From 516018a9c057a7c179dd6b4df917a6f5d43b3547 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 13 Mar 2015 02:11:04 -0700
Subject: Bluetooth: Introduce hci_dev_test_and_change_flag helper macro

Instead of manually coding test_and_change_bit on hdev->dev_flags all the
time, use hci_dev_test_and_change_flag helper macro.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 2 ++
 net/bluetooth/mgmt.c             | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 92b2148702e6..cd2682c64a4c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -507,6 +507,8 @@ extern struct mutex hci_cb_list_lock;
 #define hci_dev_change_flag(hdev, nr) change_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_test_flag(hdev, nr)   test_bit((nr), &(hdev)->dev_flags)
 
+#define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), &(hdev)->dev_flags)
+
 /* ----- HCI interface to upper protocols ----- */
 int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
 int l2cap_disconn_ind(struct hci_conn *hcon);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index c3af3b87dbb5..3c579a45cff0 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6102,7 +6102,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev,
 	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) == is_configured(hdev)) {
 		mgmt_index_removed(hdev);
 
-		if (test_and_change_bit(HCI_UNCONFIGURED, &hdev->dev_flags)) {
+		if (hci_dev_test_and_change_flag(hdev, HCI_UNCONFIGURED)) {
 			hci_dev_set_flag(hdev, HCI_CONFIG);
 			hci_dev_set_flag(hdev, HCI_AUTO_OFF);
 
-- 
cgit v1.2.3


From a69d89272698d1c31ccb78348562af6461cf1eb7 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 13 Mar 2015 02:11:05 -0700
Subject: Bluetooth: Introduce hci_dev_test_and_clear_flag helper macro

Instead of manually coding test_and_clear_bit on hdev->dev_flags all the
time, use hci_dev_test_and_clear_flag helper macro.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         | 20 ++++++------
 net/bluetooth/hci_event.c        |  3 +-
 net/bluetooth/hci_request.c      |  2 +-
 net/bluetooth/mgmt.c             | 66 +++++++++++++++++++---------------------
 5 files changed, 44 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index cd2682c64a4c..4b7632b15051 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -507,6 +507,7 @@ extern struct mutex hci_cb_list_lock;
 #define hci_dev_change_flag(hdev, nr) change_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_test_flag(hdev, nr)   test_bit((nr), &(hdev)->dev_flags)
 
+#define hci_dev_test_and_clear_flag(hdev, nr)  test_and_clear_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), &(hdev)->dev_flags)
 
 /* ----- HCI interface to upper protocols ----- */
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 01710698e547..85d5222c70ae 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1554,7 +1554,7 @@ int hci_dev_open(__u16 dev)
 	 * particularly important if the setup procedure has not yet
 	 * completed.
 	 */
-	if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+	if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF))
 		cancel_delayed_work(&hdev->power_off);
 
 	/* After this call it is guaranteed that the setup procedure
@@ -1629,7 +1629,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 		hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
 	}
 
-	if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
+	if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE))
 		cancel_delayed_work(&hdev->service_cache);
 
 	cancel_delayed_work_sync(&hdev->le_scan_disable);
@@ -1647,7 +1647,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 
 	hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
 
-	if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+	if (!hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) {
 		if (hdev->dev_type == HCI_BREDR)
 			mgmt_powered(hdev, 0);
 	}
@@ -1728,7 +1728,7 @@ int hci_dev_close(__u16 dev)
 		goto done;
 	}
 
-	if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+	if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF))
 		cancel_delayed_work(&hdev->power_off);
 
 	err = hci_dev_do_close(hdev);
@@ -1839,16 +1839,16 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan)
 		conn_changed = !test_and_set_bit(HCI_CONNECTABLE,
 						 &hdev->dev_flags);
 	else
-		conn_changed = test_and_clear_bit(HCI_CONNECTABLE,
-						  &hdev->dev_flags);
+		conn_changed = hci_dev_test_and_clear_flag(hdev,
+							   HCI_CONNECTABLE);
 
 	if ((scan & SCAN_INQUIRY)) {
 		discov_changed = !test_and_set_bit(HCI_DISCOVERABLE,
 						   &hdev->dev_flags);
 	} else {
 		hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
-		discov_changed = test_and_clear_bit(HCI_DISCOVERABLE,
-						    &hdev->dev_flags);
+		discov_changed = hci_dev_test_and_clear_flag(hdev,
+							     HCI_DISCOVERABLE);
 	}
 
 	if (!hci_dev_test_flag(hdev, HCI_MGMT))
@@ -2128,7 +2128,7 @@ static void hci_power_on(struct work_struct *work)
 				   HCI_AUTO_OFF_TIMEOUT);
 	}
 
-	if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) {
+	if (hci_dev_test_and_clear_flag(hdev, HCI_SETUP)) {
 		/* For unconfigured devices, set the HCI_RAW flag
 		 * so that userspace can easily identify them.
 		 */
@@ -2143,7 +2143,7 @@ static void hci_power_on(struct work_struct *work)
 		 * and no event will be send.
 		 */
 		mgmt_index_added(hdev);
-	} else if (test_and_clear_bit(HCI_CONFIG, &hdev->dev_flags)) {
+	} else if (hci_dev_test_and_clear_flag(hdev, HCI_CONFIG)) {
 		/* When the controller is now configured, then it
 		 * is important to clear the HCI_RAW flag.
 		 */
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 808b78cf8ad0..4958b24ae5c7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1226,8 +1226,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 		 * been disabled because of active scanning, so
 		 * re-enable it again if necessary.
 		 */
-		if (test_and_clear_bit(HCI_LE_SCAN_INTERRUPTED,
-				       &hdev->dev_flags))
+		if (hci_dev_test_and_clear_flag(hdev, HCI_LE_SCAN_INTERRUPTED))
 			hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
 		else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) &&
 			 hdev->discovery.state == DISCOVERY_FINDING)
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index fd7b2a97740b..e85f9ec9f73a 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -329,7 +329,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 
 		*own_addr_type = ADDR_LE_DEV_RANDOM;
 
-		if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) &&
+		if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) &&
 		    !bacmp(&hdev->random_addr, &hdev->rpa))
 			return 0;
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 3c579a45cff0..cc5c04728848 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1140,7 +1140,7 @@ static void service_cache_off(struct work_struct *work)
 					    service_cache.work);
 	struct hci_request req;
 
-	if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
+	if (!hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE))
 		return;
 
 	hci_req_init(&req, hdev);
@@ -1419,7 +1419,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
 		goto failed;
 	}
 
-	if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+	if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) {
 		cancel_delayed_work(&hdev->power_off);
 
 		if (cp->val) {
@@ -1588,8 +1588,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status,
 					   to);
 		}
 	} else {
-		changed = test_and_clear_bit(HCI_DISCOVERABLE,
-					     &hdev->dev_flags);
+		changed = hci_dev_test_and_clear_flag(hdev, HCI_DISCOVERABLE);
 	}
 
 	send_settings_rsp(cmd->sk, MGMT_OP_SET_DISCOVERABLE, hdev);
@@ -1841,10 +1840,10 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status,
 						 &hdev->dev_flags);
 		discov_changed = false;
 	} else {
-		conn_changed = test_and_clear_bit(HCI_CONNECTABLE,
-						  &hdev->dev_flags);
-		discov_changed = test_and_clear_bit(HCI_DISCOVERABLE,
-						    &hdev->dev_flags);
+		conn_changed = hci_dev_test_and_clear_flag(hdev,
+							   HCI_CONNECTABLE);
+		discov_changed = hci_dev_test_and_clear_flag(hdev,
+							     HCI_DISCOVERABLE);
 	}
 
 	send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev);
@@ -2007,7 +2006,7 @@ static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data,
 	if (cp->val)
 		changed = !test_and_set_bit(HCI_BONDABLE, &hdev->dev_flags);
 	else
-		changed = test_and_clear_bit(HCI_BONDABLE, &hdev->dev_flags);
+		changed = hci_dev_test_and_clear_flag(hdev, HCI_BONDABLE);
 
 	err = send_settings_rsp(sk, MGMT_OP_SET_BONDABLE, hdev);
 	if (err < 0)
@@ -2120,11 +2119,11 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 			changed = !test_and_set_bit(HCI_SSP_ENABLED,
 						    &hdev->dev_flags);
 		} else {
-			changed = test_and_clear_bit(HCI_SSP_ENABLED,
-						     &hdev->dev_flags);
+			changed = hci_dev_test_and_clear_flag(hdev,
+							      HCI_SSP_ENABLED);
 			if (!changed)
-				changed = test_and_clear_bit(HCI_HS_ENABLED,
-							     &hdev->dev_flags);
+				changed = hci_dev_test_and_clear_flag(hdev,
+								      HCI_HS_ENABLED);
 			else
 				hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
 		}
@@ -2213,7 +2212,7 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 			goto unlock;
 		}
 
-		changed = test_and_clear_bit(HCI_HS_ENABLED, &hdev->dev_flags);
+		changed = hci_dev_test_and_clear_flag(hdev, HCI_HS_ENABLED);
 	}
 
 	err = send_settings_rsp(sk, MGMT_OP_SET_HS, hdev);
@@ -2629,7 +2628,7 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	hci_req_init(&req, hdev);
 
-	if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) {
+	if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) {
 		hci_dev_unlock(hdev);
 		cancel_delayed_work_sync(&hdev->service_cache);
 		hci_dev_lock(hdev);
@@ -2718,8 +2717,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 		changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS,
 					    &hdev->dev_flags);
 	else
-		changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS,
-					     &hdev->dev_flags);
+		changed = hci_dev_test_and_clear_flag(hdev,
+						      HCI_KEEP_DEBUG_KEYS);
 
 	if (changed)
 		new_settings(hdev, NULL);
@@ -4471,8 +4470,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 			else
 				hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 		} else {
-			changed = test_and_clear_bit(HCI_ADVERTISING,
-						     &hdev->dev_flags);
+			changed = hci_dev_test_and_clear_flag(hdev, HCI_ADVERTISING);
 			hci_dev_clear_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 		}
 
@@ -4949,8 +4947,8 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
 			else
 				hci_dev_clear_flag(hdev, HCI_SC_ONLY);
 		} else {
-			changed = test_and_clear_bit(HCI_SC_ENABLED,
-						     &hdev->dev_flags);
+			changed = hci_dev_test_and_clear_flag(hdev,
+							      HCI_SC_ENABLED);
 			hci_dev_clear_flag(hdev, HCI_SC_ONLY);
 		}
 
@@ -5016,15 +5014,15 @@ static int set_debug_keys(struct sock *sk, struct hci_dev *hdev,
 		changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS,
 					    &hdev->dev_flags);
 	else
-		changed = test_and_clear_bit(HCI_KEEP_DEBUG_KEYS,
-					     &hdev->dev_flags);
+		changed = hci_dev_test_and_clear_flag(hdev,
+						      HCI_KEEP_DEBUG_KEYS);
 
 	if (cp->val == 0x02)
 		use_changed = !test_and_set_bit(HCI_USE_DEBUG_KEYS,
 						&hdev->dev_flags);
 	else
-		use_changed = test_and_clear_bit(HCI_USE_DEBUG_KEYS,
-						 &hdev->dev_flags);
+		use_changed = hci_dev_test_and_clear_flag(hdev,
+							  HCI_USE_DEBUG_KEYS);
 
 	if (hdev_is_powered(hdev) && use_changed &&
 	    hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) {
@@ -5078,7 +5076,7 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 		memcpy(hdev->irk, cp->irk, sizeof(hdev->irk));
 		hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
 	} else {
-		changed = test_and_clear_bit(HCI_PRIVACY, &hdev->dev_flags);
+		changed = hci_dev_test_and_clear_flag(hdev, HCI_PRIVACY);
 		memset(hdev->irk, 0, sizeof(hdev->irk));
 		hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED);
 	}
@@ -6087,8 +6085,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev,
 		changed = !test_and_set_bit(HCI_EXT_CONFIGURED,
 					    &hdev->dev_flags);
 	else
-		changed = test_and_clear_bit(HCI_EXT_CONFIGURED,
-					     &hdev->dev_flags);
+		changed = hci_dev_test_and_clear_flag(hdev, HCI_EXT_CONFIGURED);
 
 	err = send_options_rsp(sk, MGMT_OP_SET_EXTERNAL_CONFIG, hdev);
 	if (err < 0)
@@ -7096,8 +7093,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
 		changed = !test_and_set_bit(HCI_LINK_SECURITY,
 					    &hdev->dev_flags);
 	else
-		changed = test_and_clear_bit(HCI_LINK_SECURITY,
-					     &hdev->dev_flags);
+		changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY);
 
 	mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp,
 			     &match);
@@ -7133,8 +7129,8 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
 	if (status) {
 		u8 mgmt_err = mgmt_status(status);
 
-		if (enable && test_and_clear_bit(HCI_SSP_ENABLED,
-						 &hdev->dev_flags)) {
+		if (enable && hci_dev_test_and_clear_flag(hdev,
+							  HCI_SSP_ENABLED)) {
 			hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
 			new_settings(hdev, NULL);
 		}
@@ -7147,10 +7143,10 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
 	if (enable) {
 		changed = !test_and_set_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
 	} else {
-		changed = test_and_clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
+		changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED);
 		if (!changed)
-			changed = test_and_clear_bit(HCI_HS_ENABLED,
-						     &hdev->dev_flags);
+			changed = hci_dev_test_and_clear_flag(hdev,
+							      HCI_HS_ENABLED);
 		else
 			hci_dev_clear_flag(hdev, HCI_HS_ENABLED);
 	}
-- 
cgit v1.2.3


From 238be788fcb75870661ec165dc90f2a2674e7fcb Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 13 Mar 2015 02:11:06 -0700
Subject: Bluetooth: Introduce hci_dev_test_and_set_flag helper macro

Instead of manually coding test_and_set_bit on hdev->dev_flags all the
time, use hci_dev_test_and_set_flag helper macro.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         |  8 +++----
 net/bluetooth/hci_sock.c         |  2 +-
 net/bluetooth/mgmt.c             | 46 +++++++++++++++++-----------------------
 4 files changed, 26 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 4b7632b15051..6db1333a114f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -507,6 +507,7 @@ extern struct mutex hci_cb_list_lock;
 #define hci_dev_change_flag(hdev, nr) change_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_test_flag(hdev, nr)   test_bit((nr), &(hdev)->dev_flags)
 
+#define hci_dev_test_and_set_flag(hdev, nr)    test_and_set_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_test_and_clear_flag(hdev, nr)  test_and_clear_bit((nr), &(hdev)->dev_flags)
 #define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), &(hdev)->dev_flags)
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 85d5222c70ae..c6ed46c4f45a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1836,15 +1836,15 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan)
 	BT_DBG("%s scan 0x%02x", hdev->name, scan);
 
 	if ((scan & SCAN_PAGE))
-		conn_changed = !test_and_set_bit(HCI_CONNECTABLE,
-						 &hdev->dev_flags);
+		conn_changed = !hci_dev_test_and_set_flag(hdev,
+							  HCI_CONNECTABLE);
 	else
 		conn_changed = hci_dev_test_and_clear_flag(hdev,
 							   HCI_CONNECTABLE);
 
 	if ((scan & SCAN_INQUIRY)) {
-		discov_changed = !test_and_set_bit(HCI_DISCOVERABLE,
-						   &hdev->dev_flags);
+		discov_changed = !hci_dev_test_and_set_flag(hdev,
+							    HCI_DISCOVERABLE);
 	} else {
 		hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
 		discov_changed = hci_dev_test_and_clear_flag(hdev,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index ca402a0a6740..b614543b4fe3 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -720,7 +720,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 			goto done;
 		}
 
-		if (test_and_set_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) {
+		if (hci_dev_test_and_set_flag(hdev, HCI_USER_CHANNEL)) {
 			err = -EUSERS;
 			hci_dev_put(hdev);
 			goto done;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index cc5c04728848..d97719d04be0 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1178,7 +1178,7 @@ static void rpa_expired(struct work_struct *work)
 
 static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
 {
-	if (test_and_set_bit(HCI_MGMT, &hdev->dev_flags))
+	if (hci_dev_test_and_set_flag(hdev, HCI_MGMT))
 		return;
 
 	INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off);
@@ -1579,8 +1579,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status,
 
 	cp = cmd->param;
 	if (cp->val) {
-		changed = !test_and_set_bit(HCI_DISCOVERABLE,
-					    &hdev->dev_flags);
+		changed = !hci_dev_test_and_set_flag(hdev, HCI_DISCOVERABLE);
 
 		if (hdev->discov_timeout > 0) {
 			int to = msecs_to_jiffies(hdev->discov_timeout * 1000);
@@ -1836,8 +1835,8 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status,
 
 	cp = cmd->param;
 	if (cp->val) {
-		conn_changed = !test_and_set_bit(HCI_CONNECTABLE,
-						 &hdev->dev_flags);
+		conn_changed = !hci_dev_test_and_set_flag(hdev,
+							  HCI_CONNECTABLE);
 		discov_changed = false;
 	} else {
 		conn_changed = hci_dev_test_and_clear_flag(hdev,
@@ -2004,7 +2003,7 @@ static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data,
 	hci_dev_lock(hdev);
 
 	if (cp->val)
-		changed = !test_and_set_bit(HCI_BONDABLE, &hdev->dev_flags);
+		changed = !hci_dev_test_and_set_flag(hdev, HCI_BONDABLE);
 	else
 		changed = hci_dev_test_and_clear_flag(hdev, HCI_BONDABLE);
 
@@ -2116,8 +2115,8 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		bool changed;
 
 		if (cp->val) {
-			changed = !test_and_set_bit(HCI_SSP_ENABLED,
-						    &hdev->dev_flags);
+			changed = !hci_dev_test_and_set_flag(hdev,
+							     HCI_SSP_ENABLED);
 		} else {
 			changed = hci_dev_test_and_clear_flag(hdev,
 							      HCI_SSP_ENABLED);
@@ -2204,7 +2203,7 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	}
 
 	if (cp->val) {
-		changed = !test_and_set_bit(HCI_HS_ENABLED, &hdev->dev_flags);
+		changed = !hci_dev_test_and_set_flag(hdev, HCI_HS_ENABLED);
 	} else {
 		if (hdev_is_powered(hdev)) {
 			err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
@@ -2487,7 +2486,7 @@ static bool enable_service_cache(struct hci_dev *hdev)
 	if (!hdev_is_powered(hdev))
 		return false;
 
-	if (!test_and_set_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) {
+	if (!hci_dev_test_and_set_flag(hdev, HCI_SERVICE_CACHE)) {
 		queue_delayed_work(hdev->workqueue, &hdev->service_cache,
 				   CACHE_TIMEOUT);
 		return true;
@@ -2714,8 +2713,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 	hci_link_keys_clear(hdev);
 
 	if (cp->debug_keys)
-		changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS,
-					    &hdev->dev_flags);
+		changed = !hci_dev_test_and_set_flag(hdev, HCI_KEEP_DEBUG_KEYS);
 	else
 		changed = hci_dev_test_and_clear_flag(hdev,
 						      HCI_KEEP_DEBUG_KEYS);
@@ -4463,8 +4461,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 		bool changed;
 
 		if (cp->val) {
-			changed = !test_and_set_bit(HCI_ADVERTISING,
-						    &hdev->dev_flags);
+			changed = !hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING);
 			if (cp->val == 0x02)
 				hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE);
 			else
@@ -4940,8 +4937,8 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
 		bool changed;
 
 		if (cp->val) {
-			changed = !test_and_set_bit(HCI_SC_ENABLED,
-						    &hdev->dev_flags);
+			changed = !hci_dev_test_and_set_flag(hdev,
+							     HCI_SC_ENABLED);
 			if (cp->val == 0x02)
 				hci_dev_set_flag(hdev, HCI_SC_ONLY);
 			else
@@ -5011,15 +5008,14 @@ static int set_debug_keys(struct sock *sk, struct hci_dev *hdev,
 	hci_dev_lock(hdev);
 
 	if (cp->val)
-		changed = !test_and_set_bit(HCI_KEEP_DEBUG_KEYS,
-					    &hdev->dev_flags);
+		changed = !hci_dev_test_and_set_flag(hdev, HCI_KEEP_DEBUG_KEYS);
 	else
 		changed = hci_dev_test_and_clear_flag(hdev,
 						      HCI_KEEP_DEBUG_KEYS);
 
 	if (cp->val == 0x02)
-		use_changed = !test_and_set_bit(HCI_USE_DEBUG_KEYS,
-						&hdev->dev_flags);
+		use_changed = !hci_dev_test_and_set_flag(hdev,
+							 HCI_USE_DEBUG_KEYS);
 	else
 		use_changed = hci_dev_test_and_clear_flag(hdev,
 							  HCI_USE_DEBUG_KEYS);
@@ -5072,7 +5068,7 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 	hci_dev_set_flag(hdev, HCI_RPA_RESOLVING);
 
 	if (cp->privacy) {
-		changed = !test_and_set_bit(HCI_PRIVACY, &hdev->dev_flags);
+		changed = !hci_dev_test_and_set_flag(hdev, HCI_PRIVACY);
 		memcpy(hdev->irk, cp->irk, sizeof(hdev->irk));
 		hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
 	} else {
@@ -6082,8 +6078,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev,
 	hci_dev_lock(hdev);
 
 	if (cp->config)
-		changed = !test_and_set_bit(HCI_EXT_CONFIGURED,
-					    &hdev->dev_flags);
+		changed = !hci_dev_test_and_set_flag(hdev, HCI_EXT_CONFIGURED);
 	else
 		changed = hci_dev_test_and_clear_flag(hdev, HCI_EXT_CONFIGURED);
 
@@ -7090,8 +7085,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
 	}
 
 	if (test_bit(HCI_AUTH, &hdev->flags))
-		changed = !test_and_set_bit(HCI_LINK_SECURITY,
-					    &hdev->dev_flags);
+		changed = !hci_dev_test_and_set_flag(hdev, HCI_LINK_SECURITY);
 	else
 		changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY);
 
@@ -7141,7 +7135,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
 	}
 
 	if (enable) {
-		changed = !test_and_set_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
+		changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED);
 	} else {
 		changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED);
 		if (!changed)
-- 
cgit v1.2.3


From 9c51026509d7fd11d84e0035008e1a9768960f2b Mon Sep 17 00:00:00 2001
From: Syed Asifful Dayyan <syedd@broadcom.com>
Date: Fri, 6 Mar 2015 18:40:42 +0100
Subject: brcmfmac: Add support for BCM4345 SDIO chipset.

These changes add support for BCM4345 SDIO chipset.

Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Arend Van Spriel <arend@broadcom.com>
Reviewed-by: Hante Meuleman <meuleman@broadcom.com>
Reviewed-by: Daniel (Deognyoun) Kim <dekim@broadcom.com>
Signed-off-by: Syed Asifful Dayyan <syedd@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c     | 1 +
 drivers/net/wireless/brcm80211/brcmfmac/chip.c       | 4 ++++
 drivers/net/wireless/brcm80211/brcmfmac/sdio.c       | 5 +++++
 drivers/net/wireless/brcm80211/include/brcm_hw_ids.h | 1 +
 include/linux/mmc/sdio_ids.h                         | 1 +
 5 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
index ffb0e2d382ea..43995308307d 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
@@ -1096,6 +1096,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339),
+	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354),
 	{ /* end: all zeroes */ }
 };
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/brcm80211/brcmfmac/chip.c
index 04d2ca0d87d6..e679edca081d 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/chip.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/chip.c
@@ -491,6 +491,10 @@ static void brcmf_chip_get_raminfo(struct brcmf_chip_priv *ci)
 	case BRCM_CC_43362_CHIP_ID:
 		ci->pub.ramsize = 0x3c000;
 		break;
+	case BRCM_CC_4345_CHIP_ID:
+		ci->pub.ramsize = 0xc8000;
+		ci->pub.rambase = 0x198000;
+		break;
 	case BRCM_CC_4339_CHIP_ID:
 	case BRCM_CC_4354_CHIP_ID:
 	case BRCM_CC_4356_CHIP_ID:
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c
index c9a9ff191616..973fa2afc2fc 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c
@@ -617,6 +617,8 @@ static const struct sdiod_drive_str sdiod_drvstr_tab2_3v3[] = {
 #define BCM43362_NVRAM_NAME		"brcm/brcmfmac43362-sdio.txt"
 #define BCM4339_FIRMWARE_NAME		"brcm/brcmfmac4339-sdio.bin"
 #define BCM4339_NVRAM_NAME		"brcm/brcmfmac4339-sdio.txt"
+#define BCM4345_FIRMWARE_NAME		"brcm/brcmfmac4345-sdio.bin"
+#define BCM4345_NVRAM_NAME		"brcm/brcmfmac4345-sdio.txt"
 #define BCM4354_FIRMWARE_NAME		"brcm/brcmfmac4354-sdio.bin"
 #define BCM4354_NVRAM_NAME		"brcm/brcmfmac4354-sdio.txt"
 
@@ -640,6 +642,8 @@ MODULE_FIRMWARE(BCM43362_FIRMWARE_NAME);
 MODULE_FIRMWARE(BCM43362_NVRAM_NAME);
 MODULE_FIRMWARE(BCM4339_FIRMWARE_NAME);
 MODULE_FIRMWARE(BCM4339_NVRAM_NAME);
+MODULE_FIRMWARE(BCM4345_FIRMWARE_NAME);
+MODULE_FIRMWARE(BCM4345_NVRAM_NAME);
 MODULE_FIRMWARE(BCM4354_FIRMWARE_NAME);
 MODULE_FIRMWARE(BCM4354_NVRAM_NAME);
 
@@ -669,6 +673,7 @@ static const struct brcmf_firmware_names brcmf_fwname_data[] = {
 	{ BRCM_CC_4335_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4335) },
 	{ BRCM_CC_43362_CHIP_ID, 0xFFFFFFFE, BRCMF_FIRMWARE_NVRAM(BCM43362) },
 	{ BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4339) },
+	{ BRCM_CC_4345_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4345) },
 	{ BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4354) }
 };
 
diff --git a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h
index 2124a17d0bfd..b599e7e41148 100644
--- a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h
+++ b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h
@@ -37,6 +37,7 @@
 #define BRCM_CC_43362_CHIP_ID		43362
 #define BRCM_CC_4335_CHIP_ID		0x4335
 #define BRCM_CC_4339_CHIP_ID		0x4339
+#define BRCM_CC_4345_CHIP_ID		0x4345
 #define BRCM_CC_4354_CHIP_ID		0x4354
 #define BRCM_CC_4356_CHIP_ID		0x4356
 #define BRCM_CC_43566_CHIP_ID		43566
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 996807963716..91397858dc95 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -33,6 +33,7 @@
 #define SDIO_DEVICE_ID_BROADCOM_43341		0xa94d
 #define SDIO_DEVICE_ID_BROADCOM_4335_4339	0x4335
 #define SDIO_DEVICE_ID_BROADCOM_43362		0xa962
+#define SDIO_DEVICE_ID_BROADCOM_4345		0x4345
 #define SDIO_DEVICE_ID_BROADCOM_4354		0x4354
 
 #define SDIO_VENDOR_ID_INTEL			0x0089
-- 
cgit v1.2.3


From 702131e2a393b45174be326f1dbe20b658b4f157 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Thu, 5 Mar 2015 18:25:10 +0100
Subject: bcma: move PCI IRQ control function to host specific code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function isn't really related to any bus core. It touches PCI
device config registers only, so move it to the (PCI) host file.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/driver_pci.c                      | 33 -------------------------
 drivers/bcma/host_pci.c                        | 34 ++++++++++++++++++++++++++
 drivers/net/wireless/b43/main.c                |  2 +-
 drivers/net/wireless/brcm80211/brcmsmac/main.c |  2 +-
 include/linux/bcma/bcma.h                      |  9 +++++++
 include/linux/bcma/bcma_driver_pci.h           |  2 --
 6 files changed, 45 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c
index cfd35bc1c5a3..f499a469e66d 100644
--- a/drivers/bcma/driver_pci.c
+++ b/drivers/bcma/driver_pci.c
@@ -282,39 +282,6 @@ void bcma_core_pci_power_save(struct bcma_bus *bus, bool up)
 }
 EXPORT_SYMBOL_GPL(bcma_core_pci_power_save);
 
-int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core,
-			  bool enable)
-{
-	struct pci_dev *pdev;
-	u32 coremask, tmp;
-	int err = 0;
-
-	if (bus->hosttype != BCMA_HOSTTYPE_PCI) {
-		/* This bcma device is not on a PCI host-bus. So the IRQs are
-		 * not routed through the PCI core.
-		 * So we must not enable routing through the PCI core. */
-		goto out;
-	}
-
-	pdev = bus->host_pci;
-
-	err = pci_read_config_dword(pdev, BCMA_PCI_IRQMASK, &tmp);
-	if (err)
-		goto out;
-
-	coremask = BIT(core->core_index) << 8;
-	if (enable)
-		tmp |= coremask;
-	else
-		tmp &= ~coremask;
-
-	err = pci_write_config_dword(pdev, BCMA_PCI_IRQMASK, tmp);
-
-out:
-	return err;
-}
-EXPORT_SYMBOL_GPL(bcma_core_pci_irq_ctl);
-
 static void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend)
 {
 	u32 w;
diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index a62a2f9091f5..0856189c065f 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -351,3 +351,37 @@ void bcma_host_pci_down(struct bcma_bus *bus)
 		bcma_core_pci_down(&bus->drv_pci[0]);
 }
 EXPORT_SYMBOL_GPL(bcma_host_pci_down);
+
+/* See also si_pci_setup */
+int bcma_host_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core,
+			  bool enable)
+{
+	struct pci_dev *pdev;
+	u32 coremask, tmp;
+	int err = 0;
+
+	if (bus->hosttype != BCMA_HOSTTYPE_PCI) {
+		/* This bcma device is not on a PCI host-bus. So the IRQs are
+		 * not routed through the PCI core.
+		 * So we must not enable routing through the PCI core. */
+		goto out;
+	}
+
+	pdev = bus->host_pci;
+
+	err = pci_read_config_dword(pdev, BCMA_PCI_IRQMASK, &tmp);
+	if (err)
+		goto out;
+
+	coremask = BIT(core->core_index) << 8;
+	if (enable)
+		tmp |= coremask;
+	else
+		tmp &= ~coremask;
+
+	err = pci_write_config_dword(pdev, BCMA_PCI_IRQMASK, tmp);
+
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(bcma_host_pci_irq_ctl);
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index ac99798570e8..998490a7b167 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4866,7 +4866,7 @@ static int b43_wireless_core_init(struct b43_wldev *dev)
 	switch (dev->dev->bus_type) {
 #ifdef CONFIG_B43_BCMA
 	case B43_BUS_BCMA:
-		bcma_core_pci_irq_ctl(dev->dev->bdev->bus,
+		bcma_host_pci_irq_ctl(dev->dev->bdev->bus,
 				      dev->dev->bdev, true);
 		bcma_host_pci_up(dev->dev->bdev->bus);
 		break;
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c
index c84af1dfc88f..369527e27689 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c
@@ -4959,7 +4959,7 @@ static int brcms_b_up_prep(struct brcms_hardware *wlc_hw)
 	 * Configure pci/pcmcia here instead of in brcms_c_attach()
 	 * to allow mfg hotswap:  down, hotswap (chip power cycle), up.
 	 */
-	bcma_core_pci_irq_ctl(wlc_hw->d11core->bus, wlc_hw->d11core,
+	bcma_host_pci_irq_ctl(wlc_hw->d11core->bus, wlc_hw->d11core,
 			      true);
 
 	/*
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 44057b45ed32..e34f906647d3 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -437,6 +437,8 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus,
 #ifdef CONFIG_BCMA_HOST_PCI
 extern void bcma_host_pci_up(struct bcma_bus *bus);
 extern void bcma_host_pci_down(struct bcma_bus *bus);
+extern int bcma_host_pci_irq_ctl(struct bcma_bus *bus,
+				 struct bcma_device *core, bool enable);
 #else
 static inline void bcma_host_pci_up(struct bcma_bus *bus)
 {
@@ -444,6 +446,13 @@ static inline void bcma_host_pci_up(struct bcma_bus *bus)
 static inline void bcma_host_pci_down(struct bcma_bus *bus)
 {
 }
+static inline int bcma_host_pci_irq_ctl(struct bcma_bus *bus,
+					struct bcma_device *core, bool enable)
+{
+	if (bus->hosttype == BCMA_HOSTTYPE_PCI)
+		return -ENOTSUPP;
+	return 0;
+}
 #endif
 
 extern bool bcma_core_is_enabled(struct bcma_device *core);
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 8e90004fdfd7..3a468687c170 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -238,8 +238,6 @@ struct bcma_drv_pci {
 #define pcicore_write16(pc, offset, val)	bcma_write16((pc)->core, offset, val)
 #define pcicore_write32(pc, offset, val)	bcma_write32((pc)->core, offset, val)
 
-extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus,
-				 struct bcma_device *core, bool enable);
 extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up);
 
 extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev);
-- 
cgit v1.2.3


From 982a40f5c0bb03368989a6b1ae833b474854e931 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Thu, 5 Mar 2015 18:25:11 +0100
Subject: bcma: allow disabling (not building) PCI driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It isn't required for bcma bus on SoCs, so provide some empty functions
and allow disabling it.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/Kconfig                 |  4 ++--
 drivers/bcma/bcma_private.h          | 20 ++++++++++++++++++++
 include/linux/bcma/bcma_driver_pci.h |  6 ++++++
 3 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index 9be17d3431bb..1500b7120fc7 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -45,9 +45,9 @@ config BCMA_HOST_SOC
 
 	  If unsure, say N
 
-# TODO: make it depend on PCI when ready
 config BCMA_DRIVER_PCI
-	bool
+	bool "BCMA Broadcom PCI core driver"
+	depends on BCMA && PCI
 	default y
 	help
 	  BCMA bus may have many versions of PCIe core. This driver
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 5a1d22489afc..15f2b2e242ea 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -106,15 +106,35 @@ static inline void __exit bcma_host_soc_unregister_driver(void)
 #endif /* CONFIG_BCMA_HOST_SOC && CONFIG_OF */
 
 /* driver_pci.c */
+#ifdef CONFIG_BCMA_DRIVER_PCI
 u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address);
 void bcma_core_pci_early_init(struct bcma_drv_pci *pc);
 void bcma_core_pci_init(struct bcma_drv_pci *pc);
 void bcma_core_pci_up(struct bcma_drv_pci *pc);
 void bcma_core_pci_down(struct bcma_drv_pci *pc);
+#else
+static inline void bcma_core_pci_early_init(struct bcma_drv_pci *pc)
+{
+	WARN_ON(pc->core->bus->hosttype == BCMA_HOSTTYPE_PCI);
+}
+static inline void bcma_core_pci_init(struct bcma_drv_pci *pc)
+{
+	/* Initialization is required for PCI hosted bus */
+	WARN_ON(pc->core->bus->hosttype == BCMA_HOSTTYPE_PCI);
+}
+#endif
 
 /* driver_pcie2.c */
+#ifdef CONFIG_BCMA_DRIVER_PCI
 void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2);
 void bcma_core_pcie2_up(struct bcma_drv_pcie2 *pcie2);
+#else
+static inline void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2)
+{
+	/* Initialization is required for PCI hosted bus */
+	WARN_ON(pcie2->core->bus->hosttype == BCMA_HOSTTYPE_PCI);
+}
+#endif
 
 extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc);
 
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 3a468687c170..5ba6918ca20b 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -238,7 +238,13 @@ struct bcma_drv_pci {
 #define pcicore_write16(pc, offset, val)	bcma_write16((pc)->core, offset, val)
 #define pcicore_write32(pc, offset, val)	bcma_write32((pc)->core, offset, val)
 
+#ifdef CONFIG_BCMA_DRIVER_PCI
 extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up);
+#else
+static inline void bcma_core_pci_power_save(struct bcma_bus *bus, bool up)
+{
+}
+#endif
 
 extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev);
 extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev);
-- 
cgit v1.2.3


From eacb44dff98559d4682072c0061e1ecb63687e9c Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 13 Mar 2015 09:04:17 -0700
Subject: Bluetooth: Use DECLARE_BITMAP for hdev->dev_flags field

The hdev->dev_flags field has outgrown itself on 32-bit systems. So
instead of hacking around it, switch to using DECLARE_BITMAP.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h      |  7 +------
 include/net/bluetooth/hci_core.h | 24 +++++++++++++++---------
 net/bluetooth/hci_core.c         |  2 +-
 net/bluetooth/hci_event.c        |  2 +-
 4 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 0f3413b285a5..7a0272a6f0ba 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -226,14 +226,9 @@ enum {
 	HCI_FAST_CONNECTABLE,
 	HCI_BREDR_ENABLED,
 	HCI_LE_SCAN_INTERRUPTED,
+	__HCI_NUM_FLAGS,
 };
 
-/* A mask for the flags that are supposed to remain when a reset happens
- * or the HCI device is closed.
- */
-#define HCI_PERSISTENT_MASK (BIT(HCI_LE_SCAN) | BIT(HCI_PERIODIC_INQ) | \
-			     BIT(HCI_LE_ADV))
-
 /* HCI timeouts */
 #define HCI_DISCONN_TIMEOUT	msecs_to_jiffies(2000)	/* 2 seconds */
 #define HCI_PAIRING_TIMEOUT	msecs_to_jiffies(60000)	/* 60 seconds */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 6db1333a114f..889a489d913f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -354,7 +354,7 @@ struct hci_dev {
 	struct rfkill		*rfkill;
 
 	unsigned long		dbg_flags;
-	unsigned long		dev_flags;
+	DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS);
 
 	struct delayed_work	le_scan_disable;
 	struct delayed_work	le_scan_restart;
@@ -502,14 +502,20 @@ extern struct list_head hci_cb_list;
 extern rwlock_t hci_dev_list_lock;
 extern struct mutex hci_cb_list_lock;
 
-#define hci_dev_set_flag(hdev, nr)    set_bit((nr), &(hdev)->dev_flags)
-#define hci_dev_clear_flag(hdev, nr)  clear_bit((nr), &(hdev)->dev_flags)
-#define hci_dev_change_flag(hdev, nr) change_bit((nr), &(hdev)->dev_flags)
-#define hci_dev_test_flag(hdev, nr)   test_bit((nr), &(hdev)->dev_flags)
-
-#define hci_dev_test_and_set_flag(hdev, nr)    test_and_set_bit((nr), &(hdev)->dev_flags)
-#define hci_dev_test_and_clear_flag(hdev, nr)  test_and_clear_bit((nr), &(hdev)->dev_flags)
-#define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), &(hdev)->dev_flags)
+#define hci_dev_set_flag(hdev, nr)             set_bit((nr), (hdev)->dev_flags)
+#define hci_dev_clear_flag(hdev, nr)           clear_bit((nr), (hdev)->dev_flags)
+#define hci_dev_change_flag(hdev, nr)          change_bit((nr), (hdev)->dev_flags)
+#define hci_dev_test_flag(hdev, nr)            test_bit((nr), (hdev)->dev_flags)
+#define hci_dev_test_and_set_flag(hdev, nr)    test_and_set_bit((nr), (hdev)->dev_flags)
+#define hci_dev_test_and_clear_flag(hdev, nr)  test_and_clear_bit((nr), (hdev)->dev_flags)
+#define hci_dev_test_and_change_flag(hdev, nr) test_and_change_bit((nr), (hdev)->dev_flags)
+
+#define hci_dev_clear_volatile_flags(hdev)			\
+	do {							\
+		hci_dev_clear_flag(hdev, HCI_LE_SCAN);		\
+		hci_dev_clear_flag(hdev, HCI_LE_ADV);		\
+		hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ);	\
+	} while (0)
 
 /* ----- HCI interface to upper protocols ----- */
 int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c6ed46c4f45a..23a43ca98785 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1699,7 +1699,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 
 	/* Clear flags */
 	hdev->flags &= BIT(HCI_RAW);
-	hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
+	hci_dev_clear_volatile_flags(hdev);
 
 	/* Controller radio is available but is currently powered down */
 	hdev->amp_status = AMP_STATUS_POWERED_DOWN;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4958b24ae5c7..c7376cd42b1c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -198,7 +198,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
 		return;
 
 	/* Reset all non-persistent flags */
-	hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
+	hci_dev_clear_volatile_flags(hdev);
 
 	hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
 
-- 
cgit v1.2.3


From b7cb93e52839ee44959adabc17c2a17422e6bd4b Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 13 Mar 2015 10:20:35 -0700
Subject: Bluetooth: Merge hdev->dbg_flags fields into hdev->dev_flags

With the extension of hdev->dev_flags utilizing a bitmap now, the space
is no longer restricted. Merge the hdev->dbg_flags into hdev->dev_flags
to save space on 64-bit architectures. On 32-bit architectures no size
reduction happens.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h      | 14 +++++---------
 include/net/bluetooth/hci_core.h |  1 -
 net/bluetooth/hci_core.c         |  8 ++++----
 net/bluetooth/hci_debugfs.c      |  6 +++---
 net/bluetooth/hci_request.c      |  2 +-
 net/bluetooth/l2cap_core.c       |  2 +-
 net/bluetooth/mgmt.c             |  2 +-
 net/bluetooth/smp.c              | 10 +++++-----
 8 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 7a0272a6f0ba..d942fedbaedd 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -179,15 +179,6 @@ enum {
 	HCI_RESET,
 };
 
-/* BR/EDR and/or LE controller flags: the flags defined here should represent
- * states configured via debugfs for debugging and testing purposes only.
- */
-enum {
-	HCI_DUT_MODE,
-	HCI_FORCE_BREDR_SMP,
-	HCI_FORCE_STATIC_ADDR,
-};
-
 /*
  * BR/EDR and/or LE controller flags: the flags defined here should represent
  * states from the controller.
@@ -226,6 +217,11 @@ enum {
 	HCI_FAST_CONNECTABLE,
 	HCI_BREDR_ENABLED,
 	HCI_LE_SCAN_INTERRUPTED,
+
+	HCI_DUT_MODE,
+	HCI_FORCE_BREDR_SMP,
+	HCI_FORCE_STATIC_ADDR,
+
 	__HCI_NUM_FLAGS,
 };
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 889a489d913f..6afbf5b014a1 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -353,7 +353,6 @@ struct hci_dev {
 
 	struct rfkill		*rfkill;
 
-	unsigned long		dbg_flags;
 	DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS);
 
 	struct delayed_work	le_scan_disable;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 23a43ca98785..750d3445f2d2 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -80,7 +80,7 @@ static ssize_t dut_mode_read(struct file *file, char __user *user_buf,
 	struct hci_dev *hdev = file->private_data;
 	char buf[3];
 
-	buf[0] = test_bit(HCI_DUT_MODE, &hdev->dbg_flags) ? 'Y': 'N';
+	buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y': 'N';
 	buf[1] = '\n';
 	buf[2] = '\0';
 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -106,7 +106,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
 	if (strtobool(buf, &enable))
 		return -EINVAL;
 
-	if (enable == test_bit(HCI_DUT_MODE, &hdev->dbg_flags))
+	if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE))
 		return -EALREADY;
 
 	hci_req_lock(hdev);
@@ -127,7 +127,7 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
 	if (err < 0)
 		return err;
 
-	change_bit(HCI_DUT_MODE, &hdev->dbg_flags);
+	hci_dev_change_flag(hdev, HCI_DUT_MODE);
 
 	return count;
 }
@@ -3019,7 +3019,7 @@ static void le_scan_restart_work(struct work_struct *work)
 void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr,
 			       u8 *bdaddr_type)
 {
-	if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
+	if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ||
 	    !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
 	    (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
 	     bacmp(&hdev->static_addr, BDADDR_ANY))) {
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 3c025ee5572c..bc801e9db834 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -679,7 +679,7 @@ static ssize_t force_static_address_read(struct file *file,
 	struct hci_dev *hdev = file->private_data;
 	char buf[3];
 
-	buf[0] = test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ? 'Y': 'N';
+	buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ? 'Y': 'N';
 	buf[1] = '\n';
 	buf[2] = '\0';
 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -704,10 +704,10 @@ static ssize_t force_static_address_write(struct file *file,
 	if (strtobool(buf, &enable))
 		return -EINVAL;
 
-	if (enable == test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags))
+	if (enable == hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR))
 		return -EALREADY;
 
-	change_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags);
+	hci_dev_change_flag(hdev, HCI_FORCE_STATIC_ADDR);
 
 	return count;
 }
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index e85f9ec9f73a..55e096d20a0f 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -383,7 +383,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 	 * and a static address has been configured, then use that
 	 * address instead of the public BR/EDR address.
 	 */
-	if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
+	if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ||
 	    !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
 	    (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
 	     bacmp(&hdev->static_addr, BDADDR_ANY))) {
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index af30d8240c80..d69861c89bb5 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6992,7 +6992,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
 
 	if (hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED) &&
 	    (bredr_sc_enabled(hcon->hdev) ||
-	     test_bit(HCI_FORCE_BREDR_SMP, &hcon->hdev->dbg_flags)))
+	     hci_dev_test_flag(hcon->hdev, HCI_FORCE_BREDR_SMP)))
 		conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR;
 
 	mutex_init(&conn->ident_lock);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d97719d04be0..c58908652519 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -659,7 +659,7 @@ static u32 get_current_settings(struct hci_dev *hdev)
 	 * with BR/EDR disabled, the existence of the static address will
 	 * be evaluated.
 	 */
-	if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) ||
+	if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ||
 	    !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) ||
 	    !bacmp(&hdev->bdaddr, BDADDR_ANY)) {
 		if (bacmp(&hdev->static_addr, BDADDR_ANY))
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 81975f274c2b..9155840068cf 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1679,7 +1679,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
 	if (conn->hcon->type == ACL_LINK) {
 		/* We must have a BR/EDR SC link */
 		if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) &&
-		    !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags))
+		    !hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP))
 			return SMP_CROSS_TRANSP_NOT_ALLOWED;
 
 		set_bit(SMP_FLAG_SC, &smp->flags);
@@ -2749,7 +2749,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
 
 	/* BR/EDR must use Secure Connections for SMP */
 	if (!test_bit(HCI_CONN_AES_CCM, &hcon->flags) &&
-	    !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags))
+	    !hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP))
 		return;
 
 	/* If our LE support is not enabled don't do anything */
@@ -3003,7 +3003,7 @@ static ssize_t force_bredr_smp_read(struct file *file,
 	struct hci_dev *hdev = file->private_data;
 	char buf[3];
 
-	buf[0] = test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags) ? 'Y': 'N';
+	buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP) ? 'Y': 'N';
 	buf[1] = '\n';
 	buf[2] = '\0';
 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -3025,7 +3025,7 @@ static ssize_t force_bredr_smp_write(struct file *file,
 	if (strtobool(buf, &enable))
 		return -EINVAL;
 
-	if (enable == test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags))
+	if (enable == hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP))
 		return -EALREADY;
 
 	if (enable) {
@@ -3044,7 +3044,7 @@ static ssize_t force_bredr_smp_write(struct file *file,
 		smp_del_chan(chan);
 	}
 
-	change_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags);
+	hci_dev_change_flag(hdev, HCI_FORCE_BREDR_SMP);
 
 	return count;
 }
-- 
cgit v1.2.3


From 3f3c4bb5ec7c645d1151e1e8d6e56c71a050cf85 Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Wed, 4 Mar 2015 21:19:59 +0100
Subject: mac802154: correct max sifs size handling

This patch fix the max sifs size correction when the
IEEE802154_HW_TX_OMIT_CKSUM flag is set. With this flag the sk_buff
doesn't contain the CRC, because the transceiver will add the CRC
while transmit.

Also add some defines for the max sifs frame size value and frame check
sequence according to 802.15.4 standard.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/linux/ieee802154.h |  2 ++
 net/mac802154/util.c       | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h
index 40b0ab953937..8872ca103d06 100644
--- a/include/linux/ieee802154.h
+++ b/include/linux/ieee802154.h
@@ -30,6 +30,7 @@
 #define IEEE802154_MTU			127
 #define IEEE802154_ACK_PSDU_LEN		5
 #define IEEE802154_MIN_PSDU_LEN		9
+#define IEEE802154_FCS_LEN		2
 
 #define IEEE802154_PAN_ID_BROADCAST	0xffff
 #define IEEE802154_ADDR_SHORT_BROADCAST	0xffff
@@ -39,6 +40,7 @@
 
 #define IEEE802154_LIFS_PERIOD		40
 #define IEEE802154_SIFS_PERIOD		12
+#define IEEE802154_MAX_SIFS_FRAME_SIZE	18
 
 #define IEEE802154_MAX_CHANNEL		26
 #define IEEE802154_MAX_PAGE		31
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index 5fc979027919..150bf807e572 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -65,8 +65,19 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
 {
 	if (ifs_handling) {
 		struct ieee802154_local *local = hw_to_local(hw);
+		u8 max_sifs_size;
 
-		if (skb->len > 18)
+		/* If transceiver sets CRC on his own we need to use lifs
+		 * threshold len above 16 otherwise 18, because it's not
+		 * part of skb->len.
+		 */
+		if (hw->flags & IEEE802154_HW_TX_OMIT_CKSUM)
+			max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE -
+					IEEE802154_FCS_LEN;
+		else
+			max_sifs_size = IEEE802154_MAX_SIFS_FRAME_SIZE;
+
+		if (skb->len > max_sifs_size)
 			hrtimer_start(&local->ifs_timer,
 				      ktime_set(0, hw->phy->lifs_period * NSEC_PER_USEC),
 				      HRTIMER_MODE_REL);
-- 
cgit v1.2.3


From eddee5ba34eb6c9890ef106f19ead2b370e5342f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 14 Mar 2015 13:57:20 +1100
Subject: rhashtable: Fix walker behaviour during rehash

Previously whenever the walker encountered a resize it simply
snaps back to the beginning and starts again.  However, this only
works if the rehash started and completed while the walker was
idle.

If the walker attempts to restart while the rehash is still ongoing,
we may miss objects that we shouldn't have.

This patch fixes this by making the walker walk the old table
followed by the new table just like all other readers.  If a
rehash is detected we will still signal our caller of the fact
so they can prepare for duplicates but we will simply continue
the walk onto the new table after the old one is finished either
by us or by the rehasher.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  8 +++---
 lib/rhashtable.c           | 69 ++++++++++++++++++++++++++++++----------------
 2 files changed, 50 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index c93ff8ac474a..4192682c1d5c 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -53,6 +53,7 @@ struct rhash_head {
  * @shift: Current size (1 << shift)
  * @locks_mask: Mask to apply before accessing locks[]
  * @locks: Array of spinlocks protecting individual buckets
+ * @walkers: List of active walkers
  * @buckets: size * hash buckets
  */
 struct bucket_table {
@@ -61,6 +62,7 @@ struct bucket_table {
 	u32			shift;
 	unsigned int		locks_mask;
 	spinlock_t		*locks;
+	struct list_head	walkers;
 
 	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
 };
@@ -104,7 +106,6 @@ struct rhashtable_params {
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
- * @walkers: List of active walkers
  * @being_destroyed: True if table is set up for destruction
  */
 struct rhashtable {
@@ -115,17 +116,16 @@ struct rhashtable {
 	struct rhashtable_params	p;
 	struct work_struct		run_work;
 	struct mutex                    mutex;
-	struct list_head		walkers;
 };
 
 /**
  * struct rhashtable_walker - Hash table walker
  * @list: List entry on list of walkers
- * @resize: Resize event occured
+ * @tbl: The table that we were walking over
  */
 struct rhashtable_walker {
 	struct list_head list;
-	bool resize;
+	struct bucket_table *tbl;
 };
 
 /**
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index fc0d451279f0..f7c76079f8f1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -170,6 +170,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 		return NULL;
 	}
 
+	INIT_LIST_HEAD(&tbl->walkers);
+
 	for (i = 0; i < nbuckets; i++)
 		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
 
@@ -264,6 +266,7 @@ static void rhashtable_rehash(struct rhashtable *ht,
 			      struct bucket_table *new_tbl)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct rhashtable_walker *walker;
 	unsigned old_hash;
 
 	get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd));
@@ -284,6 +287,9 @@ static void rhashtable_rehash(struct rhashtable *ht,
 	/* Publish the new table pointer. */
 	rcu_assign_pointer(ht->tbl, new_tbl);
 
+	list_for_each_entry(walker, &old_tbl->walkers, list)
+		walker->tbl = NULL;
+
 	/* Wait for readers. All new readers will see the new
 	 * table, and thus no references to the old table will
 	 * remain.
@@ -358,7 +364,6 @@ static void rht_deferred_worker(struct work_struct *work)
 {
 	struct rhashtable *ht;
 	struct bucket_table *tbl;
-	struct rhashtable_walker *walker;
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
@@ -367,9 +372,6 @@ static void rht_deferred_worker(struct work_struct *work)
 
 	tbl = rht_dereference(ht->tbl, ht);
 
-	list_for_each_entry(walker, &ht->walkers, list)
-		walker->resize = true;
-
 	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
 	else if (rht_shrink_below_30(ht, tbl))
@@ -725,11 +727,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
 	if (!iter->walker)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&iter->walker->list);
-	iter->walker->resize = false;
-
 	mutex_lock(&ht->mutex);
-	list_add(&iter->walker->list, &ht->walkers);
+	iter->walker->tbl = rht_dereference(ht->tbl, ht);
+	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
 	mutex_unlock(&ht->mutex);
 
 	return 0;
@@ -745,7 +745,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
 	mutex_lock(&iter->ht->mutex);
-	list_del(&iter->walker->list);
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
 	mutex_unlock(&iter->ht->mutex);
 	kfree(iter->walker);
 }
@@ -767,12 +768,19 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
  */
 int rhashtable_walk_start(struct rhashtable_iter *iter)
 {
+	struct rhashtable *ht = iter->ht;
+
+	mutex_lock(&ht->mutex);
+
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
+
 	rcu_read_lock();
 
-	if (iter->walker->resize) {
-		iter->slot = 0;
-		iter->skip = 0;
-		iter->walker->resize = false;
+	mutex_unlock(&ht->mutex);
+
+	if (!iter->walker->tbl) {
+		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
 		return -EAGAIN;
 	}
 
@@ -794,13 +802,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
  */
 void *rhashtable_walk_next(struct rhashtable_iter *iter)
 {
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl = iter->walker->tbl;
 	struct rhashtable *ht = iter->ht;
 	struct rhash_head *p = iter->p;
 	void *obj = NULL;
 
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-
 	if (p) {
 		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
 		goto next;
@@ -826,17 +832,18 @@ next:
 		iter->skip = 0;
 	}
 
-	iter->p = NULL;
-
-out:
-	if (iter->walker->resize) {
-		iter->p = NULL;
+	iter->walker->tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	if (iter->walker->tbl != tbl) {
 		iter->slot = 0;
 		iter->skip = 0;
-		iter->walker->resize = false;
 		return ERR_PTR(-EAGAIN);
 	}
 
+	iter->walker->tbl = NULL;
+	iter->p = NULL;
+
+out:
+
 	return obj;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -849,7 +856,24 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
  */
 void rhashtable_walk_stop(struct rhashtable_iter *iter)
 {
+	struct rhashtable *ht;
+	struct bucket_table *tbl = iter->walker->tbl;
+
 	rcu_read_unlock();
+
+	if (!tbl)
+		return;
+
+	ht = iter->ht;
+
+	mutex_lock(&ht->mutex);
+	if (rht_dereference(ht->tbl, ht) == tbl ||
+	    rht_dereference(ht->future_tbl, ht) == tbl)
+		list_add(&iter->walker->list, &tbl->walkers);
+	else
+		iter->walker->tbl = NULL;
+	mutex_unlock(&ht->mutex);
+
 	iter->p = NULL;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
@@ -927,7 +951,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	memset(ht, 0, sizeof(*ht));
 	mutex_init(&ht->mutex);
 	memcpy(&ht->p, params, sizeof(*params));
-	INIT_LIST_HEAD(&ht->walkers);
 
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
-- 
cgit v1.2.3


From 9d901bc05153bbf33b5da2cd6266865e531f0545 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 14 Mar 2015 13:57:23 +1100
Subject: rhashtable: Free bucket tables asynchronously after rehash

There is in fact no need to wait for an RCU grace period in the
rehash function, since all insertions are guaranteed to go into
the new table through spin locks.

This patch uses call_rcu to free the old/rehashed table at our
leisure.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 2 ++
 lib/rhashtable.c           | 9 ++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 4192682c1d5c..a0abddd226b3 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -54,6 +54,7 @@ struct rhash_head {
  * @locks_mask: Mask to apply before accessing locks[]
  * @locks: Array of spinlocks protecting individual buckets
  * @walkers: List of active walkers
+ * @rcu: RCU structure for freeing the table
  * @buckets: size * hash buckets
  */
 struct bucket_table {
@@ -63,6 +64,7 @@ struct bucket_table {
 	unsigned int		locks_mask;
 	spinlock_t		*locks;
 	struct list_head	walkers;
+	struct rcu_head		rcu;
 
 	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
 };
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e55bbc84c449..36fb0910bec2 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -141,6 +141,11 @@ static void bucket_table_free(const struct bucket_table *tbl)
 	kvfree(tbl);
 }
 
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+	bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 					       size_t nbuckets)
 {
@@ -288,9 +293,7 @@ static void rhashtable_rehash(struct rhashtable *ht,
 	 * table, and thus no references to the old table will
 	 * remain.
 	 */
-	synchronize_rcu();
-
-	bucket_table_free(old_tbl);
+	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
 }
 
 /**
-- 
cgit v1.2.3


From 63d512d0cffcae40505d9448abd509972465e846 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 14 Mar 2015 13:57:24 +1100
Subject: rhashtable: Add rehash counter to bucket_table

This patch adds a rehash counter to bucket_table to indicate
the last bucket that has been rehashed.  This serves two purposes:

1. Any bucket that has been rehashed can never gain a new object.
2. If the rehash counter reaches the size of the table, the table
will forever remain empty.

This patch also downsizes bucket_table->size to an unsigned int
since we do not support sizes greater than 32 bits yet.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 4 +++-
 lib/rhashtable.c           | 1 +
 lib/test_rhashtable.c      | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index a0abddd226b3..ed7562ad4ca0 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -49,6 +49,7 @@ struct rhash_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
+ * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
  * @shift: Current size (1 << shift)
  * @locks_mask: Mask to apply before accessing locks[]
@@ -58,7 +59,8 @@ struct rhash_head {
  * @buckets: size * hash buckets
  */
 struct bucket_table {
-	size_t			size;
+	unsigned int		size;
+	unsigned int		rehash;
 	u32			hash_rnd;
 	u32			shift;
 	unsigned int		locks_mask;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 36fb0910bec2..ff4ea1704546 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -260,6 +260,7 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
 	spin_lock_bh(old_bucket_lock);
 	while (!rhashtable_rehash_one(ht, old_hash))
 		;
+	old_tbl->rehash++;
 	spin_unlock_bh(old_bucket_lock);
 }
 
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 67c7593d1dd6..16974fd89e4e 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -80,7 +80,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet)
 		rcu_cnt = cnt = 0;
 
 		if (!quiet)
-			pr_info(" [%#4x/%zu]", i, tbl->size);
+			pr_info(" [%#4x/%u]", i, tbl->size);
 
 		rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
 			cnt++;
-- 
cgit v1.2.3


From c4db8848af6af92f90462258603be844baeab44d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 14 Mar 2015 13:57:25 +1100
Subject: rhashtable: Move future_tbl into struct bucket_table

This patch moves future_tbl to open up the possibility of having
multiple rehashes on the same table.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  5 +++--
 lib/rhashtable.c           | 27 +++++++++++----------------
 2 files changed, 14 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index ed7562ad4ca0..1695378b3c5b 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -56,6 +56,7 @@ struct rhash_head {
  * @locks: Array of spinlocks protecting individual buckets
  * @walkers: List of active walkers
  * @rcu: RCU structure for freeing the table
+ * @future_tbl: Table under construction during rehashing
  * @buckets: size * hash buckets
  */
 struct bucket_table {
@@ -68,6 +69,8 @@ struct bucket_table {
 	struct list_head	walkers;
 	struct rcu_head		rcu;
 
+	struct bucket_table __rcu *future_tbl;
+
 	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
 };
 
@@ -105,7 +108,6 @@ struct rhashtable_params {
 /**
  * struct rhashtable - Hash table handle
  * @tbl: Bucket table
- * @future_tbl: Table under construction during expansion/shrinking
  * @nelems: Number of elements in table
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
@@ -114,7 +116,6 @@ struct rhashtable_params {
  */
 struct rhashtable {
 	struct bucket_table __rcu	*tbl;
-	struct bucket_table __rcu       *future_tbl;
 	atomic_t			nelems;
 	bool                            being_destroyed;
 	struct rhashtable_params	p;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index ff4ea1704546..9d53a46dcca9 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -207,8 +207,9 @@ static bool rht_shrink_below_30(const struct rhashtable *ht,
 
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 {
-	struct bucket_table *new_tbl = rht_dereference(ht->future_tbl, ht);
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl =
+		rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl;
 	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
@@ -273,10 +274,8 @@ static void rhashtable_rehash(struct rhashtable *ht,
 
 	/* Make insertions go into the new, empty table right away. Deletions
 	 * and lookups will be attempted in both tables until we synchronize.
-	 * The synchronize_rcu() guarantees for the new table to be picked up
-	 * so no new additions go into the old table while we relink.
 	 */
-	rcu_assign_pointer(ht->future_tbl, new_tbl);
+	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
 
 	/* Ensure the new table is visible to readers. */
 	smp_wmb();
@@ -400,7 +399,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	 * also grab the bucket lock in old_tbl because until the
 	 * rehash completes ht->tbl won't be changed.
 	 */
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
 	if (tbl != old_tbl) {
 		hash = head_hashfn(ht, tbl, obj);
 		spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
@@ -525,7 +524,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 	 * visible then that guarantees the entry to still be in
 	 * old_tbl if it exists.
 	 */
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
 	if (!ret && old_tbl != tbl)
 		ret = __rhashtable_remove(ht, tbl, obj);
 
@@ -599,7 +598,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup);
 void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
 				bool (*compare)(void *, void *), void *arg)
 {
-	const struct bucket_table *tbl, *old_tbl;
+	const struct bucket_table *tbl;
 	struct rhash_head *he;
 	u32 hash;
 
@@ -618,9 +617,8 @@ restart:
 	/* Ensure we see any new tables. */
 	smp_rmb();
 
-	old_tbl = tbl;
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	if (unlikely(tbl != old_tbl))
+	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (unlikely(tbl))
 		goto restart;
 	rcu_read_unlock();
 
@@ -830,14 +828,13 @@ next:
 		iter->skip = 0;
 	}
 
-	iter->walker->tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	if (iter->walker->tbl != tbl) {
+	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (iter->walker->tbl) {
 		iter->slot = 0;
 		iter->skip = 0;
 		return ERR_PTR(-EAGAIN);
 	}
 
-	iter->walker->tbl = NULL;
 	iter->p = NULL;
 
 out:
@@ -865,8 +862,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
 	ht = iter->ht;
 
 	mutex_lock(&ht->mutex);
-	if (rht_dereference(ht->tbl, ht) == tbl ||
-	    rht_dereference(ht->future_tbl, ht) == tbl)
+	if (tbl->rehash < tbl->size)
 		list_add(&iter->walker->list, &tbl->walkers);
 	else
 		iter->walker->tbl = NULL;
@@ -961,7 +957,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	atomic_set(&ht->nelems, 0);
 
 	RCU_INIT_POINTER(ht->tbl, tbl);
-	RCU_INIT_POINTER(ht->future_tbl, tbl);
 
 	INIT_WORK(&ht->run_work, rht_deferred_worker);
 
-- 
cgit v1.2.3


From 6befc6445ffc6868ee6e6d0e012fc149e88d96db Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:27:53 -0700
Subject: Bluetooth: Add flags field and setting function for HCI sockets

To filter out certain actions for certain HCI sockets introcuce a flags
field that allows to configure specific settings on individual sockets.

Since the hci_pinfo structure is private in hci_sock.c, provide helper
functions for setting and clearing a given flag.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/bluetooth.h |  3 +++
 net/bluetooth/hci_sock.c          | 11 +++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index e598ca096ec9..ae1f2ee1eaf3 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -335,6 +335,9 @@ out:
 
 int bt_to_errno(__u16 code);
 
+void hci_sock_set_flag(struct sock *sk, int nr);
+void hci_sock_clear_flag(struct sock *sk, int nr);
+
 int hci_sock_init(void);
 void hci_sock_cleanup(void);
 
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index b614543b4fe3..bf5365c49c9c 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -47,8 +47,19 @@ struct hci_pinfo {
 	struct hci_filter filter;
 	__u32             cmsg_mask;
 	unsigned short    channel;
+	unsigned long     flags;
 };
 
+void hci_sock_set_flag(struct sock *sk, int nr)
+{
+	set_bit(nr, &hci_pi(sk)->flags);
+}
+
+void hci_sock_clear_flag(struct sock *sk, int nr)
+{
+	clear_bit(nr, &hci_pi(sk)->flags);
+}
+
 static inline int hci_test_bit(int nr, const void *addr)
 {
 	return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31));
-- 
cgit v1.2.3


From 17711c62915dd62ab83a5a83a64c0d6105d13b6c Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:27:54 -0700
Subject: Bluetooth: Provide hci_send_to_flagged_channel helper function

The hci_send_to_flagged_channel helper function can be used to send
packets to all channels that have a certain HCI socket flag set.

This is especially useful for managment events that are limited to
sockets that have first enabled certain functionality. This allows
for filtering of events without confusing existing users.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_sock.c         | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 6afbf5b014a1..d38f6e426e84 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1285,6 +1285,8 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode);
 void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb);
 void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
 			 struct sock *skip_sk);
+void hci_send_to_flagged_channel(unsigned short channel, struct sk_buff *skb,
+				 int flag);
 void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb);
 
 void hci_sock_dev_event(struct hci_dev *hdev, int event);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index bf5365c49c9c..174a353a7dcf 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -231,6 +231,39 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
 	read_unlock(&hci_sk_list.lock);
 }
 
+/* Send frame to sockets with specific channel flag set */
+void hci_send_to_flagged_channel(unsigned short channel, struct sk_buff *skb,
+				 int flag)
+{
+	struct sock *sk;
+
+	BT_DBG("channel %u len %d", channel, skb->len);
+
+	read_lock(&hci_sk_list.lock);
+
+	sk_for_each(sk, &hci_sk_list.head) {
+		struct sk_buff *nskb;
+
+		if (!test_bit(flag, &hci_pi(sk)->flags))
+			continue;
+
+		if (sk->sk_state != BT_BOUND)
+			continue;
+
+		if (hci_pi(sk)->channel != channel)
+			continue;
+
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (!nskb)
+			continue;
+
+		if (sock_queue_rcv_skb(sk, nskb))
+			kfree_skb(nskb);
+	}
+
+	read_unlock(&hci_sk_list.lock);
+}
+
 /* Send frame to monitor socket */
 void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From f920733885546af2fd8d4b3dd5f8a1ac029f6248 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:27:55 -0700
Subject: Bluetooth: Use special function to send filter management index
 events

For sending Index Added, Index Removed, Unconfigured Index Added and
Unconfigured Index Removed managment events the new helper functions
allows taking into account if these events are enabled for a certain
management socket or not.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h |  6 +++++
 net/bluetooth/hci_sock.c    | 10 +++++++
 net/bluetooth/mgmt.c        | 64 +++++++++++++++++++++++++++++++++++----------
 3 files changed, 66 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index d942fedbaedd..0995ec755959 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -179,6 +179,12 @@ enum {
 	HCI_RESET,
 };
 
+/* HCI socket flags */
+enum {
+	HCI_MGMT_INDEX_EVENTS,
+	HCI_MGMT_UNCONF_INDEX_EVENTS,
+};
+
 /*
  * BR/EDR and/or LE controller flags: the flags defined here should represent
  * states from the controller.
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 174a353a7dcf..00775c4fef83 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -817,6 +817,16 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 			goto done;
 		}
 
+		/* At the moment the index and unconfigured index events
+		 * are enabled unconditionally. Setting them on each
+		 * socket when binding keeps this functionality. They
+		 * however might be cleared later and then sending of these
+		 * events will be disabled, but that is then intentional.
+		 */
+		if (haddr.hci_channel == HCI_CHANNEL_CONTROL) {
+			hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS);
+			hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS);
+		}
 		break;
 	}
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index c58908652519..6b58c13b2b51 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -250,6 +250,33 @@ static int mgmt_send_event(u16 event, struct hci_dev *hdev,
 	return 0;
 }
 
+static int mgmt_index_event(u16 event, struct hci_dev *hdev,
+			    void *data, u16 data_len, int flag)
+{
+	struct sk_buff *skb;
+	struct mgmt_hdr *hdr;
+
+	skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	hdr = (void *) skb_put(skb, sizeof(*hdr));
+	hdr->opcode = cpu_to_le16(event);
+	hdr->index = cpu_to_le16(hdev->id);
+	hdr->len = cpu_to_le16(data_len);
+
+	if (data)
+		memcpy(skb_put(skb, data_len), data, data_len);
+
+	/* Time stamp */
+	__net_timestamp(skb);
+
+	hci_send_to_flagged_channel(HCI_CHANNEL_CONTROL, skb, flag);
+	kfree_skb(skb);
+
+	return 0;
+}
+
 static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len,
 		      struct sock *skip_sk)
 {
@@ -6343,34 +6370,43 @@ done:
 
 void mgmt_index_added(struct hci_dev *hdev)
 {
-	if (hdev->dev_type != HCI_BREDR)
-		return;
 
 	if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
 		return;
 
-	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
-		mgmt_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, NULL);
-	else
-		mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL);
+	switch (hdev->dev_type) {
+	case HCI_BREDR:
+		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
+			mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev,
+					 NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
+		} else {
+			mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0,
+					 HCI_MGMT_INDEX_EVENTS);
+		}
+		break;
+	}
 }
 
 void mgmt_index_removed(struct hci_dev *hdev)
 {
 	u8 status = MGMT_STATUS_INVALID_INDEX;
 
-	if (hdev->dev_type != HCI_BREDR)
-		return;
-
 	if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
 		return;
 
-	mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
+	switch (hdev->dev_type) {
+	case HCI_BREDR:
+		mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
 
-	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
-		mgmt_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, NULL);
-	else
-		mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL);
+		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
+			mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev,
+					 NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
+		} else {
+			mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0,
+					 HCI_MGMT_INDEX_EVENTS);
+		}
+		break;
+	}
 }
 
 /* This function requires the caller holds hdev->lock */
-- 
cgit v1.2.3


From ced85549c3a769dfb9d084bb8d6d9ca8075f8728 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:27:56 -0700
Subject: Bluetooth: Add support for extended index management events

This introduces support for using Extended Index Added and Extended
Index Removed events. These events contain the controller type and
also the hardware bus information from the driver.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h  |  1 +
 include/net/bluetooth/mgmt.h |  9 +++++++++
 net/bluetooth/mgmt.c         | 28 ++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 0995ec755959..b8318711135a 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -183,6 +183,7 @@ enum {
 enum {
 	HCI_MGMT_INDEX_EVENTS,
 	HCI_MGMT_UNCONF_INDEX_EVENTS,
+	HCI_MGMT_EXT_INDEX_EVENTS,
 };
 
 /*
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 5bf6af9cee78..8562f9ecf230 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -692,3 +692,12 @@ struct mgmt_ev_new_conn_param {
 #define MGMT_EV_UNCONF_INDEX_REMOVED	0x001e
 
 #define MGMT_EV_NEW_CONFIG_OPTIONS	0x001f
+
+struct mgmt_ev_ext_index {
+	__u8 type;
+	__u8 bus;
+} __packed;
+
+#define MGMT_EV_EXT_INDEX_ADDED		0x0020
+
+#define MGMT_EV_EXT_INDEX_REMOVED	0x0021
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 6b58c13b2b51..56f49e9c4189 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -128,6 +128,8 @@ static const u16 mgmt_events[] = {
 	MGMT_EV_UNCONF_INDEX_ADDED,
 	MGMT_EV_UNCONF_INDEX_REMOVED,
 	MGMT_EV_NEW_CONFIG_OPTIONS,
+	MGMT_EV_EXT_INDEX_ADDED,
+	MGMT_EV_EXT_INDEX_REMOVED,
 };
 
 #define CACHE_TIMEOUT	msecs_to_jiffies(2 * 1000)
@@ -6370,6 +6372,7 @@ done:
 
 void mgmt_index_added(struct hci_dev *hdev)
 {
+	struct mgmt_ev_ext_index ev;
 
 	if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
 		return;
@@ -6379,16 +6382,29 @@ void mgmt_index_added(struct hci_dev *hdev)
 		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
 			mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev,
 					 NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
+			ev.type = 0x01;
 		} else {
 			mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0,
 					 HCI_MGMT_INDEX_EVENTS);
+			ev.type = 0x00;
 		}
 		break;
+	case HCI_AMP:
+		ev.type = 0x02;
+		break;
+	default:
+		return;
 	}
+
+	ev.bus = hdev->bus;
+
+	mgmt_index_event(MGMT_EV_EXT_INDEX_ADDED, hdev, &ev, sizeof(ev),
+			 HCI_MGMT_EXT_INDEX_EVENTS);
 }
 
 void mgmt_index_removed(struct hci_dev *hdev)
 {
+	struct mgmt_ev_ext_index ev;
 	u8 status = MGMT_STATUS_INVALID_INDEX;
 
 	if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
@@ -6401,12 +6417,24 @@ void mgmt_index_removed(struct hci_dev *hdev)
 		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
 			mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev,
 					 NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS);
+			ev.type = 0x01;
 		} else {
 			mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0,
 					 HCI_MGMT_INDEX_EVENTS);
+			ev.type = 0x00;
 		}
 		break;
+	case HCI_AMP:
+		ev.type = 0x02;
+		break;
+	default:
+		return;
 	}
+
+	ev.bus = hdev->bus;
+
+	mgmt_index_event(MGMT_EV_EXT_INDEX_REMOVED, hdev, &ev, sizeof(ev),
+			 HCI_MGMT_EXT_INDEX_EVENTS);
 }
 
 /* This function requires the caller holds hdev->lock */
-- 
cgit v1.2.3


From 96f1474af040a4ec267efe141cbf264891e67e5a Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:27:57 -0700
Subject: Bluetooth: Add support for extended index management command

The Read Extended Contoller Index List command can be used for
retrieving the complete list of local available controllers. This
included configured, unconfigured and also AMP controllers.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/mgmt.h | 11 ++++++
 net/bluetooth/mgmt.c         | 80 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 8562f9ecf230..2534bd4d22b2 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -505,6 +505,17 @@ struct mgmt_cp_start_service_discovery {
 } __packed;
 #define MGMT_START_SERVICE_DISCOVERY_SIZE 4
 
+#define MGMT_OP_READ_EXT_INDEX_LIST	0x003C
+#define MGMT_READ_EXT_INDEX_LIST_SIZE	0
+struct mgmt_rp_read_ext_index_list {
+	__le16	num_controllers;
+	struct {
+		__le16 index;
+		__u8   type;
+		__u8   bus;
+	} entry[0];
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 56f49e9c4189..ff636bd9523b 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -96,6 +96,7 @@ static const u16 mgmt_commands[] = {
 	MGMT_OP_SET_EXTERNAL_CONFIG,
 	MGMT_OP_SET_PUBLIC_ADDRESS,
 	MGMT_OP_START_SERVICE_DISCOVERY,
+	MGMT_OP_READ_EXT_INDEX_LIST,
 };
 
 static const u16 mgmt_events[] = {
@@ -518,6 +519,82 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
 	return err;
 }
 
+static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
+			       void *data, u16 data_len)
+{
+	struct mgmt_rp_read_ext_index_list *rp;
+	struct hci_dev *d;
+	size_t rp_len;
+	u16 count;
+	int err;
+
+	BT_DBG("sock %p", sk);
+
+	read_lock(&hci_dev_list_lock);
+
+	count = 0;
+	list_for_each_entry(d, &hci_dev_list, list) {
+		if (d->dev_type == HCI_BREDR || d->dev_type == HCI_AMP)
+			count++;
+	}
+
+	rp_len = sizeof(*rp) + (sizeof(rp->entry[0]) * count);
+	rp = kmalloc(rp_len, GFP_ATOMIC);
+	if (!rp) {
+		read_unlock(&hci_dev_list_lock);
+		return -ENOMEM;
+	}
+
+	count = 0;
+	list_for_each_entry(d, &hci_dev_list, list) {
+		if (hci_dev_test_flag(d, HCI_SETUP) ||
+		    hci_dev_test_flag(d, HCI_CONFIG) ||
+		    hci_dev_test_flag(d, HCI_USER_CHANNEL))
+			continue;
+
+		/* Devices marked as raw-only are neither configured
+		 * nor unconfigured controllers.
+		 */
+		if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
+			continue;
+
+		if (d->dev_type == HCI_BREDR) {
+			if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
+				rp->entry[count].type = 0x01;
+			else
+				rp->entry[count].type = 0x00;
+		} else if (d->dev_type == HCI_AMP) {
+			rp->entry[count].type = 0x02;
+		} else {
+			continue;
+		}
+
+		rp->entry[count].bus = d->bus;
+		rp->entry[count++].index = cpu_to_le16(d->id);
+		BT_DBG("Added hci%u", d->id);
+	}
+
+	rp->num_controllers = cpu_to_le16(count);
+	rp_len = sizeof(*rp) + (sizeof(rp->entry[0]) * count);
+
+	read_unlock(&hci_dev_list_lock);
+
+	/* If this command is called at least once, then all the
+	 * default index and unconfigured index events are disabled
+	 * and from now on only extended index events are used.
+	 */
+	hci_sock_set_flag(sk, HCI_MGMT_EXT_INDEX_EVENTS);
+	hci_sock_clear_flag(sk, HCI_MGMT_INDEX_EVENTS);
+	hci_sock_clear_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS);
+
+	err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE,
+				MGMT_OP_READ_EXT_INDEX_LIST, 0, rp, rp_len);
+
+	kfree(rp);
+
+	return err;
+}
+
 static bool is_configured(struct hci_dev *hdev)
 {
 	if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) &&
@@ -6264,6 +6341,9 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 						HCI_MGMT_UNCONFIGURED },
 	{ start_service_discovery, MGMT_START_SERVICE_DISCOVERY_SIZE,
 						HCI_MGMT_VAR_LEN },
+	{ NULL },
+	{ read_ext_index_list,     MGMT_READ_EXT_INDEX_LIST_SIZE,
+						HCI_MGMT_NO_HDEV },
 };
 
 int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
-- 
cgit v1.2.3


From 50ebc055fa758c731e6e1ce174608327aab07aec Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:27:58 -0700
Subject: Bluetooth: Introduce trusted flag for management control sockets

Providing a global trusted flag for management control sockets provides
an easy way for identifying sockets and imposing restriction on it. For
now all management sockets are trusted since they require CAP_NET_ADMIN.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h |  1 +
 net/bluetooth/hci_sock.c    | 11 +++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index b8318711135a..7a24acaafeea 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -181,6 +181,7 @@ enum {
 
 /* HCI socket flags */
 enum {
+	HCI_SOCK_TRUSTED,
 	HCI_MGMT_INDEX_EVENTS,
 	HCI_MGMT_UNCONF_INDEX_EVENTS,
 	HCI_MGMT_EXT_INDEX_EVENTS,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 00775c4fef83..54118868b3f6 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -796,6 +796,11 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 			goto done;
 		}
 
+		/* The monitor interface is restricted to CAP_NET_RAW
+		 * capabilities and with that implicitly trusted.
+		 */
+		hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
+
 		send_monitor_replay(sk);
 
 		atomic_inc(&monitor_promisc);
@@ -817,6 +822,12 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 			goto done;
 		}
 
+		/* Since the access to control channels is currently
+		 * restricted to CAP_NET_ADMIN capabilities, every
+		 * socket is implicitly trusted.
+		 */
+		hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
+
 		/* At the moment the index and unconfigured index events
 		 * are enabled unconditionally. Setting them on each
 		 * socket when binding keeps this functionality. They
-- 
cgit v1.2.3


From c08b1a1dba524c1cdef331c1f169db3a1b37bb4c Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:27:59 -0700
Subject: Bluetooth: Consolidate socket channel sending function back into one

With the introduction of trusted socket flag for control and monitor
channels, it is now possible to use a single function for sending
packets to these sockets. And with that consolidate the handling.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  4 +---
 net/bluetooth/hci_sock.c         | 45 ++++++++--------------------------------
 net/bluetooth/mgmt.c             | 34 +++++++-----------------------
 3 files changed, 17 insertions(+), 66 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index d38f6e426e84..859005c9a8fc 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1284,9 +1284,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode);
 /* ----- HCI Sockets ----- */
 void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb);
 void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
-			 struct sock *skip_sk);
-void hci_send_to_flagged_channel(unsigned short channel, struct sk_buff *skb,
-				 int flag);
+			 int flag, struct sock *skip_sk);
 void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb);
 
 void hci_sock_dev_event(struct hci_dev *hdev, int event);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 54118868b3f6..e7f463f6fd69 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -199,7 +199,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 
 /* Send frame to sockets with specific channel */
 void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
-			 struct sock *skip_sk)
+			 int flag, struct sock *skip_sk)
 {
 	struct sock *sk;
 
@@ -210,41 +210,12 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
 	sk_for_each(sk, &hci_sk_list.head) {
 		struct sk_buff *nskb;
 
-		/* Skip the original socket */
-		if (sk == skip_sk)
-			continue;
-
-		if (sk->sk_state != BT_BOUND)
-			continue;
-
-		if (hci_pi(sk)->channel != channel)
-			continue;
-
-		nskb = skb_clone(skb, GFP_ATOMIC);
-		if (!nskb)
+		/* Ignore socket without the flag set */
+		if (!test_bit(flag, &hci_pi(sk)->flags))
 			continue;
 
-		if (sock_queue_rcv_skb(sk, nskb))
-			kfree_skb(nskb);
-	}
-
-	read_unlock(&hci_sk_list.lock);
-}
-
-/* Send frame to sockets with specific channel flag set */
-void hci_send_to_flagged_channel(unsigned short channel, struct sk_buff *skb,
-				 int flag)
-{
-	struct sock *sk;
-
-	BT_DBG("channel %u len %d", channel, skb->len);
-
-	read_lock(&hci_sk_list.lock);
-
-	sk_for_each(sk, &hci_sk_list.head) {
-		struct sk_buff *nskb;
-
-		if (!test_bit(flag, &hci_pi(sk)->flags))
+		/* Skip the original socket */
+		if (sk == skip_sk)
 			continue;
 
 		if (sk->sk_state != BT_BOUND)
@@ -310,7 +281,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
 	hdr->index = cpu_to_le16(hdev->id);
 	hdr->len = cpu_to_le16(skb->len);
 
-	hci_send_to_channel(HCI_CHANNEL_MONITOR, skb_copy, NULL);
+	hci_send_to_channel(HCI_CHANNEL_MONITOR, skb_copy,
+			    HCI_SOCK_TRUSTED, NULL);
 	kfree_skb(skb_copy);
 }
 
@@ -417,7 +389,8 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
 
 		skb = create_monitor_event(hdev, event);
 		if (skb) {
-			hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, NULL);
+			hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+					    HCI_SOCK_TRUSTED, NULL);
 			kfree_skb(skb);
 		}
 	}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ff636bd9523b..1e5afa76e371 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -224,7 +224,7 @@ static u8 mgmt_status(u8 hci_status)
 
 static int mgmt_send_event(u16 event, struct hci_dev *hdev,
 			   unsigned short channel, void *data, u16 data_len,
-			   struct sock *skip_sk)
+			   int flag, struct sock *skip_sk)
 {
 	struct sk_buff *skb;
 	struct mgmt_hdr *hdr;
@@ -247,44 +247,24 @@ static int mgmt_send_event(u16 event, struct hci_dev *hdev,
 	/* Time stamp */
 	__net_timestamp(skb);
 
-	hci_send_to_channel(channel, skb, skip_sk);
+	hci_send_to_channel(channel, skb, flag, skip_sk);
 	kfree_skb(skb);
 
 	return 0;
 }
 
-static int mgmt_index_event(u16 event, struct hci_dev *hdev,
-			    void *data, u16 data_len, int flag)
+static int mgmt_index_event(u16 event, struct hci_dev *hdev, void *data,
+			    u16 len, int flag)
 {
-	struct sk_buff *skb;
-	struct mgmt_hdr *hdr;
-
-	skb = alloc_skb(sizeof(*hdr) + data_len, GFP_KERNEL);
-	if (!skb)
-		return -ENOMEM;
-
-	hdr = (void *) skb_put(skb, sizeof(*hdr));
-	hdr->opcode = cpu_to_le16(event);
-	hdr->index = cpu_to_le16(hdev->id);
-	hdr->len = cpu_to_le16(data_len);
-
-	if (data)
-		memcpy(skb_put(skb, data_len), data, data_len);
-
-	/* Time stamp */
-	__net_timestamp(skb);
-
-	hci_send_to_flagged_channel(HCI_CHANNEL_CONTROL, skb, flag);
-	kfree_skb(skb);
-
-	return 0;
+	return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len,
+			       flag, NULL);
 }
 
 static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len,
 		      struct sock *skip_sk)
 {
 	return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len,
-			       skip_sk);
+			       HCI_SOCK_TRUSTED, skip_sk);
 }
 
 static int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status)
-- 
cgit v1.2.3


From c85be545ea23a4fe590c89683242a9be823394e0 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:28:00 -0700
Subject: Bluetooth: Add hci_sock_test_flag helper function

The management interface will need access to the socket flags and so
provide a helper function for checking them.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/bluetooth.h | 1 +
 net/bluetooth/hci_sock.c          | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index ae1f2ee1eaf3..d871ba313f64 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -337,6 +337,7 @@ int bt_to_errno(__u16 code);
 
 void hci_sock_set_flag(struct sock *sk, int nr);
 void hci_sock_clear_flag(struct sock *sk, int nr);
+int hci_sock_test_flag(struct sock *sk, int nr);
 
 int hci_sock_init(void);
 void hci_sock_cleanup(void);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index e7f463f6fd69..df23c184c897 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -60,6 +60,11 @@ void hci_sock_clear_flag(struct sock *sk, int nr)
 	clear_bit(nr, &hci_pi(sk)->flags);
 }
 
+int hci_sock_test_flag(struct sock *sk, int nr)
+{
+	return test_bit(nr, &hci_pi(sk)->flags);
+}
+
 static inline int hci_test_bit(int nr, const void *addr)
 {
 	return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31));
@@ -211,7 +216,7 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
 		struct sk_buff *nskb;
 
 		/* Ignore socket without the flag set */
-		if (!test_bit(flag, &hci_pi(sk)->flags))
+		if (!hci_sock_test_flag(sk, flag))
 			continue;
 
 		/* Skip the original socket */
-- 
cgit v1.2.3


From c91041dc4efff71f29f1dd4c9a4a5e80b841395c Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:28:01 -0700
Subject: Bluetooth: Add support for untrusted access to management commands

Some management commands are safe to be accessed from any user without
special permissions. First step for allowing access to any of these
commands from untrusted application is to mark them accordingly.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  3 ++-
 net/bluetooth/mgmt.c             | 21 ++++++++++++++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 859005c9a8fc..3546789c1616 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1291,7 +1291,8 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event);
 
 #define HCI_MGMT_VAR_LEN	(1 << 0)
 #define HCI_MGMT_NO_HDEV	(1 << 1)
-#define HCI_MGMT_UNCONFIGURED	(1 << 2)
+#define HCI_MGMT_UNTRUSTED	(1 << 2)
+#define HCI_MGMT_UNCONFIGURED	(1 << 3)
 
 struct hci_mgmt_handler {
 	int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1e5afa76e371..09f31f9642b7 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6251,12 +6251,16 @@ unlock:
 static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ NULL }, /* 0x0000 (no command) */
 	{ read_version,            MGMT_READ_VERSION_SIZE,
-						HCI_MGMT_NO_HDEV },
+						HCI_MGMT_NO_HDEV |
+						HCI_MGMT_UNTRUSTED },
 	{ read_commands,           MGMT_READ_COMMANDS_SIZE,
-						HCI_MGMT_NO_HDEV },
+						HCI_MGMT_NO_HDEV |
+						HCI_MGMT_UNTRUSTED },
 	{ read_index_list,         MGMT_READ_INDEX_LIST_SIZE,
-						HCI_MGMT_NO_HDEV },
-	{ read_controller_info,    MGMT_READ_INFO_SIZE,                 0 },
+						HCI_MGMT_NO_HDEV |
+						HCI_MGMT_UNTRUSTED },
+	{ read_controller_info,    MGMT_READ_INFO_SIZE,
+						HCI_MGMT_UNTRUSTED },
 	{ set_powered,             MGMT_SETTING_SIZE,                   0 },
 	{ set_discoverable,        MGMT_SET_DISCOVERABLE_SIZE,          0 },
 	{ set_connectable,         MGMT_SETTING_SIZE,                   0 },
@@ -6312,9 +6316,11 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ load_conn_param,         MGMT_LOAD_CONN_PARAM_SIZE,
 						HCI_MGMT_VAR_LEN },
 	{ read_unconf_index_list,  MGMT_READ_UNCONF_INDEX_LIST_SIZE,
-						HCI_MGMT_NO_HDEV },
+						HCI_MGMT_NO_HDEV |
+						HCI_MGMT_UNTRUSTED },
 	{ read_config_info,        MGMT_READ_CONFIG_INFO_SIZE,
-						HCI_MGMT_UNCONFIGURED },
+						HCI_MGMT_UNCONFIGURED |
+						HCI_MGMT_UNTRUSTED },
 	{ set_external_config,     MGMT_SET_EXTERNAL_CONFIG_SIZE,
 						HCI_MGMT_UNCONFIGURED },
 	{ set_public_address,      MGMT_SET_PUBLIC_ADDRESS_SIZE,
@@ -6323,7 +6329,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 						HCI_MGMT_VAR_LEN },
 	{ NULL },
 	{ read_ext_index_list,     MGMT_READ_EXT_INDEX_LIST_SIZE,
-						HCI_MGMT_NO_HDEV },
+						HCI_MGMT_NO_HDEV |
+						HCI_MGMT_UNTRUSTED },
 };
 
 int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
-- 
cgit v1.2.3


From c927a10487477eeed0441e5c88147700e69e5db9 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:28:03 -0700
Subject: Bluetooth: Add support for trust verification of management commands

Check the required trust level of each management command with the trust
level of the management socket. If it does not match up, then return the
newly introduced permission denied error.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/mgmt.h | 1 +
 net/bluetooth/mgmt.c         | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 2534bd4d22b2..f3baad589db0 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -44,6 +44,7 @@
 #define MGMT_STATUS_INVALID_INDEX	0x11
 #define MGMT_STATUS_RFKILLED		0x12
 #define MGMT_STATUS_ALREADY_PAIRED	0x13
+#define MGMT_STATUS_PERMISSION_DENIED	0x14
 
 struct mgmt_hdr {
 	__le16	opcode;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 2162f7bc89be..920acf0625f6 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6379,6 +6379,13 @@ int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
 
 	handler = &chan->handlers[opcode];
 
+	if (!hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) &&
+	    !(handler->flags & HCI_MGMT_UNTRUSTED)) {
+		err = mgmt_cmd_status(sk, index, opcode,
+				      MGMT_STATUS_PERMISSION_DENIED);
+		goto done;
+	}
+
 	if (index != MGMT_INDEX_NONE) {
 		hdev = hci_dev_get(index);
 		if (!hdev) {
-- 
cgit v1.2.3


From f6b7712eb660c50877a56772908326cd31125b21 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:28:05 -0700
Subject: Bluetooth: Send global configuration updates to all management users

Changes to the global configuration updates like settings, class of
device, name etc. can be received by every user. They are allowed to
read them in the first place so provide the updates via events as
well. Otherwise untrusted users start polling for updates and that
is not a desired behavior.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h |  1 +
 net/bluetooth/hci_sock.c    |  5 +++++
 net/bluetooth/mgmt.c        | 34 ++++++++++++++++++++--------------
 3 files changed, 26 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 7a24acaafeea..7f41c7741e76 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -185,6 +185,7 @@ enum {
 	HCI_MGMT_INDEX_EVENTS,
 	HCI_MGMT_UNCONF_INDEX_EVENTS,
 	HCI_MGMT_EXT_INDEX_EVENTS,
+	HCI_MGMT_GENERIC_EVENTS,
 };
 
 /*
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f4b10344b1e5..7c719602dbca 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -808,10 +808,15 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 		 * socket when binding keeps this functionality. They
 		 * however might be cleared later and then sending of these
 		 * events will be disabled, but that is then intentional.
+		 *
+		 * This also enables generic events that are safe to be
+		 * received by untrusted users. Example for such events
+		 * are changes to settings, class of device, name etc.
 		 */
 		if (haddr.hci_channel == HCI_CHANNEL_CONTROL) {
 			hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS);
 			hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS);
+			hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS);
 		}
 		break;
 	}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 920acf0625f6..fa5654d89702 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -260,6 +260,13 @@ static int mgmt_index_event(u16 event, struct hci_dev *hdev, void *data,
 			       flag, NULL);
 }
 
+static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data,
+			      u16 len, struct sock *skip_sk)
+{
+	return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len,
+			       HCI_MGMT_GENERIC_EVENTS, skip_sk);
+}
+
 static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len,
 		      struct sock *skip_sk)
 {
@@ -607,8 +614,8 @@ static int new_options(struct hci_dev *hdev, struct sock *skip)
 {
 	__le32 options = get_missing_options(hdev);
 
-	return mgmt_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options,
-			  sizeof(options), skip);
+	return mgmt_generic_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options,
+				  sizeof(options), skip);
 }
 
 static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev)
@@ -1552,11 +1559,10 @@ failed:
 
 static int new_settings(struct hci_dev *hdev, struct sock *skip)
 {
-	__le32 ev;
-
-	ev = cpu_to_le32(get_current_settings(hdev));
+	__le32 ev = cpu_to_le32(get_current_settings(hdev));
 
-	return mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), skip);
+	return mgmt_generic_event(MGMT_EV_NEW_SETTINGS, hdev, &ev,
+				  sizeof(ev), skip);
 }
 
 int mgmt_new_settings(struct hci_dev *hdev)
@@ -3677,8 +3683,8 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
 		if (err < 0)
 			goto failed;
 
-		err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, len,
-				 sk);
+		err = mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev,
+					 data, len, sk);
 
 		goto failed;
 	}
@@ -6673,8 +6679,8 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered)
 	mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
 
 	if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
-		mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
-			   zero_cod, sizeof(zero_cod), NULL);
+		mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
+				   zero_cod, sizeof(zero_cod), NULL);
 
 new_settings:
 	err = new_settings(hdev, match.sk);
@@ -7325,8 +7331,8 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
 	mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
 
 	if (!status)
-		mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, 3,
-			   NULL);
+		mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
+				   dev_class, 3, NULL);
 
 	if (match.sk)
 		sock_put(match.sk);
@@ -7355,8 +7361,8 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)
 			return;
 	}
 
-	mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev),
-		   cmd ? cmd->sk : NULL);
+	mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev),
+			   cmd ? cmd->sk : NULL);
 }
 
 void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192,
-- 
cgit v1.2.3


From a958452aa40c57a0407ecf84ba1bfa31ad532313 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 19:28:06 -0700
Subject: Bluetooth: Use BIT(n) macro instead of manually encoding (1 << n)

The flags for the management command table used manual encoding of
bits in the form of (1 << n). It is however preferred to use BIT(n)
macro instead.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 3546789c1616..ce94bcb33600 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1289,10 +1289,10 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb);
 
 void hci_sock_dev_event(struct hci_dev *hdev, int event);
 
-#define HCI_MGMT_VAR_LEN	(1 << 0)
-#define HCI_MGMT_NO_HDEV	(1 << 1)
-#define HCI_MGMT_UNTRUSTED	(1 << 2)
-#define HCI_MGMT_UNCONFIGURED	(1 << 3)
+#define HCI_MGMT_VAR_LEN	BIT(0)
+#define HCI_MGMT_NO_HDEV	BIT(1)
+#define HCI_MGMT_UNTRUSTED	BIT(2)
+#define HCI_MGMT_UNCONFIGURED	BIT(3)
 
 struct hci_mgmt_handler {
 	int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
-- 
cgit v1.2.3


From d3d5305bfd1cb48c8f44207abb567276a1e09cc7 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 20:53:25 -0700
Subject: Bluetooth: Add simple version of Read Advertising Features command

This adds support for the simplest possible version of Read Advertising
Features management command. It allows basic testing of the interface.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/mgmt.h | 11 +++++++++++
 net/bluetooth/mgmt.c         | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index f3baad589db0..4d0ccd194c01 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -517,6 +517,17 @@ struct mgmt_rp_read_ext_index_list {
 	} entry[0];
 } __packed;
 
+#define MGMT_OP_READ_ADV_FEATURES	0x0003D
+#define MGMT_READ_ADV_FEATURES_SIZE	0
+struct mgmt_rp_read_adv_features {
+	__le32 supported_flags;
+	__u8   max_adv_data_len;
+	__u8   max_scan_rsp_len;
+	__u8   max_instances;
+	__u8   num_instances;
+	__u8   instance[0];
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index fa5654d89702..25a687c2a112 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -97,6 +97,7 @@ static const u16 mgmt_commands[] = {
 	MGMT_OP_SET_PUBLIC_ADDRESS,
 	MGMT_OP_START_SERVICE_DISCOVERY,
 	MGMT_OP_READ_EXT_INDEX_LIST,
+	MGMT_OP_READ_ADV_FEATURES,
 };
 
 static const u16 mgmt_events[] = {
@@ -6254,6 +6255,40 @@ unlock:
 	return err;
 }
 
+static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
+			     void *data, u16 data_len)
+{
+	struct mgmt_rp_read_adv_features *rp;
+	size_t rp_len;
+	int err;
+
+	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
+
+	rp_len = sizeof(*rp);
+	rp = kmalloc(rp_len, GFP_ATOMIC);
+	if (!rp) {
+		hci_dev_unlock(hdev);
+		return -ENOMEM;
+	}
+
+	rp->supported_flags = cpu_to_le32(0);
+	rp->max_adv_data_len = 31;
+	rp->max_scan_rsp_len = 31;
+	rp->max_instances = 0;
+	rp->num_instances = 0;
+
+	hci_dev_unlock(hdev);
+
+	err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES,
+				MGMT_STATUS_SUCCESS, rp, rp_len);
+
+	kfree(rp);
+
+	return err;
+}
+
 static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ NULL }, /* 0x0000 (no command) */
 	{ read_version,            MGMT_READ_VERSION_SIZE,
@@ -6337,6 +6372,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ read_ext_index_list,     MGMT_READ_EXT_INDEX_LIST_SIZE,
 						HCI_MGMT_NO_HDEV |
 						HCI_MGMT_UNTRUSTED },
+	{ read_adv_features,       MGMT_READ_ADV_FEATURES_SIZE },
 };
 
 int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
-- 
cgit v1.2.3


From 1471aae0d04d4e0df2bf1e5a5af861e28371b8b4 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 22:43:18 -0700
Subject: Bluetooth: Add defines for LE Bluetooth Device Address and LE Role

The OOB data requires to include LE Bluetooth Device Address and LE Role
and so add the type constants for these fields.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 7f41c7741e76..af9893b704ff 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -456,6 +456,8 @@ enum {
 #define EIR_SSP_HASH_C		0x0E /* Simple Pairing Hash C */
 #define EIR_SSP_RAND_R		0x0F /* Simple Pairing Randomizer R */
 #define EIR_DEVICE_ID		0x10 /* device ID */
+#define EIR_LE_BDADDR		0x1B /* LE Bluetooth device address */
+#define EIR_LE_ROLE		0x1C /* LE role */
 
 /* Low Energy Advertising Flags */
 #define LE_AD_LIMITED		0x01 /* Limited Discoverable */
-- 
cgit v1.2.3


From 4f0f155ceaf7e1b59d210a8afb24d4ea63ce13cc Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 14 Mar 2015 22:43:19 -0700
Subject: Bluetooth: Add simple version of Read Local OOB Extended Data command

This adds support for the simplest possible version of Read Local OOB
Extended Data management command. It includes all mandatory fields,
but none of the actual pairing related ones.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/mgmt.h |  11 +++++
 net/bluetooth/mgmt.c         | 111 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 121 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 4d0ccd194c01..543c1ba3d892 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -506,6 +506,17 @@ struct mgmt_cp_start_service_discovery {
 } __packed;
 #define MGMT_START_SERVICE_DISCOVERY_SIZE 4
 
+#define MGMT_OP_READ_LOCAL_OOB_EXT_DATA	0x003B
+struct mgmt_cp_read_local_oob_ext_data {
+	__u8 type;
+} __packed;
+#define MGMT_READ_LOCAL_OOB_EXT_DATA_SIZE 1
+struct mgmt_rp_read_local_oob_ext_data {
+	__u8    type;
+	__le16	eir_len;
+	__u8	eir[0];
+} __packed;
+
 #define MGMT_OP_READ_EXT_INDEX_LIST	0x003C
 #define MGMT_READ_EXT_INDEX_LIST_SIZE	0
 struct mgmt_rp_read_ext_index_list {
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 07c75a04829a..6cb0a304182f 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -96,6 +96,7 @@ static const u16 mgmt_commands[] = {
 	MGMT_OP_SET_EXTERNAL_CONFIG,
 	MGMT_OP_SET_PUBLIC_ADDRESS,
 	MGMT_OP_START_SERVICE_DISCOVERY,
+	MGMT_OP_READ_LOCAL_OOB_EXT_DATA,
 	MGMT_OP_READ_EXT_INDEX_LIST,
 	MGMT_OP_READ_ADV_FEATURES,
 };
@@ -6266,6 +6267,114 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data,
 	return eir_len;
 }
 
+static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev,
+				   void *data, u16 data_len)
+{
+	struct mgmt_cp_read_local_oob_ext_data *cp = data;
+	struct mgmt_rp_read_local_oob_ext_data *rp;
+	size_t rp_len;
+	u16 eir_len;
+	u8 status, flags, role, addr[7];
+	int err;
+
+	BT_DBG("%s", hdev->name);
+
+	if (!hdev_is_powered(hdev))
+		return mgmt_cmd_complete(sk, hdev->id,
+					 MGMT_OP_READ_LOCAL_OOB_EXT_DATA,
+					 MGMT_STATUS_NOT_POWERED,
+					 &cp->type, sizeof(cp->type));
+
+	switch (cp->type) {
+	case BIT(BDADDR_BREDR):
+		status = mgmt_bredr_support(hdev);
+		if (status)
+			return mgmt_cmd_complete(sk, hdev->id,
+						 MGMT_OP_READ_LOCAL_OOB_EXT_DATA,
+						 status, &cp->type,
+						 sizeof(cp->type));
+		eir_len = 5;
+		break;
+	case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)):
+		status = mgmt_le_support(hdev);
+		if (status)
+			return mgmt_cmd_complete(sk, hdev->id,
+						 MGMT_OP_READ_LOCAL_OOB_EXT_DATA,
+						 status, &cp->type,
+						 sizeof(cp->type));
+		eir_len = 15;
+		break;
+	default:
+		return mgmt_cmd_complete(sk, hdev->id,
+					 MGMT_OP_READ_LOCAL_OOB_EXT_DATA,
+					 MGMT_STATUS_INVALID_PARAMS,
+					 &cp->type, sizeof(cp->type));
+	}
+
+	hci_dev_lock(hdev);
+
+	rp_len = sizeof(*rp) + eir_len;
+	rp = kmalloc(rp_len, GFP_ATOMIC);
+	if (!rp) {
+		hci_dev_unlock(hdev);
+		return -ENOMEM;
+	}
+
+	eir_len = 0;
+	switch (cp->type) {
+	case BIT(BDADDR_BREDR):
+		eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV,
+					  hdev->dev_class, 3);
+		break;
+	case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)):
+		if (hci_dev_test_flag(hdev, HCI_PRIVACY)) {
+			memcpy(addr, &hdev->rpa, 6);
+			addr[6] = 0x01;
+		} else if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ||
+			   !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
+			   (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
+			    bacmp(&hdev->static_addr, BDADDR_ANY))) {
+			memcpy(addr, &hdev->static_addr, 6);
+			addr[6] = 0x01;
+		} else {
+			memcpy(addr, &hdev->bdaddr, 6);
+			addr[6] = 0x00;
+		}
+
+		eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_BDADDR,
+					  addr, sizeof(addr));
+
+		if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
+			role = 0x02;
+		else
+			role = 0x01;
+
+		eir_len = eir_append_data(rp->eir, eir_len, EIR_LE_ROLE,
+					  &role, sizeof(role));
+
+		flags = get_adv_discov_flags(hdev);
+
+		if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
+			flags |= LE_AD_NO_BREDR;
+
+		eir_len = eir_append_data(rp->eir, eir_len, EIR_FLAGS,
+					  &flags, sizeof(flags));
+		break;
+	}
+
+	rp->type = cp->type;
+	rp->eir_len = cpu_to_le16(eir_len);
+
+	hci_dev_unlock(hdev);
+
+	err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_EXT_DATA,
+				MGMT_STATUS_SUCCESS, rp, rp_len);
+
+	kfree(rp);
+
+	return err;
+}
+
 static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
 			     void *data, u16 data_len)
 {
@@ -6379,7 +6488,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 						HCI_MGMT_UNCONFIGURED },
 	{ start_service_discovery, MGMT_START_SERVICE_DISCOVERY_SIZE,
 						HCI_MGMT_VAR_LEN },
-	{ NULL },
+	{ read_local_oob_ext_data, MGMT_READ_LOCAL_OOB_EXT_DATA_SIZE },
 	{ read_ext_index_list,     MGMT_READ_EXT_INDEX_LIST_SIZE,
 						HCI_MGMT_NO_HDEV |
 						HCI_MGMT_UNTRUSTED },
-- 
cgit v1.2.3


From 03e69b508b6f7c51743055c9f61d1dfeadf4b635 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 14 Mar 2015 02:27:16 +0100
Subject: ebpf: add prandom helper for packet sampling

This work is similar to commit 4cd3675ebf74 ("filter: added BPF
random opcode") and adds a possibility for packet sampling in eBPF.

Currently, this is only possible in classic BPF and useful to
combine sampling with f.e. packet sockets, possible also with tc.

Example function proto-type looks like:

  u32 (*prandom_u32)(void) = (void *)BPF_FUNC_get_prandom_u32;

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  2 ++
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/core.c        |  2 ++
 kernel/bpf/helpers.c     | 12 ++++++++++++
 net/core/filter.c        |  2 ++
 5 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 80f2e0fc3d02..50bf95e29a96 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -154,4 +154,6 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
+extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3fa1af8a58d7..1c2ca2b477c8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -165,6 +165,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
 	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+	BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 50603aec766a..c1dbbb5d289b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -661,6 +661,8 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
 const struct bpf_func_proto bpf_map_update_elem_proto __weak;
 const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 
+const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
+
 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
  * skb_copy_bits(), so provide a weak definition of it for NET-less config.
  */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a3c7701a8b5e..95eb59a045ea 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -11,6 +11,7 @@
  */
 #include <linux/bpf.h>
 #include <linux/rcupdate.h>
+#include <linux/random.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -87,3 +88,14 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
 	.arg1_type = ARG_CONST_MAP_PTR,
 	.arg2_type = ARG_PTR_TO_MAP_KEY,
 };
+
+static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	return prandom_u32();
+}
+
+const struct bpf_func_proto bpf_get_prandom_u32_proto = {
+	.func		= bpf_get_prandom_u32,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
diff --git a/net/core/filter.c b/net/core/filter.c
index 7a4eb7030dba..4344db39af2e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1139,6 +1139,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 		return &bpf_map_update_elem_proto;
 	case BPF_FUNC_map_delete_elem:
 		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_get_prandom_u32:
+		return &bpf_get_prandom_u32_proto;
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From c04167ce2ca0ecaeaafef006cb0d65cf01b68e42 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 14 Mar 2015 02:27:17 +0100
Subject: ebpf: add helper for obtaining current processor id

This patch adds the possibility to obtain raw_smp_processor_id() in
eBPF. Currently, this is only possible in classic BPF where commit
da2033c28226 ("filter: add SKF_AD_RXHASH and SKF_AD_CPU") has added
facilities for this.

Perhaps most importantly, this would also allow us to track per CPU
statistics with eBPF maps, or to implement a poor-man's per CPU data
structure through eBPF maps.

Example function proto-type looks like:

  u32 (*smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id;

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/core.c        |  1 +
 kernel/bpf/helpers.c     | 12 ++++++++++++
 net/core/filter.c        |  2 ++
 5 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 50bf95e29a96..30bfd331882a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -155,5 +155,6 @@ extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
+extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1c2ca2b477c8..de1f63668daf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -166,6 +166,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
+	BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c1dbbb5d289b..4139a0f8b558 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -662,6 +662,7 @@ const struct bpf_func_proto bpf_map_update_elem_proto __weak;
 const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
+const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
 
 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
  * skb_copy_bits(), so provide a weak definition of it for NET-less config.
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 95eb59a045ea..bd7f5988ed9c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -12,6 +12,7 @@
 #include <linux/bpf.h>
 #include <linux/rcupdate.h>
 #include <linux/random.h>
+#include <linux/smp.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -99,3 +100,14 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 };
+
+static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	return raw_smp_processor_id();
+}
+
+const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
+	.func		= bpf_get_smp_processor_id,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
diff --git a/net/core/filter.c b/net/core/filter.c
index 4344db39af2e..33310eee6134 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1141,6 +1141,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 		return &bpf_map_delete_elem_proto;
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
+	case BPF_FUNC_get_smp_processor_id:
+		return &bpf_get_smp_processor_id_proto;
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From 9bac3d6d548e5cc925570b263f35b70a00a00ffd Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 13 Mar 2015 11:57:42 -0700
Subject: bpf: allow extended BPF programs access skb fields

introduce user accessible mirror of in-kernel 'struct sk_buff':
struct __sk_buff {
    __u32 len;
    __u32 pkt_type;
    __u32 mark;
    __u32 queue_mapping;
};

bpf programs can do:

int bpf_prog(struct __sk_buff *skb)
{
    __u32 var = skb->pkt_type;

which will be compiled to bpf assembler as:

dst_reg = *(u32 *)(src_reg + 4) // 4 == offsetof(struct __sk_buff, pkt_type)

bpf verifier will check validity of access and will convert it to:

dst_reg = *(u8 *)(src_reg + offsetof(struct sk_buff, __pkt_type_offset))
dst_reg &= 7

since skb->pkt_type is a bitfield.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |   5 +-
 include/uapi/linux/bpf.h |  10 ++++
 kernel/bpf/syscall.c     |   2 +-
 kernel/bpf/verifier.c    | 152 ++++++++++++++++++++++++++++++++++++++++++-----
 net/core/filter.c        | 100 +++++++++++++++++++++++++------
 5 files changed, 234 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 30bfd331882a..280a315de8d6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -103,6 +103,9 @@ struct bpf_verifier_ops {
 	 * with 'type' (read or write) is allowed
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
+
+	u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
+				  struct bpf_insn *insn);
 };
 
 struct bpf_prog_type_list {
@@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f);
 void bpf_map_put(struct bpf_map *map);
 
 /* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
 #else
 static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
 {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index de1f63668daf..929545a27546 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -170,4 +170,14 @@ enum bpf_func_id {
 	__BPF_FUNC_MAX_ID,
 };
 
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+	__u32 len;
+	__u32 pkt_type;
+	__u32 mark;
+	__u32 queue_mapping;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 669719ccc9ee..ea75c654af1b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -519,7 +519,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 		goto free_prog;
 
 	/* run eBPF verifier */
-	err = bpf_check(prog, attr);
+	err = bpf_check(&prog, attr);
 	if (err < 0)
 		goto free_used_maps;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e6b522496250..c22ebd36fa4b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1620,11 +1620,10 @@ static int do_check(struct verifier_env *env)
 				return err;
 
 		} else if (class == BPF_LDX) {
-			if (BPF_MODE(insn->code) != BPF_MEM ||
-			    insn->imm != 0) {
-				verbose("BPF_LDX uses reserved fields\n");
-				return -EINVAL;
-			}
+			enum bpf_reg_type src_reg_type;
+
+			/* check for reserved fields is already done */
+
 			/* check src operand */
 			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
 			if (err)
@@ -1643,6 +1642,29 @@ static int do_check(struct verifier_env *env)
 			if (err)
 				return err;
 
+			src_reg_type = regs[insn->src_reg].type;
+
+			if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) {
+				/* saw a valid insn
+				 * dst_reg = *(u32 *)(src_reg + off)
+				 * use reserved 'imm' field to mark this insn
+				 */
+				insn->imm = src_reg_type;
+
+			} else if (src_reg_type != insn->imm &&
+				   (src_reg_type == PTR_TO_CTX ||
+				    insn->imm == PTR_TO_CTX)) {
+				/* ABuser program is trying to use the same insn
+				 * dst_reg = *(u32*) (src_reg + off)
+				 * with different pointer types:
+				 * src_reg == ctx in one branch and
+				 * src_reg == stack|map in some other branch.
+				 * Reject it.
+				 */
+				verbose("same insn cannot be used with different pointers\n");
+				return -EINVAL;
+			}
+
 		} else if (class == BPF_STX) {
 			if (BPF_MODE(insn->code) == BPF_XADD) {
 				err = check_xadd(env, insn);
@@ -1790,6 +1812,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 	int i, j;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (BPF_CLASS(insn->code) == BPF_LDX &&
+		    (BPF_MODE(insn->code) != BPF_MEM ||
+		     insn->imm != 0)) {
+			verbose("BPF_LDX uses reserved fields\n");
+			return -EINVAL;
+		}
+
 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
 			struct bpf_map *map;
 			struct fd f;
@@ -1881,6 +1910,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
 			insn->src_reg = 0;
 }
 
+static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
+{
+	struct bpf_insn *insn = prog->insnsi;
+	int insn_cnt = prog->len;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (BPF_CLASS(insn->code) != BPF_JMP ||
+		    BPF_OP(insn->code) == BPF_CALL ||
+		    BPF_OP(insn->code) == BPF_EXIT)
+			continue;
+
+		/* adjust offset of jmps if necessary */
+		if (i < pos && i + insn->off + 1 > pos)
+			insn->off += delta;
+		else if (i > pos && i + insn->off + 1 < pos)
+			insn->off -= delta;
+	}
+}
+
+/* convert load instructions that access fields of 'struct __sk_buff'
+ * into sequence of instructions that access fields of 'struct sk_buff'
+ */
+static int convert_ctx_accesses(struct verifier_env *env)
+{
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	struct bpf_insn insn_buf[16];
+	struct bpf_prog *new_prog;
+	u32 cnt;
+	int i;
+
+	if (!env->prog->aux->ops->convert_ctx_access)
+		return 0;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
+			continue;
+
+		if (insn->imm != PTR_TO_CTX) {
+			/* clear internal mark */
+			insn->imm = 0;
+			continue;
+		}
+
+		cnt = env->prog->aux->ops->
+			convert_ctx_access(insn->dst_reg, insn->src_reg,
+					   insn->off, insn_buf);
+		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+			verbose("bpf verifier is misconfigured\n");
+			return -EINVAL;
+		}
+
+		if (cnt == 1) {
+			memcpy(insn, insn_buf, sizeof(*insn));
+			continue;
+		}
+
+		/* several new insns need to be inserted. Make room for them */
+		insn_cnt += cnt - 1;
+		new_prog = bpf_prog_realloc(env->prog,
+					    bpf_prog_size(insn_cnt),
+					    GFP_USER);
+		if (!new_prog)
+			return -ENOMEM;
+
+		new_prog->len = insn_cnt;
+
+		memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1,
+			sizeof(*insn) * (insn_cnt - i - cnt));
+
+		/* copy substitute insns in place of load instruction */
+		memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt);
+
+		/* adjust branches in the whole program */
+		adjust_branches(new_prog, i, cnt - 1);
+
+		/* keep walking new program and skip insns we just inserted */
+		env->prog = new_prog;
+		insn = new_prog->insnsi + i + cnt - 1;
+		i += cnt - 1;
+	}
+
+	return 0;
+}
+
 static void free_states(struct verifier_env *env)
 {
 	struct verifier_state_list *sl, *sln;
@@ -1903,13 +2018,13 @@ static void free_states(struct verifier_env *env)
 	kfree(env->explored_states);
 }
 
-int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 {
 	char __user *log_ubuf = NULL;
 	struct verifier_env *env;
 	int ret = -EINVAL;
 
-	if (prog->len <= 0 || prog->len > BPF_MAXINSNS)
+	if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
 		return -E2BIG;
 
 	/* 'struct verifier_env' can be global, but since it's not small,
@@ -1919,7 +2034,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 	if (!env)
 		return -ENOMEM;
 
-	env->prog = prog;
+	env->prog = *prog;
 
 	/* grab the mutex to protect few globals used by verifier */
 	mutex_lock(&bpf_verifier_lock);
@@ -1951,7 +2066,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 	if (ret < 0)
 		goto skip_full_check;
 
-	env->explored_states = kcalloc(prog->len,
+	env->explored_states = kcalloc(env->prog->len,
 				       sizeof(struct verifier_state_list *),
 				       GFP_USER);
 	ret = -ENOMEM;
@@ -1968,6 +2083,10 @@ skip_full_check:
 	while (pop_stack(env, NULL) >= 0);
 	free_states(env);
 
+	if (ret == 0)
+		/* program is valid, convert *(u32*)(ctx + off) accesses */
+		ret = convert_ctx_accesses(env);
+
 	if (log_level && log_len >= log_size - 1) {
 		BUG_ON(log_len >= log_size);
 		/* verifier log exceeded user supplied buffer */
@@ -1983,18 +2102,18 @@ skip_full_check:
 
 	if (ret == 0 && env->used_map_cnt) {
 		/* if program passed verifier, update used_maps in bpf_prog_info */
-		prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
-						     sizeof(env->used_maps[0]),
-						     GFP_KERNEL);
+		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
+							  sizeof(env->used_maps[0]),
+							  GFP_KERNEL);
 
-		if (!prog->aux->used_maps) {
+		if (!env->prog->aux->used_maps) {
 			ret = -ENOMEM;
 			goto free_log_buf;
 		}
 
-		memcpy(prog->aux->used_maps, env->used_maps,
+		memcpy(env->prog->aux->used_maps, env->used_maps,
 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
-		prog->aux->used_map_cnt = env->used_map_cnt;
+		env->prog->aux->used_map_cnt = env->used_map_cnt;
 
 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
 		 * bpf_ld_imm64 instructions
@@ -2006,11 +2125,12 @@ free_log_buf:
 	if (log_level)
 		vfree(log_buf);
 free_env:
-	if (!prog->aux->used_maps)
+	if (!env->prog->aux->used_maps)
 		/* if we didn't copy map pointers into bpf_prog_info, release
 		 * them now. Otherwise free_bpf_prog_info() will release them.
 		 */
 		release_maps(env);
+	*prog = env->prog;
 	kfree(env);
 	mutex_unlock(&bpf_verifier_lock);
 	return ret;
diff --git a/net/core/filter.c b/net/core/filter.c
index 33310eee6134..4e9dd0ad0d5b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -150,10 +150,43 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 	return prandom_u32();
 }
 
+static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
+			      struct bpf_insn *insn_buf)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (skb_field) {
+	case SKF_AD_MARK:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sk_buff, mark));
+		break;
+
+	case SKF_AD_PKTTYPE:
+		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
+		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
+#endif
+		break;
+
+	case SKF_AD_QUEUE:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+				      offsetof(struct sk_buff, queue_mapping));
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
 static bool convert_bpf_extensions(struct sock_filter *fp,
 				   struct bpf_insn **insnp)
 {
 	struct bpf_insn *insn = *insnp;
+	u32 cnt;
 
 	switch (fp->k) {
 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
@@ -167,13 +200,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
-		*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
-				      PKT_TYPE_OFFSET());
-		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
-#ifdef __BIG_ENDIAN_BITFIELD
-		insn++;
-                *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
-#endif
+		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_IFINDEX:
@@ -197,10 +225,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_MARK:
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-
-		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
-				    offsetof(struct sk_buff, mark));
+		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_RXHASH:
@@ -211,10 +237,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_QUEUE:
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
-
-		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				    offsetof(struct sk_buff, queue_mapping));
+		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
@@ -1151,13 +1175,55 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 static bool sk_filter_is_valid_access(int off, int size,
 				      enum bpf_access_type type)
 {
-	/* skb fields cannot be accessed yet */
-	return false;
+	/* only read is allowed */
+	if (type != BPF_READ)
+		return false;
+
+	/* check bounds */
+	if (off < 0 || off >= sizeof(struct __sk_buff))
+		return false;
+
+	/* disallow misaligned access */
+	if (off % size != 0)
+		return false;
+
+	/* all __sk_buff fields are __u32 */
+	if (size != 4)
+		return false;
+
+	return true;
+}
+
+static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
+					struct bpf_insn *insn_buf)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (ctx_off) {
+	case offsetof(struct __sk_buff, len):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sk_buff, len));
+		break;
+
+	case offsetof(struct __sk_buff, mark):
+		return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, pkt_type):
+		return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, queue_mapping):
+		return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
+	}
+
+	return insn - insn_buf;
 }
 
 static const struct bpf_verifier_ops sk_filter_ops = {
 	.get_func_proto = sk_filter_func_proto,
 	.is_valid_access = sk_filter_is_valid_access,
+	.convert_ctx_access = sk_filter_convert_ctx_access,
 };
 
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
-- 
cgit v1.2.3


From 4170604feec780d00e7511c24fa0f6e5c2e4ed75 Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Sun, 15 Mar 2015 21:07:14 -0700
Subject: switchdev: add swdev ops

As discussed at netconf, introduce swdev_ops as first step to move switchdev
ops from ndo to swdev.  This will keep switchdev from cluttering up ndo ops
space.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +++
 include/net/switchdev.h   | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ddab1a2a07a0..9e8a2a933c68 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1577,6 +1577,9 @@ struct net_device {
 	const struct net_device_ops *netdev_ops;
 	const struct ethtool_ops *ethtool_ops;
 	const struct forwarding_accel_ops *fwd_ops;
+#ifdef CONFIG_NET_SWITCHDEV
+	const struct swdev_ops *swdev_ops;
+#endif
 
 	const struct header_ops *header_ops;
 
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 1a9382febcc3..e5de53f92482 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -14,6 +14,44 @@
 #include <linux/netdevice.h>
 #include <linux/notifier.h>
 
+struct fib_info;
+
+/**
+ * struct switchdev_ops - switchdev operations
+ *
+ * int (*swdev_parent_id_get)(struct net_device *dev,
+ *			      struct netdev_phys_item_id *psid);
+ *	Called to get an ID of the switch chip this port is part of.
+ *	If driver implements this, it indicates that it represents a port
+ *	of a switch chip.
+ *
+ * int (*swdev_port_stp_update)(struct net_device *dev, u8 state);
+ *	Called to notify switch device port of bridge port STP
+ *	state change.
+ *
+ * int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst,
+ *			     int dst_len, struct fib_info *fi,
+ *			     u8 tos, u8 type, u32 nlflags, u32 tb_id);
+ *	Called to add/modify IPv4 route to switch device.
+ *
+ * int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst,
+ *			     int dst_len, struct fib_info *fi,
+ *			     u8 tos, u8 type, u32 tb_id);
+ *	Called to delete IPv4 route from switch device.
+ */
+struct swdev_ops {
+	int	(*swdev_parent_id_get)(struct net_device *dev,
+				       struct netdev_phys_item_id *psid);
+	int	(*swdev_port_stp_update)(struct net_device *dev, u8 state);
+	int	(*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst,
+				      int dst_len, struct fib_info *fi,
+				      u8 tos, u8 type, u32 nlflags,
+				      u32 tb_id);
+	int	(*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst,
+				      int dst_len, struct fib_info *fi,
+				      u8 tos, u8 type, u32 tb_id);
+};
+
 enum netdev_switch_notifier_type {
 	NETDEV_SWITCH_FDB_ADD = 1,
 	NETDEV_SWITCH_FDB_DEL,
-- 
cgit v1.2.3


From 812a1c3ff3ee9d5100e0e71edb06681014e84a9b Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Sun, 15 Mar 2015 21:07:16 -0700
Subject: netdev: remove ndo ops for switchdev

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 38 --------------------------------------
 1 file changed, 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9e8a2a933c68..dd1d069758be 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -768,8 +768,6 @@ struct netdev_phys_item_id {
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
 				       struct sk_buff *skb);
 
-struct fib_info;
-
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1024,23 +1022,6 @@ struct fib_info;
  *	be otherwise expressed by feature flags. The check is called with
  *	the set of features that the stack has calculated and it returns
  *	those the driver believes to be appropriate.
- *
- * int (*ndo_switch_parent_id_get)(struct net_device *dev,
- *				   struct netdev_phys_item_id *psid);
- *	Called to get an ID of the switch chip this port is part of.
- *	If driver implements this, it indicates that it represents a port
- *	of a switch chip.
- * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state);
- *	Called to notify switch device port of bridge port STP
- *	state change.
- * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst,
- *				     int dst_len, struct fib_info *fi,
- *				     u8 tos, u8 type, u32 nlflags, u32 tb_id);
- *	Called to add/modify IPv4 route to switch device.
- * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst,
- *				     int dst_len, struct fib_info *fi,
- *				     u8 tos, u8 type, u32 tb_id);
- *	Called to delete IPv4 route from switch device.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1197,25 +1178,6 @@ struct net_device_ops {
 	netdev_features_t	(*ndo_features_check) (struct sk_buff *skb,
 						       struct net_device *dev,
 						       netdev_features_t features);
-#ifdef CONFIG_NET_SWITCHDEV
-	int			(*ndo_switch_parent_id_get)(struct net_device *dev,
-							    struct netdev_phys_item_id *psid);
-	int			(*ndo_switch_port_stp_update)(struct net_device *dev,
-							      u8 state);
-	int			(*ndo_switch_fib_ipv4_add)(struct net_device *dev,
-							   __be32 dst,
-							   int dst_len,
-							   struct fib_info *fi,
-							   u8 tos, u8 type,
-							   u32 nlflags,
-							   u32 tb_id);
-	int			(*ndo_switch_fib_ipv4_del)(struct net_device *dev,
-							   __be32 dst,
-							   int dst_len,
-							   struct fib_info *fi,
-							   u8 tos, u8 type,
-							   u32 tb_id);
-#endif
 };
 
 /**
-- 
cgit v1.2.3


From aefedc1a4cfe4c874b8a7ac743f9deedc289d9e6 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sun, 15 Mar 2015 17:08:19 -0700
Subject: Bluetooth: Remove unneeded HCI_CONN_REMOTE_OOB connection flag

The HCI_CONN_REMOTE_OOB connection flag is used to indicate if the
pairing initiator has provided out-of-band data. However since that
value is no longer used in any decision making, just remove it.

It is actually unclear what purpose the OOB data present field from
the HCI IO Capability Response event serves in the first place. If
either side provided out-of-band data, then that data will be used
for pairing.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 1 -
 net/bluetooth/hci_event.c        | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ce94bcb33600..e4dc18eed446 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -596,7 +596,6 @@ enum {
 	HCI_CONN_SC_ENABLED,
 	HCI_CONN_AES_CCM,
 	HCI_CONN_POWER_SAVE,
-	HCI_CONN_REMOTE_OOB,
 	HCI_CONN_FLUSH_KEY,
 	HCI_CONN_ENCRYPT,
 	HCI_CONN_AUTH,
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 10d760c46df1..d800f0c5aa21 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4006,8 +4006,6 @@ static void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 	conn->remote_cap = ev->capability;
 	conn->remote_auth = ev->authentication;
-	if (ev->oob_data)
-		set_bit(HCI_CONN_REMOTE_OOB, &conn->flags);
 
 unlock:
 	hci_dev_unlock(hdev);
-- 
cgit v1.2.3


From dc5a1ad7bd830b7789ba2950342bdecfe4787945 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 12 Mar 2015 08:53:24 +0200
Subject: mac80211: allow to get wireless_dev structure from ieee80211_vif

This will allow mac80211 drivers to call cfg80211 APIs with
the right handle.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 13 +++++++++++++
 net/mac80211/util.c    | 12 ++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a7756e45465e..157c0f151766 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1281,6 +1281,19 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
  */
 struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev);
 
+/**
+ * ieee80211_vif_to_wdev - return a wdev struct from a vif
+ * @vif: the vif to get the wdev for
+ *
+ * This can be used by mac80211 drivers with direct cfg80211 APIs
+ * (like the vendor commands) that needs to get the wdev for a vif.
+ *
+ * Note that this function may return %NULL if the given wdev isn't
+ * associated with a vif that the driver knows about (e.g. monitor
+ * or AP_VLAN interfaces.)
+ */
+struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif);
+
 /**
  * enum ieee80211_key_flags - key flags
  *
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 37d85d36dd2c..e664b28821a2 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -745,6 +745,18 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev)
 }
 EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif);
 
+struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+	if (!ieee80211_sdata_running(sdata) ||
+	    !(sdata->flags & IEEE80211_SDATA_IN_DRIVER))
+		return NULL;
+
+	return &sdata->wdev;
+}
+EXPORT_SYMBOL_GPL(ieee80211_vif_to_wdev);
+
 /*
  * Nothing should have been stuffed into the workqueue during
  * the suspend->resume cycle. Since we can't check each caller
-- 
cgit v1.2.3


From f709bfcf6a292560ce187c33fd099ee495c0a404 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 16 Mar 2015 01:10:20 -0700
Subject: Bluetooth: Add constants for LE SC Confirmation and Random values

The LE Secure Connections Confirmation Value and LE Secure Connections
Random Value contants are required for the out-of-band data and so
just define them.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index af9893b704ff..ce757303dc07 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -458,6 +458,8 @@ enum {
 #define EIR_DEVICE_ID		0x10 /* device ID */
 #define EIR_LE_BDADDR		0x1B /* LE Bluetooth device address */
 #define EIR_LE_ROLE		0x1C /* LE role */
+#define EIR_LE_SC_CONFIRM	0x22 /* LE SC Confirmation Value */
+#define EIR_LE_SC_RANDOM	0x23 /* LE SC Random Value */
 
 /* Low Energy Advertising Flags */
 #define LE_AD_LIMITED		0x01 /* Limited Discoverable */
-- 
cgit v1.2.3


From c055d5b03bb4cb69d349d787c9787c0383abd8b2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 10 Mar 2015 05:08:19 +0100
Subject: netfilter: bridge: query conntrack about skb dnat

ask conntrack instead of storing ipv4 address in nf_bridge_info->data.

Ths avoids the need to use ->data during NF_PRE_ROUTING.
Only two functions that need ->data remain.

These will be addressed in followup patches.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h |  6 ------
 net/bridge/br_netfilter.c        | 27 +++++++++++++++++++++------
 2 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index bb39113ea596..de123d769ffc 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -54,12 +54,6 @@ static inline unsigned int nf_bridge_pad(const struct sk_buff *skb)
 	return 0;
 }
 
-struct bridge_skb_cb {
-	union {
-		__be32 ipv4;
-	} daddr;
-};
-
 static inline void br_drop_fake_rtable(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index b260a97275db..261fcd5a42d6 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -37,17 +37,16 @@
 #include <net/route.h>
 #include <net/netfilter/br_netfilter.h>
 
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
 #include <asm/uaccess.h>
 #include "br_private.h"
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
 
-#define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
-				 (skb->nf_bridge->data))->daddr.ipv4)
-#define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = ip_hdr(skb)->daddr)
-#define dnat_took_place(skb)	 (skb_origaddr(skb) != ip_hdr(skb)->daddr)
-
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *brnf_sysctl_header;
 static int brnf_call_iptables __read_mostly = 1;
@@ -322,6 +321,22 @@ free_skb:
 	return 0;
 }
 
+static bool dnat_took_place(const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || nf_ct_is_untracked(ct))
+		return false;
+
+	return test_bit(IPS_DST_NAT_BIT, &ct->status);
+#else
+	return false;
+#endif
+}
+
 /* This requires some explaining. If DNAT has taken place,
  * we will need to fix up the destination Ethernet address.
  *
@@ -625,7 +640,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
 		return NF_DROP;
 	if (!setup_pre_routing(skb))
 		return NF_DROP;
-	store_orig_dstaddr(skb);
+
 	skb->protocol = htons(ETH_P_IP);
 
 	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
-- 
cgit v1.2.3


From e4bb9bcbfb7d67431dfd49860f62770a7f40193b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 10 Mar 2015 10:36:48 +0100
Subject: netfilter: bridge: remove BRNF_STATE_BRIDGED flag

Its not needed anymore since 2bf540b73ed5b
([NETFILTER]: bridge-netfilter: remove deferred hooks).
Before this it was possible to have physoutdev set for locally generated
packets -- this isn't the case anymore:

BRNF_STATE_BRIDGED flag is set when we assign nf_bridge->physoutdev,
so physoutdev != NULL means BRNF_STATE_BRIDGED is set.
If physoutdev is NULL, then we are looking at locally-delivered and
routed packet.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h | 1 -
 net/bridge/br_netfilter.c        | 9 ++++++---
 net/netfilter/xt_physdev.c       | 3 +--
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index de123d769ffc..ed0d3bf953c3 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -19,7 +19,6 @@ enum nf_br_hook_priorities {
 
 #define BRNF_PKT_TYPE			0x01
 #define BRNF_BRIDGED_DNAT		0x02
-#define BRNF_BRIDGED			0x04
 #define BRNF_NF_BRIDGE_PREROUTING	0x08
 #define BRNF_8021Q			0x10
 #define BRNF_PPPoE			0x20
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 261fcd5a42d6..bd2d24d1ff21 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -736,8 +736,6 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
 	if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb))
 		return NF_DROP;
 
-	/* The physdev module checks on this */
-	nf_bridge->mask |= BRNF_BRIDGED;
 	nf_bridge->physoutdev = skb->dev;
 	if (pf == NFPROTO_IPV4)
 		skb->protocol = htons(ETH_P_IP);
@@ -857,7 +855,12 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
 	struct net_device *realoutdev = bridge_parent(skb->dev);
 	u_int8_t pf;
 
-	if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED))
+	/* if nf_bridge is set, but ->physoutdev is NULL, this packet came in
+	 * on a bridge, but was delivered locally and is now being routed:
+	 *
+	 * POST_ROUTING was already invoked from the ip stack.
+	 */
+	if (!nf_bridge || !nf_bridge->physoutdev)
 		return NF_ACCEPT;
 
 	if (!realoutdev)
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index f440f57a452f..50a52043650f 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -56,8 +56,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	/* This only makes sense in the FORWARD and POSTROUTING chains */
 	if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
-	    (!!(nf_bridge->mask & BRNF_BRIDGED) ^
-	    !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
+	    (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
 		return false;
 
 	if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
-- 
cgit v1.2.3


From 1d0ab253872cdd3d8e7913f59c266c7fd01771d0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 15 Mar 2015 21:12:12 -0700
Subject: net: add sk_fullsock() helper

We have many places where we want to check if a socket is
not a timewait or request socket. Use a helper to avoid
hard coding this.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index f10832ca2e90..e0360f5a53e9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -67,6 +67,7 @@
 #include <linux/atomic.h>
 #include <net/dst.h>
 #include <net/checksum.h>
+#include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
 
 struct cgroup;
@@ -2218,6 +2219,14 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb)
 	return NULL;
 }
 
+/* This helper checks if a socket is a full socket,
+ * ie _not_ a timewait or request socket.
+ */
+static inline bool sk_fullsock(const struct sock *sk)
+{
+	return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
+}
+
 void sock_enable_timestamp(struct sock *sk, int flag);
 int sock_get_timestamp(struct sock *, struct timeval __user *);
 int sock_get_timestampns(struct sock *, struct timespec __user *);
-- 
cgit v1.2.3


From 13854e5a60461daee08ce99842b7f4d37553d911 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 15 Mar 2015 21:12:16 -0700
Subject: inet: add proper refcounting to request sock

reqsk_put() is the generic function that should be used
to release a refcount (and automatically call reqsk_free())

reqsk_free() might be called if refcount is known to be 0
or undefined.

refcnt is set to one in inet_csk_reqsk_queue_add()

As request socks are not yet in global ehash table,
I added temporary debugging checks in reqsk_put() and reqsk_free()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h |  5 +++++
 include/net/inet_sock.h            |  5 +++++
 include/net/request_sock.h         | 13 +++++++------
 net/core/request_sock.c            |  4 ++--
 net/ipv4/inet_connection_sock.c    |  8 ++++----
 net/ipv4/syncookies.c              | 10 +++++-----
 net/ipv4/tcp_fastopen.c            |  2 +-
 7 files changed, 29 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index b9a6b0a94cc6..191feec60205 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -275,6 +275,11 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk,
 					    struct sock *child)
 {
 	reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
+	/* before letting lookups find us, make sure all req fields
+	 * are committed to memory.
+	 */
+	smp_wmb();
+	atomic_set(&req->rsk_refcnt, 1);
 }
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index b3053fdd871e..3d8c09abb097 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -255,6 +255,11 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops
 		ireq->opt = NULL;
 		atomic64_set(&ireq->ir_cookie, 0);
 		ireq->ireq_state = TCP_NEW_SYN_RECV;
+
+		/* Following is temporary. It is coupled with debugging
+		 * helpers in reqsk_put() & reqsk_free()
+		 */
+		atomic_set(&ireq->ireq_refcnt, 0);
 	}
 
 	return req;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 3275cf31f731..56dc2faba47e 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -82,19 +82,20 @@ static inline struct request_sock *inet_reqsk(struct sock *sk)
 	return (struct request_sock *)sk;
 }
 
-static inline void __reqsk_free(struct request_sock *req)
-{
-	kmem_cache_free(req->rsk_ops->slab, req);
-}
-
 static inline void reqsk_free(struct request_sock *req)
 {
+	/* temporary debugging */
+	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
+
 	req->rsk_ops->destructor(req);
-	__reqsk_free(req);
+	kmem_cache_free(req->rsk_ops->slab, req);
 }
 
 static inline void reqsk_put(struct request_sock *req)
 {
+	/* temporary debugging, until req sock are put into ehash table */
+	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1);
+
 	if (atomic_dec_and_test(&req->rsk_refcnt))
 		reqsk_free(req);
 }
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 04db318e6218..e910317ef6d9 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -103,7 +103,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 			while ((req = lopt->syn_table[i]) != NULL) {
 				lopt->syn_table[i] = req->dl_next;
 				lopt->qlen--;
-				reqsk_free(req);
+				reqsk_put(req);
 			}
 		}
 	}
@@ -180,7 +180,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 		 */
 		spin_unlock_bh(&fastopenq->lock);
 		sock_put(lsk);
-		reqsk_free(req);
+		reqsk_put(req);
 		return;
 	}
 	/* Wait for 60secs before removing a req that has triggered RST.
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 34581f928afa..3390ba6f96b2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -340,7 +340,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 out:
 	release_sock(sk);
 	if (req)
-		__reqsk_free(req);
+		reqsk_put(req);
 	return newsk;
 out_err:
 	newsk = NULL;
@@ -635,7 +635,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 				/* Drop this request */
 				inet_csk_reqsk_queue_unlink(parent, req, reqp);
 				reqsk_queue_removed(queue, req);
-				reqsk_free(req);
+				reqsk_put(req);
 				continue;
 			}
 			reqp = &req->dl_next;
@@ -837,7 +837,7 @@ void inet_csk_listen_stop(struct sock *sk)
 		sock_put(child);
 
 		sk_acceptq_removed(sk);
-		__reqsk_free(req);
+		reqsk_put(req);
 	}
 	if (queue->fastopenq != NULL) {
 		/* Free all the reqs queued in rskq_rst_head. */
@@ -847,7 +847,7 @@ void inet_csk_listen_stop(struct sock *sk)
 		spin_unlock_bh(&queue->fastopenq->lock);
 		while ((req = acc_req) != NULL) {
 			acc_req = req->dl_next;
-			__reqsk_free(req);
+			reqsk_put(req);
 		}
 	}
 	WARN_ON(sk->sk_ack_backlog);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f17db898ed26..5ae0c49f5e2e 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -219,9 +219,9 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
 }
 EXPORT_SYMBOL_GPL(__cookie_v4_check);
 
-static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
-					   struct request_sock *req,
-					   struct dst_entry *dst)
+static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+				    struct request_sock *req,
+				    struct dst_entry *dst)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
@@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->opt = tcp_v4_save_options(skb);
 
 	if (security_inet_conn_request(sk, skb, req)) {
-		reqsk_free(req);
+		reqsk_put(req);
 		goto out;
 	}
 
@@ -378,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	security_req_classify_flow(req, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(sock_net(sk), &fl4);
 	if (IS_ERR(rt)) {
-		reqsk_free(req);
+		reqsk_put(req);
 		goto out;
 	}
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fe77417fc137..84381319e1bc 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -253,7 +253,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 		fastopenq->rskq_rst_head = req1->dl_next;
 		fastopenq->qlen--;
 		spin_unlock(&fastopenq->lock);
-		reqsk_free(req1);
+		reqsk_put(req1);
 	}
 	return true;
 }
-- 
cgit v1.2.3


From 72000df2c01d6927319ad7e3f43460f6d0227de5 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 16 Mar 2015 16:11:21 -0700
Subject: Bluetooth: Add support for Local OOB Extended Data Update events

When a different user requests a new set of local out-of-band data, then
inform all previous users that the data has been updated. To limit the
scope of users, the updates are limited to previous users. If a user has
never requested out-of-band data, it will also not see the update.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h  |  1 +
 include/net/bluetooth/mgmt.h |  7 +++++++
 net/bluetooth/mgmt.c         | 16 ++++++++++++++++
 3 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ce757303dc07..a6ea156dc7e9 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -186,6 +186,7 @@ enum {
 	HCI_MGMT_UNCONF_INDEX_EVENTS,
 	HCI_MGMT_EXT_INDEX_EVENTS,
 	HCI_MGMT_GENERIC_EVENTS,
+	HCI_MGMT_OOB_DATA_EVENTS,
 };
 
 /*
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 543c1ba3d892..a1a68671bf88 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -735,3 +735,10 @@ struct mgmt_ev_ext_index {
 #define MGMT_EV_EXT_INDEX_ADDED		0x0020
 
 #define MGMT_EV_EXT_INDEX_REMOVED	0x0021
+
+#define MGMT_EV_LOCAL_OOB_DATA_UPDATED	0x0022
+struct mgmt_ev_local_oob_data_updated {
+	__u8    type;
+	__le16	eir_len;
+	__u8	eir[0];
+} __packed;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7fa3c4b8384b..72e41d29e301 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -133,6 +133,7 @@ static const u16 mgmt_events[] = {
 	MGMT_EV_NEW_CONFIG_OPTIONS,
 	MGMT_EV_EXT_INDEX_ADDED,
 	MGMT_EV_EXT_INDEX_REMOVED,
+	MGMT_EV_LOCAL_OOB_DATA_UPDATED,
 };
 
 #define CACHE_TIMEOUT	msecs_to_jiffies(2 * 1000)
@@ -262,6 +263,13 @@ static int mgmt_index_event(u16 event, struct hci_dev *hdev, void *data,
 			       flag, NULL);
 }
 
+static int mgmt_limited_event(u16 event, struct hci_dev *hdev, void *data,
+			      u16 len, int flag, struct sock *skip_sk)
+{
+	return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len,
+			       flag, skip_sk);
+}
+
 static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data,
 			      u16 len, struct sock *skip_sk)
 {
@@ -6387,8 +6395,16 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev,
 
 	hci_dev_unlock(hdev);
 
+	hci_sock_set_flag(sk, HCI_MGMT_OOB_DATA_EVENTS);
+
 	err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_EXT_DATA,
 				MGMT_STATUS_SUCCESS, rp, sizeof(*rp) + eir_len);
+	if (err < 0)
+		goto done;
+
+	err = mgmt_limited_event(MGMT_EV_LOCAL_OOB_DATA_UPDATED, hdev,
+				 rp, sizeof(*rp) + eir_len,
+				 HCI_MGMT_OOB_DATA_EVENTS, sk);
 
 done:
 	kfree(rp);
-- 
cgit v1.2.3


From e03826d5045e81a66a4fad7be9a8ecdaeb7911cf Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 17 Mar 2015 15:56:04 +0530
Subject: regulator: palmas: Correct TPS659038 register definition for REGEN2

The register offset for REGEN2_CTRL in different for TPS659038 chip as when
compared with other Palmas family PMICs. In the case of TPS659038 the wrong
offset pointed to PLLEN_CTRL and was causing a hang. Correcting the same.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/regulator/palmas-regulator.c | 4 ++++
 include/linux/mfd/palmas.h           | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c
index 9205f433573c..18198316b6cf 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -1572,6 +1572,10 @@ static int palmas_regulators_probe(struct platform_device *pdev)
 	if (!pmic)
 		return -ENOMEM;
 
+	if (of_device_is_compatible(node, "ti,tps659038-pmic"))
+		palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr =
+							TPS659038_REGEN2_CTRL;
+
 	pmic->dev = &pdev->dev;
 	pmic->palmas = palmas;
 	palmas->pmic = pmic;
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index fb0390a1a498..ee7b1ce7a6f8 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -2999,6 +2999,9 @@ enum usb_irq_events {
 #define PALMAS_GPADC_TRIM15					0x0E
 #define PALMAS_GPADC_TRIM16					0x0F
 
+/* TPS659038 regen2_ctrl offset iss different from palmas */
+#define TPS659038_REGEN2_CTRL					0x12
+
 /* TPS65917 Interrupt registers */
 
 /* Registers for function INTERRUPT */
-- 
cgit v1.2.3


From a2f4870697a5bcf4a87073ec6b32dd2928c1211d Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 17 Mar 2015 12:23:19 -0400
Subject: fs: make sure the timestamps for lazytime inodes eventually get
 written

Jan Kara pointed out that if there is an inode which is constantly
getting dirtied with I_DIRTY_PAGES, an inode with an updated timestamp
will never be written since inode->dirtied_when is constantly getting
updated.  We fix this by adding an extra field to the inode,
dirtied_time_when, so inodes with a stale dirtytime can get detected
and handled.

In addition, if we have a dirtytime inode caused by an atime update,
and there is no write activity on the file system, we need to have a
secondary system to make sure these inodes get written out.  We do
this by setting up a second delayed work structure which wakes up the
CPU much more rarely compared to writeback_expire_centisecs.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/fs-writeback.c  | 82 +++++++++++++++++++++++++++++++++++++++++++++++-------
 include/linux/fs.h |  1 +
 2 files changed, 73 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..2cfcd74faf87 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work {
 	struct completion *done;	/* set if the caller waits */
 };
 
+/*
+ * If an inode is constantly having its pages dirtied, but then the
+ * updates stop dirtytime_expire_interval seconds in the past, it's
+ * possible for the worst case time between when an inode has its
+ * timestamps updated and when they finally get written out to be two
+ * dirtytime_expire_intervals.  We set the default to 12 hours (in
+ * seconds), which means most of the time inodes will have their
+ * timestamps written to disk after 12 hours, but in the worst case a
+ * few inodes might not their timestamps updated for 24 hours.
+ */
+unsigned int dirtytime_expire_interval = 12 * 60 * 60;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 
 	if ((flags & EXPIRE_DIRTY_ATIME) == 0)
 		older_than_this = work->older_than_this;
-	else if ((work->reason == WB_REASON_SYNC) == 0) {
-		expire_time = jiffies - (HZ * 86400);
+	else if (!work->for_sync) {
+		expire_time = jiffies - (dirtytime_expire_interval * HZ);
 		older_than_this = &expire_time;
 	}
 	while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 		 */
 		redirty_tail(inode, wb);
 	} else if (inode->i_state & I_DIRTY_TIME) {
+		inode->dirtied_when = jiffies;
 		list_move(&inode->i_wb_list, &wb->b_dirty_time);
 	} else {
 		/* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	spin_lock(&inode->i_lock);
 
 	dirty = inode->i_state & I_DIRTY;
-	if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
-	     (inode->i_state & I_DIRTY_TIME)) ||
-	    (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
-		dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
-		trace_writeback_lazytime(inode);
-	}
+	if (inode->i_state & I_DIRTY_TIME) {
+		if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+		    unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
+		    unlikely(time_after(jiffies,
+					(inode->dirtied_time_when +
+					 dirtytime_expire_interval * HZ)))) {
+			dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+			trace_writeback_lazytime(inode);
+		}
+	} else
+		inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
 	inode->i_state &= ~dirty;
 
 	/*
@@ -1131,6 +1149,45 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
 	rcu_read_unlock();
 }
 
+/*
+ * Wake up bdi's periodically to make sure dirtytime inodes gets
+ * written back periodically.  We deliberately do *not* check the
+ * b_dirtytime list in wb_has_dirty_io(), since this would cause the
+ * kernel to be constantly waking up once there are any dirtytime
+ * inodes on the system.  So instead we define a separate delayed work
+ * function which gets called much more rarely.  (By default, only
+ * once every 12 hours.)
+ *
+ * If there is any other write activity going on in the file system,
+ * this function won't be necessary.  But if the only thing that has
+ * happened on the file system is a dirtytime inode caused by an atime
+ * update, we need this infrastructure below to make sure that inode
+ * eventually gets pushed out to disk.
+ */
+static void wakeup_dirtytime_writeback(struct work_struct *w);
+static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
+
+static void wakeup_dirtytime_writeback(struct work_struct *w)
+{
+	struct backing_dev_info *bdi;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
+		if (list_empty(&bdi->wb.b_dirty_time))
+			continue;
+		bdi_wakeup_thread(bdi);
+	}
+	rcu_read_unlock();
+	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+}
+
+static int __init start_dirtytime_writeback(void)
+{
+	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+	return 0;
+}
+__initcall(start_dirtytime_writeback);
+
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1269,8 +1326,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			}
 
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_wb_list, dirtytime ?
-				  &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
+			if (dirtytime)
+				inode->dirtied_time_when = jiffies;
+			if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
+				list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+			else
+				list_move(&inode->i_wb_list,
+					  &bdi->wb.b_dirty_time);
 			spin_unlock(&bdi->wb.list_lock);
 			trace_writeback_dirty_inode_enqueue(inode);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b4d71b5e1ff2..f4131e8ead74 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -604,6 +604,7 @@ struct inode {
 	struct mutex		i_mutex;
 
 	unsigned long		dirtied_when;	/* jiffies of first dirtying */
+	unsigned long		dirtied_time_when;
 
 	struct hlist_node	i_hash;
 	struct list_head	i_wb_list;	/* backing dev IO list */
-- 
cgit v1.2.3


From 1efff914afac8a965ad63817ecf8861a927c2ace Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 17 Mar 2015 12:23:32 -0400
Subject: fs: add dirtytime_expire_seconds sysctl

Add a tuning knob so we can adjust the dirtytime expiration timeout,
which is very useful for testing lazytime.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/fs-writeback.c         | 11 +++++++++++
 include/linux/writeback.h |  3 +++
 kernel/sysctl.c           |  8 ++++++++
 3 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 2cfcd74faf87..32a8bbd7a9ad 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1188,6 +1188,17 @@ static int __init start_dirtytime_writeback(void)
 }
 __initcall(start_dirtytime_writeback);
 
+int dirtytime_interval_handler(struct ctl_table *table, int write,
+			       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (ret == 0 && write)
+		mod_delayed_work(system_wq, &dirtytime_work, 0);
+	return ret;
+}
+
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 00048339c23e..b2dd371ec0ca 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -130,6 +130,7 @@ extern int vm_dirty_ratio;
 extern unsigned long vm_dirty_bytes;
 extern unsigned int dirty_writeback_interval;
 extern unsigned int dirty_expire_interval;
+extern unsigned int dirtytime_expire_interval;
 extern int vm_highmem_is_dirtyable;
 extern int block_dump;
 extern int laptop_mode;
@@ -146,6 +147,8 @@ extern int dirty_ratio_handler(struct ctl_table *table, int write,
 extern int dirty_bytes_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
+int dirtytime_interval_handler(struct ctl_table *table, int write,
+			       void __user *buffer, size_t *lenp, loff_t *ppos);
 
 struct ctl_table;
 int dirty_writeback_centisecs_handler(struct ctl_table *, int,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 88ea2d6e0031..ce410bb9f2e1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1227,6 +1227,14 @@ static struct ctl_table vm_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 	},
+	{
+		.procname	= "dirtytime_expire_seconds",
+		.data		= &dirtytime_expire_interval,
+		.maxlen		= sizeof(dirty_expire_interval),
+		.mode		= 0644,
+		.proc_handler	= dirtytime_interval_handler,
+		.extra1		= &zero,
+	},
 	{
 		.procname       = "nr_pdflush_threads",
 		.mode           = 0444 /* read-only */,
-- 
cgit v1.2.3


From 695c4cb61926ddef2481dbcefef44498a7c1e5b5 Mon Sep 17 00:00:00 2001
From: Jakub Pawlowski <jpawlowski@google.com>
Date: Tue, 17 Mar 2015 09:04:12 -0700
Subject: Bluetooth: Introduce HCI_QUIRK_SIMULTANEOUS_DISCOVERY

Some controllers allow both LE scan and BR/EDR inquiry to run at
the same time, while others allow only one, LE SCAN or BR/EDR
inquiry at given time.

Since this is specific to each controller, add a new quirk setting
that allows drivers to tell the core wether given controller can
do both LE scan and BR/EDR inquiry at same time.

Signed-off-by: Jakub Pawlowski <jpawlowski@google.com>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index a6ea156dc7e9..06e7eee31ce4 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -160,6 +160,14 @@ enum {
 	 * during the hdev->setup vendor callback.
 	 */
 	HCI_QUIRK_STRICT_DUPLICATE_FILTER,
+
+	/* When this quirk is set, LE scan and BR/EDR inquiry is done
+	 * simultaneously, otherwise it's interleaved.
+	 *
+	 * This quirk can be set before hci_register_dev is called or
+	 * during the hdev->setup vendor callback.
+	 */
+	HCI_QUIRK_SIMULTANEOUS_DISCOVERY,
 };
 
 /* HCI device flags */
-- 
cgit v1.2.3


From d0f172b14afa7e0d8a19db5baa07e20b3ec8dcc8 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 17 Mar 2015 13:48:46 +0200
Subject: Bluetooth: Add helper to get HCI channel of a socket

We'll need to have access to which HCI channel a socket is bound to, in
order to manage pending mgmt commands in clean way. This patch adds a
helper for the purpose.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h | 1 +
 net/bluetooth/hci_sock.c          | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index d871ba313f64..33a5e00025aa 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -338,6 +338,7 @@ int bt_to_errno(__u16 code);
 void hci_sock_set_flag(struct sock *sk, int nr);
 void hci_sock_clear_flag(struct sock *sk, int nr);
 int hci_sock_test_flag(struct sock *sk, int nr);
+unsigned short hci_sock_get_channel(struct sock *sk);
 
 int hci_sock_init(void);
 void hci_sock_cleanup(void);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 7c719602dbca..9ba1a2667eaa 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -65,6 +65,11 @@ int hci_sock_test_flag(struct sock *sk, int nr)
 	return test_bit(nr, &hci_pi(sk)->flags);
 }
 
+unsigned short hci_sock_get_channel(struct sock *sk)
+{
+	return hci_pi(sk)->channel;
+}
+
 static inline int hci_test_bit(int nr, const void *addr)
 {
 	return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31));
-- 
cgit v1.2.3


From 88b94ce925cb103851f39bfa7e23e09823573d30 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 17 Mar 2015 13:48:49 +0200
Subject: Bluetooth: Add hdev_init callback for HCI channels

In order to make the mgmt command handling more generic we can't have a
direct call to mgmt_init_hdev() from mgmt_control(). This patch adds a
new callback to struct hci_mgmt_chan. And sets it to point to the
mgmt_init_hdev() function for the HCI_CHANNEL_CONTROL instance.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 net/bluetooth/mgmt.c             | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index e4dc18eed446..93e7b2b05a17 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1305,6 +1305,7 @@ struct hci_mgmt_chan {
 	unsigned short channel;
 	size_t handler_count;
 	const struct hci_mgmt_handler *handlers;
+	void (*hdev_init) (struct sock *sk, struct hci_dev *hdev);
 };
 
 int hci_mgmt_chan_register(struct hci_mgmt_chan *c);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 915a2a1f0a71..ac897e676d5e 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6493,8 +6493,8 @@ int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
 		goto done;
 	}
 
-	if (hdev)
-		mgmt_init_hdev(sk, hdev);
+	if (hdev && chan->hdev_init)
+		chan->hdev_init(sk, hdev);
 
 	cp = buf + sizeof(*hdr);
 
@@ -7706,6 +7706,7 @@ static struct hci_mgmt_chan chan = {
 	.channel	= HCI_CHANNEL_CONTROL,
 	.handler_count	= ARRAY_SIZE(mgmt_handlers),
 	.handlers	= mgmt_handlers,
+	.hdev_init	= mgmt_init_hdev,
 };
 
 int mgmt_init(void)
-- 
cgit v1.2.3


From fa4335d71a1408d0112c15874538f3f4e153ba01 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 17 Mar 2015 13:48:50 +0200
Subject: Bluetooth: Move generic mgmt command dispatcher to hci_sock.c

The mgmt.c file should be reserved purely for HCI_CHANNEL_CONTROL. The
mgmt_control() function in it is already completely generic and has a
single user in hci_sock.c. This patch moves the function there and
renames it a bit more appropriately to hci_mgmt_cmd() (as it's a command
dispatcher).

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |   3 -
 net/bluetooth/hci_sock.c         | 116 ++++++++++++++++++++++++++++++++++++++-
 net/bluetooth/mgmt.c             | 111 -------------------------------------
 3 files changed, 115 insertions(+), 115 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 93e7b2b05a17..b65c53de6a69 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1330,9 +1330,6 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c);
 #define DISCOV_BREDR_INQUIRY_LEN	0x08
 #define DISCOV_LE_RESTART_DELAY		msecs_to_jiffies(200)	/* msec */
 
-int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
-		 struct msghdr *msg, size_t msglen);
-
 int mgmt_new_settings(struct hci_dev *hdev);
 void mgmt_index_added(struct hci_dev *hdev);
 void mgmt_index_removed(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 9ba1a2667eaa..85a44a7dc150 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -30,6 +30,9 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/hci_mon.h>
+#include <net/bluetooth/mgmt.h>
+
+#include "mgmt_util.h"
 
 static LIST_HEAD(mgmt_chan_list);
 static DEFINE_MUTEX(mgmt_chan_list_lock);
@@ -951,6 +954,117 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	return err ? : copied;
 }
 
+static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk,
+			struct msghdr *msg, size_t msglen)
+{
+	void *buf;
+	u8 *cp;
+	struct mgmt_hdr *hdr;
+	u16 opcode, index, len;
+	struct hci_dev *hdev = NULL;
+	const struct hci_mgmt_handler *handler;
+	bool var_len, no_hdev;
+	int err;
+
+	BT_DBG("got %zu bytes", msglen);
+
+	if (msglen < sizeof(*hdr))
+		return -EINVAL;
+
+	buf = kmalloc(msglen, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (memcpy_from_msg(buf, msg, msglen)) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	hdr = buf;
+	opcode = __le16_to_cpu(hdr->opcode);
+	index = __le16_to_cpu(hdr->index);
+	len = __le16_to_cpu(hdr->len);
+
+	if (len != msglen - sizeof(*hdr)) {
+		err = -EINVAL;
+		goto done;
+	}
+
+	if (opcode >= chan->handler_count ||
+	    chan->handlers[opcode].func == NULL) {
+		BT_DBG("Unknown op %u", opcode);
+		err = mgmt_cmd_status(sk, index, opcode,
+				      MGMT_STATUS_UNKNOWN_COMMAND);
+		goto done;
+	}
+
+	handler = &chan->handlers[opcode];
+
+	if (!hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) &&
+	    !(handler->flags & HCI_MGMT_UNTRUSTED)) {
+		err = mgmt_cmd_status(sk, index, opcode,
+				      MGMT_STATUS_PERMISSION_DENIED);
+		goto done;
+	}
+
+	if (index != MGMT_INDEX_NONE) {
+		hdev = hci_dev_get(index);
+		if (!hdev) {
+			err = mgmt_cmd_status(sk, index, opcode,
+					      MGMT_STATUS_INVALID_INDEX);
+			goto done;
+		}
+
+		if (hci_dev_test_flag(hdev, HCI_SETUP) ||
+		    hci_dev_test_flag(hdev, HCI_CONFIG) ||
+		    hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
+			err = mgmt_cmd_status(sk, index, opcode,
+					      MGMT_STATUS_INVALID_INDEX);
+			goto done;
+		}
+
+		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
+		    !(handler->flags & HCI_MGMT_UNCONFIGURED)) {
+			err = mgmt_cmd_status(sk, index, opcode,
+					      MGMT_STATUS_INVALID_INDEX);
+			goto done;
+		}
+	}
+
+	no_hdev = (handler->flags & HCI_MGMT_NO_HDEV);
+	if (no_hdev != !hdev) {
+		err = mgmt_cmd_status(sk, index, opcode,
+				      MGMT_STATUS_INVALID_INDEX);
+		goto done;
+	}
+
+	var_len = (handler->flags & HCI_MGMT_VAR_LEN);
+	if ((var_len && len < handler->data_len) ||
+	    (!var_len && len != handler->data_len)) {
+		err = mgmt_cmd_status(sk, index, opcode,
+				      MGMT_STATUS_INVALID_PARAMS);
+		goto done;
+	}
+
+	if (hdev && chan->hdev_init)
+		chan->hdev_init(sk, hdev);
+
+	cp = buf + sizeof(*hdr);
+
+	err = handler->func(sk, hdev, cp, len);
+	if (err < 0)
+		goto done;
+
+	err = msglen;
+
+done:
+	if (hdev)
+		hci_dev_put(hdev);
+
+	kfree(buf);
+	return err;
+}
+
 static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 			    size_t len)
 {
@@ -984,7 +1098,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 		mutex_lock(&mgmt_chan_list_lock);
 		chan = __hci_mgmt_chan_find(hci_pi(sk)->channel);
 		if (chan)
-			err = mgmt_control(chan, sk, msg, len);
+			err = hci_mgmt_cmd(chan, sk, msg, len);
 		else
 			err = -EINVAL;
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ac897e676d5e..f3a957905193 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6401,117 +6401,6 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ read_adv_features,       MGMT_READ_ADV_FEATURES_SIZE },
 };
 
-int mgmt_control(struct hci_mgmt_chan *chan, struct sock *sk,
-		 struct msghdr *msg, size_t msglen)
-{
-	void *buf;
-	u8 *cp;
-	struct mgmt_hdr *hdr;
-	u16 opcode, index, len;
-	struct hci_dev *hdev = NULL;
-	const struct hci_mgmt_handler *handler;
-	bool var_len, no_hdev;
-	int err;
-
-	BT_DBG("got %zu bytes", msglen);
-
-	if (msglen < sizeof(*hdr))
-		return -EINVAL;
-
-	buf = kmalloc(msglen, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	if (memcpy_from_msg(buf, msg, msglen)) {
-		err = -EFAULT;
-		goto done;
-	}
-
-	hdr = buf;
-	opcode = __le16_to_cpu(hdr->opcode);
-	index = __le16_to_cpu(hdr->index);
-	len = __le16_to_cpu(hdr->len);
-
-	if (len != msglen - sizeof(*hdr)) {
-		err = -EINVAL;
-		goto done;
-	}
-
-	if (opcode >= chan->handler_count ||
-	    chan->handlers[opcode].func == NULL) {
-		BT_DBG("Unknown op %u", opcode);
-		err = mgmt_cmd_status(sk, index, opcode,
-				      MGMT_STATUS_UNKNOWN_COMMAND);
-		goto done;
-	}
-
-	handler = &chan->handlers[opcode];
-
-	if (!hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) &&
-	    !(handler->flags & HCI_MGMT_UNTRUSTED)) {
-		err = mgmt_cmd_status(sk, index, opcode,
-				      MGMT_STATUS_PERMISSION_DENIED);
-		goto done;
-	}
-
-	if (index != MGMT_INDEX_NONE) {
-		hdev = hci_dev_get(index);
-		if (!hdev) {
-			err = mgmt_cmd_status(sk, index, opcode,
-					      MGMT_STATUS_INVALID_INDEX);
-			goto done;
-		}
-
-		if (hci_dev_test_flag(hdev, HCI_SETUP) ||
-		    hci_dev_test_flag(hdev, HCI_CONFIG) ||
-		    hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
-			err = mgmt_cmd_status(sk, index, opcode,
-					      MGMT_STATUS_INVALID_INDEX);
-			goto done;
-		}
-
-		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
-		    !(handler->flags & HCI_MGMT_UNCONFIGURED)) {
-			err = mgmt_cmd_status(sk, index, opcode,
-					      MGMT_STATUS_INVALID_INDEX);
-			goto done;
-		}
-	}
-
-	no_hdev = (handler->flags & HCI_MGMT_NO_HDEV);
-	if (no_hdev != !hdev) {
-		err = mgmt_cmd_status(sk, index, opcode,
-				      MGMT_STATUS_INVALID_INDEX);
-		goto done;
-	}
-
-	var_len = (handler->flags & HCI_MGMT_VAR_LEN);
-	if ((var_len && len < handler->data_len) ||
-	    (!var_len && len != handler->data_len)) {
-		err = mgmt_cmd_status(sk, index, opcode,
-				      MGMT_STATUS_INVALID_PARAMS);
-		goto done;
-	}
-
-	if (hdev && chan->hdev_init)
-		chan->hdev_init(sk, hdev);
-
-	cp = buf + sizeof(*hdr);
-
-	err = handler->func(sk, hdev, cp, len);
-	if (err < 0)
-		goto done;
-
-	err = msglen;
-
-done:
-	if (hdev)
-		hci_dev_put(hdev);
-
-	kfree(buf);
-	return err;
-}
-
 void mgmt_index_added(struct hci_dev *hdev)
 {
 	struct mgmt_ev_ext_index ev;
-- 
cgit v1.2.3


From c24973957975403521ca76a776c2dfd12fbe9add Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Mon, 16 Mar 2015 18:06:02 -0700
Subject: bpf: allow BPF programs access 'protocol' and 'vlan_tci' fields

as a follow on to patch 70006af95515 ("bpf: allow eBPF access skb fields")
this patch allows 'protocol' and 'vlan_tci' fields to be accessible
from extended BPF programs.

The usage of 'protocol', 'vlan_present' and 'vlan_tci' fields is the same as
corresponding SKF_AD_PROTOCOL, SKF_AD_VLAN_TAG_PRESENT and SKF_AD_VLAN_TAG
accesses in classic BPF.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h    |  3 ++
 net/core/filter.c           | 72 +++++++++++++++++++++++++++++++--------------
 samples/bpf/test_verifier.c |  9 ++++++
 3 files changed, 62 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 929545a27546..1623047af463 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -178,6 +178,9 @@ struct __sk_buff {
 	__u32 pkt_type;
 	__u32 mark;
 	__u32 queue_mapping;
+	__u32 protocol;
+	__u32 vlan_present;
+	__u32 vlan_tci;
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index 4e9dd0ad0d5b..b95ae7fe7e4f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -177,6 +177,35 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
 		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
 				      offsetof(struct sk_buff, queue_mapping));
 		break;
+
+	case SKF_AD_PROTOCOL:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+
+		/* dst_reg = *(u16 *) (src_reg + offsetof(protocol)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+				      offsetof(struct sk_buff, protocol));
+		/* dst_reg = ntohs(dst_reg) [emitting a nop or swap16] */
+		*insn++ = BPF_ENDIAN(BPF_FROM_BE, dst_reg, 16);
+		break;
+
+	case SKF_AD_VLAN_TAG:
+	case SKF_AD_VLAN_TAG_PRESENT:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+
+		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+				      offsetof(struct sk_buff, vlan_tci));
+		if (skb_field == SKF_AD_VLAN_TAG) {
+			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
+						~VLAN_TAG_PRESENT);
+		} else {
+			/* dst_reg >>= 12 */
+			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
+			/* dst_reg &= 1 */
+			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
+		}
+		break;
 	}
 
 	return insn - insn_buf;
@@ -190,13 +219,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 
 	switch (fp->k) {
 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-
-		/* A = *(u16 *) (CTX + offsetof(protocol)) */
-		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				      offsetof(struct sk_buff, protocol));
-		/* A = ntohs(A) [emitting a nop or swap16] */
-		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
+		cnt = convert_skb_access(SKF_AD_PROTOCOL, BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
@@ -242,22 +266,15 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
-	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
+					 BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
+		break;
 
-		/* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
-		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				      offsetof(struct sk_buff, vlan_tci));
-		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
-			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
-					      ~VLAN_TAG_PRESENT);
-		} else {
-			/* A >>= 12 */
-			*insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
-			/* A &= 1 */
-			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
-		}
+	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
+		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
+					 BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
@@ -1215,6 +1232,17 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 
 	case offsetof(struct __sk_buff, queue_mapping):
 		return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, protocol):
+		return convert_skb_access(SKF_AD_PROTOCOL, dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, vlan_present):
+		return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
+					  dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, vlan_tci):
+		return convert_skb_access(SKF_AD_VLAN_TAG,
+					  dst_reg, src_reg, insn);
 	}
 
 	return insn - insn_buf;
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index df6dbb6576f6..75d561f9fd6a 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -658,6 +658,15 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, queue_mapping)),
 			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, protocol)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_present)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_tci)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
-- 
cgit v1.2.3


From adc17d6a6ca08d11f70f6c49f3d40b87b68fe53f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 16 Mar 2015 21:06:18 -0700
Subject: inet: move ir_mark to fill a hole

On 64bit arches, we can save 8 bytes in inet_request_sock
by moving ir_mark to fill a hole.

While we are at it, inet_request_mark() can get a const qualifier
for listener socket.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 3d8c09abb097..c9ed91891887 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -94,11 +94,11 @@ struct inet_request_sock {
 				acked	   : 1,
 				no_srccheck: 1;
 	kmemcheck_bitfield_end(flags);
+	u32                     ir_mark;
 	union {
 		struct ip_options_rcu	*opt;
 		struct sk_buff		*pktopts;
 	};
-	u32                     ir_mark;
 };
 
 static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -106,13 +106,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
 	return (struct inet_request_sock *)sk;
 }
 
-static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
+static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
 {
-	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) {
+	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
 		return skb->mark;
-	} else {
-		return sk->sk_mark;
-	}
+
+	return sk->sk_mark;
 }
 
 struct inet_cork {
-- 
cgit v1.2.3


From 1bfc4438a7ef99e8a6cba0bd3a86249430256621 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 16 Mar 2015 21:06:19 -0700
Subject: tcp: move tcp_openreq_init() to tcp_input.c

This big helper is called once from tcp_conn_request(), there is no
point having it in an include. Compiler will inline it anyway.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 25 -------------------------
 net/ipv4/tcp_input.c | 25 +++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2e11e38205c2..9fe1d535cd6c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1137,31 +1137,6 @@ static inline int tcp_full_space(const struct sock *sk)
 	return tcp_win_from_space(sk->sk_rcvbuf);
 }
 
-static inline void tcp_openreq_init(struct request_sock *req,
-				    struct tcp_options_received *rx_opt,
-				    struct sk_buff *skb, struct sock *sk)
-{
-	struct inet_request_sock *ireq = inet_rsk(req);
-
-	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
-	req->cookie_ts = 0;
-	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
-	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-	tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	tcp_rsk(req)->last_oow_ack_time = 0;
-	req->mss = rx_opt->mss_clamp;
-	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
-	ireq->tstamp_ok = rx_opt->tstamp_ok;
-	ireq->sack_ok = rx_opt->sack_ok;
-	ireq->snd_wscale = rx_opt->snd_wscale;
-	ireq->wscale_ok = rx_opt->wscale_ok;
-	ireq->acked = 0;
-	ireq->ecn_ok = 0;
-	ireq->ir_rmt_port = tcp_hdr(skb)->source;
-	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
-	ireq->ir_mark = inet_request_mark(sk, skb);
-}
-
 extern void tcp_openreq_init_rwin(struct request_sock *req,
 				  struct sock *sk, struct dst_entry *dst);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 717d437b6ce1..10d6bd93f229 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5912,6 +5912,31 @@ static void tcp_ecn_create_request(struct request_sock *req,
 		inet_rsk(req)->ecn_ok = 1;
 }
 
+static void tcp_openreq_init(struct request_sock *req,
+			     const struct tcp_options_received *rx_opt,
+			     struct sk_buff *skb, const struct sock *sk)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+
+	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
+	req->cookie_ts = 0;
+	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
+	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_rsk(req)->last_oow_ack_time = 0;
+	req->mss = rx_opt->mss_clamp;
+	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
+	ireq->tstamp_ok = rx_opt->tstamp_ok;
+	ireq->sack_ok = rx_opt->sack_ok;
+	ireq->snd_wscale = rx_opt->snd_wscale;
+	ireq->wscale_ok = rx_opt->wscale_ok;
+	ireq->acked = 0;
+	ireq->ecn_ok = 0;
+	ireq->ir_rmt_port = tcp_hdr(skb)->source;
+	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
+	ireq->ir_mark = inet_request_mark(sk, skb);
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
-- 
cgit v1.2.3


From 7970ddc8f9ffe149b392975da60739ccd1796dea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 16 Mar 2015 21:06:20 -0700
Subject: tcp: uninline tcp_oow_rate_limited()

tcp_oow_rate_limited() is hardly used in fast path, there is
no point inlining it.

Signed-of-by: Eric Dumazet <edumazet@google.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 32 ++------------------------------
 net/ipv4/tcp_input.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9fe1d535cd6c..5b29835b81d8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1216,36 +1216,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
 	return true;
 }
 
-/* Return true if we're currently rate-limiting out-of-window ACKs and
- * thus shouldn't send a dupack right now. We rate-limit dupacks in
- * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
- * attacks that send repeated SYNs or ACKs for the same connection. To
- * do this, we do not send a duplicate SYNACK or ACK if the remote
- * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
- */
-static inline bool tcp_oow_rate_limited(struct net *net,
-					const struct sk_buff *skb,
-					int mib_idx, u32 *last_oow_ack_time)
-{
-	/* Data packets without SYNs are not likely part of an ACK loop. */
-	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
-	    !tcp_hdr(skb)->syn)
-		goto not_rate_limited;
-
-	if (*last_oow_ack_time) {
-		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
-
-		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
-			NET_INC_STATS_BH(net, mib_idx);
-			return true;	/* rate-limited: don't send yet! */
-		}
-	}
-
-	*last_oow_ack_time = tcp_time_stamp;
-
-not_rate_limited:
-	return false;	/* not rate-limited: go ahead, send dupack now! */
-}
+bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+			  int mib_idx, u32 *last_oow_ack_time);
 
 static inline void tcp_mib_init(struct net *net)
 {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 10d6bd93f229..7257eb206c07 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3321,6 +3321,36 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 	return flag;
 }
 
+/* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+ * attacks that send repeated SYNs or ACKs for the same connection. To
+ * do this, we do not send a duplicate SYNACK or ACK if the remote
+ * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
+ */
+bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
+			  int mib_idx, u32 *last_oow_ack_time)
+{
+	/* Data packets without SYNs are not likely part of an ACK loop. */
+	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+	    !tcp_hdr(skb)->syn)
+		goto not_rate_limited;
+
+	if (*last_oow_ack_time) {
+		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+			NET_INC_STATS_BH(net, mib_idx);
+			return true;	/* rate-limited: don't send yet! */
+		}
+	}
+
+	*last_oow_ack_time = tcp_time_stamp;
+
+not_rate_limited:
+	return false;	/* not rate-limited: go ahead, send dupack now! */
+}
+
 /* RFC 5961 7 [ACK Throttling] */
 static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From 407640de2152e33341ce1131dac269672c3d50f7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 17 Mar 2015 18:32:26 -0700
Subject: inet: add sk_listener argument to inet_reqsk_alloc()

listener socket can be used to set net pointer, and will
be later used to hold a reference on listener.

Add a const qualifier to first argument (struct request_sock_ops *),
and factorize all write_pnet(&ireq->ireq_net, sock_net(sk));

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 9 ++++++---
 net/dccp/ipv4.c         | 3 +--
 net/dccp/ipv6.c         | 3 +--
 net/ipv4/syncookies.c   | 3 +--
 net/ipv4/tcp_input.c    | 3 +--
 net/ipv6/syncookies.c   | 3 +--
 6 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index c9ed91891887..cf7abb00941b 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -244,16 +244,19 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
 			    initval);
 }
 
-static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
+static inline struct request_sock *
+inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
 {
 	struct request_sock *req = reqsk_alloc(ops);
-	struct inet_request_sock *ireq = inet_rsk(req);
 
-	if (req != NULL) {
+	if (req) {
+		struct inet_request_sock *ireq = inet_rsk(req);
+
 		kmemcheck_annotate_bitfield(ireq, flags);
 		ireq->opt = NULL;
 		atomic64_set(&ireq->ir_cookie, 0);
 		ireq->ireq_state = TCP_NEW_SYN_RECV;
+		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
 
 		/* Following is temporary. It is coupled with debugging
 		 * helpers in reqsk_put() & reqsk_free()
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7f6456afbaec..bf897829f4f0 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -624,7 +624,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = inet_reqsk_alloc(&dccp_request_sock_ops);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
 	if (req == NULL)
 		goto drop;
 
@@ -641,7 +641,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
 	ireq->ireq_family = AF_INET;
 	ireq->ir_iif = sk->sk_bound_dev_if;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5166b0043f95..d7e7c7b0a3f1 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = inet_reqsk_alloc(&dccp6_request_sock_ops);
+	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
 	if (req == NULL)
 		goto drop;
 
@@ -403,7 +403,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
 	ireq->ireq_family = AF_INET6;
 
 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5ae0c49f5e2e..eb940750bb1b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -325,7 +325,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
 	if (!req)
 		goto out;
 
@@ -346,7 +346,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
 	treq->listener		= NULL;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
 	ireq->ireq_family = AF_INET;
 
 	ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7257eb206c07..2a480f6811ea 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6004,7 +6004,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		goto drop;
 	}
 
-	req = inet_reqsk_alloc(rsk_ops);
+	req = inet_reqsk_alloc(rsk_ops, sk);
 	if (!req)
 		goto drop;
 
@@ -6020,7 +6020,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);
-	write_pnet(&inet_rsk(req)->ireq_net, sock_net(sk));
 
 	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
 	inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 58875ce8e178..039e74dd29fe 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -189,14 +189,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp6_request_sock_ops);
+	req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
 	if (!req)
 		goto out;
 
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
 	treq->listener = NULL;
-	write_pnet(&ireq->ireq_net, sock_net(sk));
 	ireq->ireq_family = AF_INET6;
 
 	if (security_inet_conn_request(sk, skb, req))
-- 
cgit v1.2.3


From e49bb337d77d54afebe4fe5b9008955e1337f83d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 17 Mar 2015 18:32:27 -0700
Subject: inet: uninline inet_reqsk_alloc()

inet_reqsk_alloc() is becoming fat and should not be inlined.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 24 ++----------------------
 net/ipv4/tcp_input.c    | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index cf7abb00941b..6fec7343070f 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -244,28 +244,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
 			    initval);
 }
 
-static inline struct request_sock *
-inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
-{
-	struct request_sock *req = reqsk_alloc(ops);
-
-	if (req) {
-		struct inet_request_sock *ireq = inet_rsk(req);
-
-		kmemcheck_annotate_bitfield(ireq, flags);
-		ireq->opt = NULL;
-		atomic64_set(&ireq->ir_cookie, 0);
-		ireq->ireq_state = TCP_NEW_SYN_RECV;
-		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
-
-		/* Following is temporary. It is coupled with debugging
-		 * helpers in reqsk_put() & reqsk_free()
-		 */
-		atomic_set(&ireq->ireq_refcnt, 0);
-	}
-
-	return req;
-}
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+				      struct sock *sk_listener);
 
 static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2a480f6811ea..52b74e0eab2a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5967,6 +5967,30 @@ static void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_mark = inet_request_mark(sk, skb);
 }
 
+struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
+				      struct sock *sk_listener)
+{
+	struct request_sock *req = reqsk_alloc(ops);
+
+	if (req) {
+		struct inet_request_sock *ireq = inet_rsk(req);
+
+		kmemcheck_annotate_bitfield(ireq, flags);
+		ireq->opt = NULL;
+		atomic64_set(&ireq->ir_cookie, 0);
+		ireq->ireq_state = TCP_NEW_SYN_RECV;
+		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+
+		/* Following is temporary. It is coupled with debugging
+		 * helpers in reqsk_put() & reqsk_free()
+		 */
+		atomic_set(&ireq->ireq_refcnt, 0);
+	}
+
+	return req;
+}
+EXPORT_SYMBOL(inet_reqsk_alloc);
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
-- 
cgit v1.2.3


From 4e9a578e5b6bdfa8b7fed7a41f28a86a7cffc85f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 17 Mar 2015 18:32:28 -0700
Subject: inet: add rsk_listener field to struct request_sock

Once we'll be able to lookup request sockets in ehash table,
we'll need to get access to listener which created this request.

This avoid doing a lookup to find the listener, which benefits
for a more solid SO_REUSEPORT, and is needed once we no
longer queue request sock into a listener private queue.

Note that 'struct tcp_request_sock'->listener could be reduced
to a single bit, as TFO listener should match req->rsk_listener.
TFO will no longer need to hold a reference on the listener.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h | 12 +++++++++---
 net/ipv4/tcp_input.c       |  2 +-
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 56dc2faba47e..723d1cbdf20e 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -52,6 +52,7 @@ struct request_sock {
 #define rsk_refcnt			__req_common.skc_refcnt
 
 	struct request_sock		*dl_next;
+	struct sock			*rsk_listener;
 	u16				mss;
 	u8				num_retrans; /* number of retransmits */
 	u8				cookie_ts:1; /* syncookie: encode tcpopts in timestamp */
@@ -67,13 +68,16 @@ struct request_sock {
 	u32				peer_secid;
 };
 
-static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops)
+static inline struct request_sock *
+reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
 {
 	struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC);
 
-	if (req != NULL)
+	if (req) {
 		req->rsk_ops = ops;
-
+		sock_hold(sk_listener);
+		req->rsk_listener = sk_listener;
+	}
 	return req;
 }
 
@@ -88,6 +92,8 @@ static inline void reqsk_free(struct request_sock *req)
 	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
 
 	req->rsk_ops->destructor(req);
+	if (req->rsk_listener)
+		sock_put(req->rsk_listener);
 	kmem_cache_free(req->rsk_ops->slab, req);
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 52b74e0eab2a..fbe518981d36 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5970,7 +5970,7 @@ static void tcp_openreq_init(struct request_sock *req,
 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 				      struct sock *sk_listener)
 {
-	struct request_sock *req = reqsk_alloc(ops);
+	struct request_sock *req = reqsk_alloc(ops, sk_listener);
 
 	if (req) {
 		struct inet_request_sock *ireq = inet_rsk(req);
-- 
cgit v1.2.3


From 9439ce00f208d95703a6725e4ea986dd90e37ffd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 17 Mar 2015 18:32:29 -0700
Subject: tcp: rename struct tcp_request_sock listener

The listener field in struct tcp_request_sock is a pointer
back to the listener. We now have req->rsk_listener, so TCP
only needs one boolean and not a full pointer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h             |  2 +-
 net/core/request_sock.c         | 18 +++++++-----------
 net/ipv4/inet_connection_sock.c |  7 +++----
 net/ipv4/syncookies.c           |  2 +-
 net/ipv4/tcp_fastopen.c         |  7 +------
 net/ipv4/tcp_input.c            |  2 +-
 net/ipv6/syncookies.c           |  2 +-
 7 files changed, 15 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 97dbf16f7d9d..f869ae8afbaf 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -111,7 +111,7 @@ struct tcp_request_sock_ops;
 struct tcp_request_sock {
 	struct inet_request_sock 	req;
 	const struct tcp_request_sock_ops *af_specific;
-	struct sock			*listener; /* needed for TFO */
+	bool				tfo_listener;
 	u32				rcv_isn;
 	u32				snt_isn;
 	u32				snt_synack; /* synack sent time */
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index e910317ef6d9..cc39a2aa663a 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -153,24 +153,22 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
  * case might also exist in tcp_v4_hnd_req() that will trigger this locking
  * order.
  *
- * When a TFO req is created, it needs to sock_hold its listener to prevent
- * the latter data structure from going away.
- *
- * This function also sets "treq->listener" to NULL and unreference listener
- * socket. treq->listener is used by the listener so it is protected by the
+ * This function also sets "treq->tfo_listener" to false.
+ * treq->tfo_listener is used by the listener so it is protected by the
  * fastopenq->lock in this function.
  */
 void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 			   bool reset)
 {
-	struct sock *lsk = tcp_rsk(req)->listener;
-	struct fastopen_queue *fastopenq =
-	    inet_csk(lsk)->icsk_accept_queue.fastopenq;
+	struct sock *lsk = req->rsk_listener;
+	struct fastopen_queue *fastopenq;
+
+	fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
 
 	tcp_sk(sk)->fastopen_rsk = NULL;
 	spin_lock_bh(&fastopenq->lock);
 	fastopenq->qlen--;
-	tcp_rsk(req)->listener = NULL;
+	tcp_rsk(req)->tfo_listener = false;
 	if (req->sk)	/* the child socket hasn't been accepted yet */
 		goto out;
 
@@ -179,7 +177,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 		 * special RST handling below.
 		 */
 		spin_unlock_bh(&fastopenq->lock);
-		sock_put(lsk);
 		reqsk_put(req);
 		return;
 	}
@@ -201,5 +198,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 	fastopenq->qlen++;
 out:
 	spin_unlock_bh(&fastopenq->lock);
-	sock_put(lsk);
 }
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3390ba6f96b2..741f0d96a7f7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -325,7 +325,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 	sk_acceptq_removed(sk);
 	if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) {
 		spin_lock_bh(&queue->fastopenq->lock);
-		if (tcp_rsk(req)->listener) {
+		if (tcp_rsk(req)->tfo_listener) {
 			/* We are still waiting for the final ACK from 3WHS
 			 * so can't free req now. Instead, we set req->sk to
 			 * NULL to signify that the child socket is taken
@@ -817,9 +817,9 @@ void inet_csk_listen_stop(struct sock *sk)
 
 		percpu_counter_inc(sk->sk_prot->orphan_count);
 
-		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) {
+		if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
 			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
-			BUG_ON(sk != tcp_rsk(req)->listener);
+			BUG_ON(sk != req->rsk_listener);
 
 			/* Paranoid, to prevent race condition if
 			 * an inbound pkt destined for child is
@@ -828,7 +828,6 @@ void inet_csk_listen_stop(struct sock *sk)
 			 * tcp_v4_destroy_sock().
 			 */
 			tcp_sk(child)->fastopen_rsk = NULL;
-			sock_put(sk);
 		}
 		inet_csk_destroy_sock(child);
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index eb940750bb1b..574b67765a06 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -345,7 +345,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
-	treq->listener		= NULL;
+	treq->tfo_listener	= false;
 	ireq->ireq_family = AF_INET;
 
 	ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 84381319e1bc..186fd394ec0a 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -155,12 +155,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	tp = tcp_sk(child);
 
 	tp->fastopen_rsk = req;
-	/* Do a hold on the listner sk so that if the listener is being
-	 * closed, the child that has been accepted can live on and still
-	 * access listen_lock.
-	 */
-	sock_hold(sk);
-	tcp_rsk(req)->listener = sk;
+	tcp_rsk(req)->tfo_listener = true;
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never
 	 * scaled. So correct it appropriately.
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fbe518981d36..a94ddb96fc85 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6120,7 +6120,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		if (err || want_cookie)
 			goto drop_and_free;
 
-		tcp_rsk(req)->listener = NULL;
+		tcp_rsk(req)->tfo_listener = false;
 		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	}
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 039e74dd29fe..1ef0c926ce9d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -195,7 +195,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
-	treq->listener = NULL;
+	treq->tfo_listener = false;
 	ireq->ireq_family = AF_INET6;
 
 	if (security_inet_conn_request(sk, skb, req))
-- 
cgit v1.2.3


From 0470c8ca1d57927f2cc3e1d5add1fb2834609447 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 17 Mar 2015 18:32:31 -0700
Subject: inet: fix request sock refcounting

While testing last patch series, I found req sock refcounting was wrong.

We must set skc_refcnt to 1 for all request socks added in hashes,
but also on request sockets created by FastOpen or syncookies.

It is tricky because we need to defer this initialization so that
future RCU lookups do not try to take a refcount on a not yet
fully initialized request socket.

Also get rid of ireq_refcnt alias.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 13854e5a6046 ("inet: add proper refcounting to request sock")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h |  5 -----
 include/net/inet_sock.h            |  1 -
 include/net/request_sock.h         | 11 +++++++++++
 net/ipv4/syncookies.c              | 11 ++++++-----
 net/ipv4/tcp_fastopen.c            |  1 +
 net/ipv4/tcp_input.c               |  4 ----
 net/ipv6/syncookies.c              |  7 ++++---
 7 files changed, 22 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 191feec60205..b9a6b0a94cc6 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -275,11 +275,6 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk,
 					    struct sock *child)
 {
 	reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
-	/* before letting lookups find us, make sure all req fields
-	 * are committed to memory.
-	 */
-	smp_wmb();
-	atomic_set(&req->rsk_refcnt, 1);
 }
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 6fec7343070f..b6c3737da4e9 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -81,7 +81,6 @@ struct inet_request_sock {
 #define ir_cookie		req.__req_common.skc_cookie
 #define ireq_net		req.__req_common.skc_net
 #define ireq_state		req.__req_common.skc_state
-#define ireq_refcnt		req.__req_common.skc_refcnt
 #define ireq_family		req.__req_common.skc_family
 
 	kmemcheck_bitfield_begin(flags);
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 723d1cbdf20e..3fa4f824900a 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -77,6 +77,11 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
 		req->rsk_ops = ops;
 		sock_hold(sk_listener);
 		req->rsk_listener = sk_listener;
+
+		/* Following is temporary. It is coupled with debugging
+		 * helpers in reqsk_put() & reqsk_free()
+		 */
+		atomic_set(&req->rsk_refcnt, 0);
 	}
 	return req;
 }
@@ -292,6 +297,12 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
 	req->sk = NULL;
 	req->dl_next = lopt->syn_table[hash];
 
+	/* before letting lookups find us, make sure all req fields
+	 * are committed to memory and refcnt initialized.
+	 */
+	smp_wmb();
+	atomic_set(&req->rsk_refcnt, 1);
+
 	write_lock(&queue->syn_wait_lock);
 	lopt->syn_table[hash] = req;
 	write_unlock(&queue->syn_wait_lock);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 574b67765a06..34e755403715 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -227,11 +227,12 @@ static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 	struct sock *child;
 
 	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
-	if (child)
+	if (child) {
+		atomic_set(&req->rsk_refcnt, 1);
 		inet_csk_reqsk_queue_add(sk, req, child);
-	else
+	} else {
 		reqsk_free(req);
-
+	}
 	return child;
 }
 
@@ -356,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	ireq->opt = tcp_v4_save_options(skb);
 
 	if (security_inet_conn_request(sk, skb, req)) {
-		reqsk_put(req);
+		reqsk_free(req);
 		goto out;
 	}
 
@@ -377,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	security_req_classify_flow(req, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(sock_net(sk), &fl4);
 	if (IS_ERR(rt)) {
-		reqsk_put(req);
+		reqsk_free(req);
 		goto out;
 	}
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 186fd394ec0a..82e375a0cbcf 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -169,6 +169,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
 				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 
+	atomic_set(&req->rsk_refcnt, 1);
 	/* Add the child socket directly into the accept queue */
 	inet_csk_reqsk_queue_add(sk, req, child);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a94ddb96fc85..1dfbaee3554e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5981,10 +5981,6 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 		ireq->ireq_state = TCP_NEW_SYN_RECV;
 		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
 
-		/* Following is temporary. It is coupled with debugging
-		 * helpers in reqsk_put() & reqsk_free()
-		 */
-		atomic_set(&ireq->ireq_refcnt, 0);
 	}
 
 	return req;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 1ef0c926ce9d..da5823e5e5a7 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -49,11 +49,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 	struct sock *child;
 
 	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
-	if (child)
+	if (child) {
+		atomic_set(&req->rsk_refcnt, 1);
 		inet_csk_reqsk_queue_add(sk, req, child);
-	else
+	} else {
 		reqsk_free(req);
-
+	}
 	return child;
 }
 
-- 
cgit v1.2.3


From cf39284d41f67964cf42b21bb386c012cf5b7f65 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 18 Mar 2015 08:57:41 +0800
Subject: regulator: Fix documentation for regmap in the config

dev_get_regulator() does not exist, fix the typo.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index d4ad5b5a02bb..045f709cb89b 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -316,7 +316,7 @@ struct regulator_desc {
  * @driver_data: private regulator data
  * @of_node: OpenFirmware node to parse for device tree bindings (may be
  *           NULL).
- * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is
+ * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is
  *          insufficient.
  * @ena_gpio_initialized: GPIO controlling regulator enable was properly
  *                        initialized, meaning that >= 0 is a valid gpio
-- 
cgit v1.2.3


From 6aebd940840a4d3a0a8ffc5883d3892f4bd61e90 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 18 Mar 2015 20:01:15 +1100
Subject: rhashtable: Remove shift from bucket_table

Keeping both size and shift is silly.  We only need one.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 2 --
 lib/rhashtable.c           | 5 ++---
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 1695378b3c5b..f16e85692959 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -51,7 +51,6 @@ struct rhash_head {
  * @size: Number of hash buckets
  * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
- * @shift: Current size (1 << shift)
  * @locks_mask: Mask to apply before accessing locks[]
  * @locks: Array of spinlocks protecting individual buckets
  * @walkers: List of active walkers
@@ -63,7 +62,6 @@ struct bucket_table {
 	unsigned int		size;
 	unsigned int		rehash;
 	u32			hash_rnd;
-	u32			shift;
 	unsigned int		locks_mask;
 	spinlock_t		*locks;
 	struct list_head	walkers;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 09a7ada89ade..097400362467 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -162,7 +162,6 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 		return NULL;
 
 	tbl->size = nbuckets;
-	tbl->shift = ilog2(nbuckets);
 
 	if (alloc_bucket_locks(ht, tbl) < 0) {
 		bucket_table_free(tbl);
@@ -189,7 +188,7 @@ static bool rht_grow_above_75(const struct rhashtable *ht,
 {
 	/* Expand table when exceeding 75% load */
 	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
-	       (!ht->p.max_shift || tbl->shift < ht->p.max_shift);
+	       (!ht->p.max_shift || tbl->size < (1 << ht->p.max_shift));
 }
 
 /**
@@ -202,7 +201,7 @@ static bool rht_shrink_below_30(const struct rhashtable *ht,
 {
 	/* Shrink table beneath 30% load */
 	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
-	       tbl->shift > ht->p.min_shift;
+	       tbl->size > (1 << ht->p.min_shift);
 }
 
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
-- 
cgit v1.2.3


From c2e213cff701fce71a0aba8de82f2c2a4acf52ae Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 18 Mar 2015 20:01:16 +1100
Subject: rhashtable: Introduce max_size/min_size

This patch adds the parameters max_size and min_size which are
meant to replace max_shift and min_shift.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  4 ++++
 lib/rhashtable.c           | 12 ++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index f16e85692959..81267fef85d7 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -85,6 +85,8 @@ struct rhashtable;
  * @head_offset: Offset of rhash_head in struct to be hashed
  * @max_shift: Maximum number of shifts while expanding
  * @min_shift: Minimum number of shifts while shrinking
+ * @max_size: Maximum size while expanding
+ * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Function to hash key
@@ -97,6 +99,8 @@ struct rhashtable_params {
 	size_t			head_offset;
 	size_t			max_shift;
 	size_t			min_shift;
+	unsigned int		max_size;
+	unsigned int		min_size;
 	u32			nulls_base;
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 097400362467..c4061bbd0113 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -27,7 +27,7 @@
 #include <linux/err.h>
 
 #define HASH_DEFAULT_SIZE	64UL
-#define HASH_MIN_SIZE		4UL
+#define HASH_MIN_SIZE		4U
 #define BUCKET_LOCKS_PER_CPU   128UL
 
 /* Base bits plus 1 bit for nulls marker */
@@ -188,7 +188,8 @@ static bool rht_grow_above_75(const struct rhashtable *ht,
 {
 	/* Expand table when exceeding 75% load */
 	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
-	       (!ht->p.max_shift || tbl->size < (1 << ht->p.max_shift));
+	       (!ht->p.max_shift || tbl->size < (1 << ht->p.max_shift)) &&
+	       (!ht->p.max_size || tbl->size < ht->p.max_size);
 }
 
 /**
@@ -201,7 +202,8 @@ static bool rht_shrink_below_30(const struct rhashtable *ht,
 {
 	/* Shrink table beneath 30% load */
 	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
-	       tbl->size > (1 << ht->p.min_shift);
+	       tbl->size > (1 << ht->p.min_shift) &&
+	       tbl->size > ht->p.min_size;
 }
 
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
@@ -873,7 +875,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 static size_t rounded_hashtable_size(struct rhashtable_params *params)
 {
 	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
-		   1UL << params->min_shift);
+		   max(1UL << params->min_shift,
+		       (unsigned long)params->min_size));
 }
 
 /**
@@ -935,6 +938,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 
 	params->min_shift = max_t(size_t, params->min_shift,
 				  ilog2(HASH_MIN_SIZE));
+	params->min_size = max(params->min_size, HASH_MIN_SIZE);
 
 	if (params->nelem_hint)
 		size = rounded_hashtable_size(params);
-- 
cgit v1.2.3


From e2e21c1c5808e5dfd88d3606cd6386cf85f6f5b1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 18 Mar 2015 20:01:21 +1100
Subject: rhashtable: Remove max_shift and min_shift

Now that nobody uses max_shift and min_shift, we can safely remove
them.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 4 ----
 lib/rhashtable.c           | 7 +------
 2 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 81267fef85d7..99425f2be708 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -83,8 +83,6 @@ struct rhashtable;
  * @key_len: Length of key
  * @key_offset: Offset of key in struct to be hashed
  * @head_offset: Offset of rhash_head in struct to be hashed
- * @max_shift: Maximum number of shifts while expanding
- * @min_shift: Minimum number of shifts while shrinking
  * @max_size: Maximum size while expanding
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
@@ -97,8 +95,6 @@ struct rhashtable_params {
 	size_t			key_len;
 	size_t			key_offset;
 	size_t			head_offset;
-	size_t			max_shift;
-	size_t			min_shift;
 	unsigned int		max_size;
 	unsigned int		min_size;
 	u32			nulls_base;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index c4061bbd0113..5f8fe3e88219 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -188,7 +188,6 @@ static bool rht_grow_above_75(const struct rhashtable *ht,
 {
 	/* Expand table when exceeding 75% load */
 	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
-	       (!ht->p.max_shift || tbl->size < (1 << ht->p.max_shift)) &&
 	       (!ht->p.max_size || tbl->size < ht->p.max_size);
 }
 
@@ -202,7 +201,6 @@ static bool rht_shrink_below_30(const struct rhashtable *ht,
 {
 	/* Shrink table beneath 30% load */
 	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
-	       tbl->size > (1 << ht->p.min_shift) &&
 	       tbl->size > ht->p.min_size;
 }
 
@@ -875,8 +873,7 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 static size_t rounded_hashtable_size(struct rhashtable_params *params)
 {
 	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
-		   max(1UL << params->min_shift,
-		       (unsigned long)params->min_size));
+		   (unsigned long)params->min_size);
 }
 
 /**
@@ -936,8 +933,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
 		return -EINVAL;
 
-	params->min_shift = max_t(size_t, params->min_shift,
-				  ilog2(HASH_MIN_SIZE));
 	params->min_size = max(params->min_size, HASH_MIN_SIZE);
 
 	if (params->nelem_hint)
-- 
cgit v1.2.3


From f0b7d43c8a28155f50adb087a563cfc97566e477 Mon Sep 17 00:00:00 2001
From: Brad Campbell <bradjc5@gmail.com>
Date: Tue, 17 Mar 2015 16:25:46 -0400
Subject: cc2520: Add support for CC2591 amplifier.

The TI CC2521 is an RF power amplifier that is designed to interface
with the CC2520. Conveniently, it directly interfaces with the CC2520
and does not require any pins to be connected to a
microcontroller/processor. Adding a CC2591 increases the CC2520's range,
which is useful for border router and other wall-powered applications.

Using the CC2591 with the CC2520 requires configuring the CC2520 GPIOs
that are connected to the CC2591 to correctly set the CC2591 into TX and
RX modes. Further, TI recommends that the CC2520_TXPOWER and
CC2520_AGCCTRL1 registers are set differently to maximize the CC2591's
performance. These settings are covered in TI Application Note AN065.

This patch adds an optional `amplified` field to the cc2520 entry in the
device tree. If present, the CC2520 will be configured to operate with a
CC2591.

The expected pin mapping is:
CC2520 GPIO0 --> CC2591 EN
CC2520 GPIO5 --> CC2591 PAEN

Signed-off-by: Brad Campbell <bradjc5@gmail.com>
Acked-by: Varka Bhadram <varkabhadram@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 .../devicetree/bindings/net/ieee802154/cc2520.txt  |  4 ++
 drivers/net/ieee802154/cc2520.c                    | 55 ++++++++++++++++++----
 include/linux/spi/cc2520.h                         |  1 +
 3 files changed, 52 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
index 0071883c08d8..fb6d49f184ed 100644
--- a/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
+++ b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
@@ -13,11 +13,15 @@ Required properties:
 	- cca-gpio:		GPIO spec for the CCA pin
 	- vreg-gpio:		GPIO spec for the VREG pin
 	- reset-gpio:		GPIO spec for the RESET pin
+Optional properties:
+	- amplified:		include if the CC2520 is connected to a CC2591 amplifier
+
 Example:
 	cc2520@0 {
 		compatible = "ti,cc2520";
 		reg = <0>;
 		spi-max-frequency = <4000000>;
+		amplified;
 		pinctrl-names = "default";
 		pinctrl-0 = <&cc2520_cape_pins>;
 		fifo-gpio = <&gpio1 18 0>;
diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c
index 233b6c6017d4..f833b8bb6663 100644
--- a/drivers/net/ieee802154/cc2520.c
+++ b/drivers/net/ieee802154/cc2520.c
@@ -738,6 +738,8 @@ static int cc2520_get_platform_data(struct spi_device *spi,
 	pdata->vreg = of_get_named_gpio(np, "vreg-gpio", 0);
 	pdata->reset = of_get_named_gpio(np, "reset-gpio", 0);
 
+	pdata->amplified = of_property_read_bool(np, "amplified");
+
 	return 0;
 }
 
@@ -746,6 +748,11 @@ static int cc2520_hw_init(struct cc2520_private *priv)
 	u8 status = 0, state = 0xff;
 	int ret;
 	int timeout = 100;
+	struct cc2520_platform_data pdata;
+
+	ret = cc2520_get_platform_data(priv->spi, &pdata);
+	if (ret)
+		goto err_ret;
 
 	ret = cc2520_read_register(priv, CC2520_FSMSTAT1, &state);
 	if (ret)
@@ -768,11 +775,47 @@ static int cc2520_hw_init(struct cc2520_private *priv)
 
 	dev_vdbg(&priv->spi->dev, "oscillator brought up\n");
 
-	/* Registers default value: section 28.1 in Datasheet */
-	ret = cc2520_write_register(priv, CC2520_TXPOWER, 0xF7);
-	if (ret)
-		goto err_ret;
+	/* If the CC2520 is connected to a CC2591 amplifier, we must both
+	 * configure GPIOs on the CC2520 to correctly configure the CC2591
+	 * and change a couple settings of the CC2520 to work with the
+	 * amplifier. See section 8 page 17 of TI application note AN065.
+	 * http://www.ti.com/lit/an/swra229a/swra229a.pdf
+	 */
+	if (pdata.amplified) {
+		ret = cc2520_write_register(priv, CC2520_TXPOWER, 0xF9);
+		if (ret)
+			goto err_ret;
 
+		ret = cc2520_write_register(priv, CC2520_AGCCTRL1, 0x16);
+		if (ret)
+			goto err_ret;
+
+		ret = cc2520_write_register(priv, CC2520_GPIOCTRL0, 0x46);
+		if (ret)
+			goto err_ret;
+
+		ret = cc2520_write_register(priv, CC2520_GPIOCTRL5, 0x47);
+		if (ret)
+			goto err_ret;
+
+		ret = cc2520_write_register(priv, CC2520_GPIOPOLARITY, 0x1e);
+		if (ret)
+			goto err_ret;
+
+		ret = cc2520_write_register(priv, CC2520_TXCTRL, 0xc1);
+		if (ret)
+			goto err_ret;
+	} else {
+		ret = cc2520_write_register(priv, CC2520_TXPOWER, 0xF7);
+		if (ret)
+			goto err_ret;
+
+		ret = cc2520_write_register(priv, CC2520_AGCCTRL1, 0x11);
+		if (ret)
+			goto err_ret;
+	}
+
+	/* Registers default value: section 28.1 in Datasheet */
 	ret = cc2520_write_register(priv, CC2520_CCACTRL0, 0x1A);
 	if (ret)
 		goto err_ret;
@@ -797,10 +840,6 @@ static int cc2520_hw_init(struct cc2520_private *priv)
 	if (ret)
 		goto err_ret;
 
-	ret = cc2520_write_register(priv, CC2520_AGCCTRL1, 0x11);
-	if (ret)
-		goto err_ret;
-
 	ret = cc2520_write_register(priv, CC2520_ADCTEST0, 0x10);
 	if (ret)
 		goto err_ret;
diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h
index 85b8ee67e937..e741e8baad92 100644
--- a/include/linux/spi/cc2520.h
+++ b/include/linux/spi/cc2520.h
@@ -21,6 +21,7 @@ struct cc2520_platform_data {
 	int sfd;
 	int reset;
 	int vreg;
+	bool amplified;
 };
 
 #endif
-- 
cgit v1.2.3


From 822b3b2ebfff8e9b3d006086c527738a7ca00cd0 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Wed, 18 Mar 2015 14:57:33 +0200
Subject: net: Add max rate tx queue attribute

This adds a tx_maxrate attribute to the tx queue sysfs entry allowing
for max-rate limiting. Along with DCB-ETS and BQL this provides another
knob to tune queue performance. The limit units are Mbps.

By default it is disabled. To disable the rate limitation after it
has been set for a queue, it should be set to zero.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net-queues |  8 +++
 Documentation/networking/scaling.txt             |  9 ++++
 include/linux/netdevice.h                        |  8 +++
 net/core/net-sysfs.c                             | 67 +++++++++++++++++++-----
 4 files changed, 80 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues
index 5e9aeb91d355..0c0df91b1516 100644
--- a/Documentation/ABI/testing/sysfs-class-net-queues
+++ b/Documentation/ABI/testing/sysfs-class-net-queues
@@ -24,6 +24,14 @@ Description:
 		Indicates the number of transmit timeout events seen by this
 		network interface transmit queue.
 
+What:		/sys/class/<iface>/queues/tx-<queue>/tx_maxrate
+Date:		March 2015
+KernelVersion:	4.1
+Contact:	netdev@vger.kernel.org
+Description:
+		A Mbps max-rate set for the queue, a value of zero means disabled,
+		default is disabled.
+
 What:		/sys/class/<iface>/queues/tx-<queue>/xps_cpus
 Date:		November 2010
 KernelVersion:	2.6.38
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index 99ca40e8e810..cbfac0949635 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -421,6 +421,15 @@ best CPUs to share a given queue are probably those that share the cache
 with the CPU that processes transmit completions for that queue
 (transmit interrupts).
 
+Per TX Queue rate limitation:
+=============================
+
+These are rate-limitation mechanisms implemented by HW, where currently
+a max-rate attribute is supported, by setting a Mbps value to
+
+/sys/class/net/<dev>/queues/tx-<n>/tx_maxrate
+
+A value of zero means disabled, and this is the default.
 
 Further Information
 ===================
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dd1d069758be..76c5de4978a8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -587,6 +587,7 @@ struct netdev_queue {
 #ifdef CONFIG_BQL
 	struct dql		dql;
 #endif
+	unsigned long		tx_maxrate;
 } ____cacheline_aligned_in_smp;
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -1022,6 +1023,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	be otherwise expressed by feature flags. The check is called with
  *	the set of features that the stack has calculated and it returns
  *	those the driver believes to be appropriate.
+ * int (*ndo_set_tx_maxrate)(struct net_device *dev,
+ *			     int queue_index, u32 maxrate);
+ *	Called when a user wants to set a max-rate limitation of specific
+ *	TX queue.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1178,6 +1183,9 @@ struct net_device_ops {
 	netdev_features_t	(*ndo_features_check) (struct sk_buff *skb,
 						       struct net_device *dev,
 						       netdev_features_t features);
+	int			(*ndo_set_tx_maxrate)(struct net_device *dev,
+						      int queue_index,
+						      u32 maxrate);
 };
 
 /**
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index cf30620a88e1..7e58bd7ec232 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -951,6 +951,60 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
 	return sprintf(buf, "%lu", trans_timeout);
 }
 
+#ifdef CONFIG_XPS
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+{
+	struct net_device *dev = queue->dev;
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++)
+		if (queue == &dev->_tx[i])
+			break;
+
+	BUG_ON(i >= dev->num_tx_queues);
+
+	return i;
+}
+
+static ssize_t show_tx_maxrate(struct netdev_queue *queue,
+			       struct netdev_queue_attribute *attribute,
+			       char *buf)
+{
+	return sprintf(buf, "%lu\n", queue->tx_maxrate);
+}
+
+static ssize_t set_tx_maxrate(struct netdev_queue *queue,
+			      struct netdev_queue_attribute *attribute,
+			      const char *buf, size_t len)
+{
+	struct net_device *dev = queue->dev;
+	int err, index = get_netdev_queue_index(queue);
+	u32 rate = 0;
+
+	err = kstrtou32(buf, 10, &rate);
+	if (err < 0)
+		return err;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	err = -EOPNOTSUPP;
+	if (dev->netdev_ops->ndo_set_tx_maxrate)
+		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
+
+	rtnl_unlock();
+	if (!err) {
+		queue->tx_maxrate = rate;
+		return len;
+	}
+	return err;
+}
+
+static struct netdev_queue_attribute queue_tx_maxrate =
+	__ATTR(tx_maxrate, S_IRUGO | S_IWUSR,
+	       show_tx_maxrate, set_tx_maxrate);
+#endif
+
 static struct netdev_queue_attribute queue_trans_timeout =
 	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
 
@@ -1065,18 +1119,6 @@ static struct attribute_group dql_group = {
 #endif /* CONFIG_BQL */
 
 #ifdef CONFIG_XPS
-static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
-{
-	struct net_device *dev = queue->dev;
-	unsigned int i;
-
-	i = queue - dev->_tx;
-	BUG_ON(i >= dev->num_tx_queues);
-
-	return i;
-}
-
-
 static ssize_t show_xps_map(struct netdev_queue *queue,
 			    struct netdev_queue_attribute *attribute, char *buf)
 {
@@ -1153,6 +1195,7 @@ static struct attribute *netdev_queue_default_attrs[] = {
 	&queue_trans_timeout.attr,
 #ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
+	&queue_tx_maxrate.attr,
 #endif
 	NULL
 };
-- 
cgit v1.2.3


From fc31e2560a2443410fe45c27116fae736541a7b5 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 18 Mar 2015 14:57:34 +0200
Subject: net/mlx4_core: Add basic support for QP max-rate limiting

Add the low-level device commands and definitions used for QP max-rate limiting.

This is done through the following elements:

  - read rate-limit device caps in QUERY_DEV_CAP: number of different
    rates and the min/max rates in Kbs/Mbs/Gbs units

  - enhance the QP context struct to contain rate limit units and value

  - allow to do run time rate-limit setting to QPs through the
    update-qp firmware command

  - QP rate-limiting is disallowed for VFs

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c            | 33 +++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/fw.h            |  1 +
 drivers/net/ethernet/mellanox/mlx4/main.c          |  2 ++
 drivers/net/ethernet/mellanox/mlx4/qp.c            |  5 ++++
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  6 +++-
 include/linux/mlx4/device.h                        | 17 +++++++++++
 include/linux/mlx4/qp.h                            | 14 ++++++---
 7 files changed, 72 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 242bcee5d774..4a471f5d1b56 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -144,7 +144,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[19] = "Performance optimized for limited rule configuration flow steering support",
 		[20] = "Recoverable error events support",
 		[21] = "Port Remap support",
-		[22] = "QCN support"
+		[22] = "QCN support",
+		[23] = "QP rate limiting support"
 	};
 	int i;
 
@@ -697,6 +698,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET		0xb0
 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET	0xa8
 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET	0xac
+#define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET	0xcc
+#define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET	0xd0
+#define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET	0xd2
+
 
 	dev_cap->flags2 = 0;
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -904,6 +909,18 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET);
 	dev_cap->dmfs_high_rate_qpn_range &= MGM_QPN_MASK;
 
+	MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET);
+	dev_cap->rl_caps.num_rates = size;
+	if (dev_cap->rl_caps.num_rates) {
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT;
+		MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET);
+		dev_cap->rl_caps.max_val  = size & 0xfff;
+		dev_cap->rl_caps.max_unit = size >> 14;
+		MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET);
+		dev_cap->rl_caps.min_val  = size & 0xfff;
+		dev_cap->rl_caps.min_unit = size >> 14;
+	}
+
 	MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 	if (field32 & (1 << 16))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
@@ -979,6 +996,15 @@ void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 dev_cap->dmfs_high_rate_qpn_base);
 	mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n",
 		 dev_cap->dmfs_high_rate_qpn_range);
+
+	if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT) {
+		struct mlx4_rate_limit_caps *rl_caps = &dev_cap->rl_caps;
+
+		mlx4_dbg(dev, "QP Rate-Limit: #rates %d, unit/val max %d/%d, min %d/%d\n",
+			 rl_caps->num_rates, rl_caps->max_unit, rl_caps->max_val,
+			 rl_caps->min_unit, rl_caps->min_val);
+	}
+
 	dump_dev_cap_flags(dev, dev_cap->flags);
 	dump_dev_cap_flags2(dev, dev_cap->flags2);
 }
@@ -1075,6 +1101,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	u64	flags;
 	int	err = 0;
 	u8	field;
+	u16	field16;
 	u32	bmme_flags, field32;
 	int	real_port;
 	int	slave_port;
@@ -1158,6 +1185,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	field &= 0xfe;
 	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
 
+	/* turn off QP max-rate limiting for guests */
+	field16 = 0;
+	MLX4_PUT(outbox->buf, field16, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index f44f7f6017ed..863655bd3947 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -127,6 +127,7 @@ struct mlx4_dev_cap {
 	u32 max_counters;
 	u32 dmfs_high_rate_qpn_base;
 	u32 dmfs_high_rate_qpn_range;
+	struct mlx4_rate_limit_caps rl_caps;
 	struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 7e487223489a..43aa76775b5f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -489,6 +489,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
 	}
 
+	dev->caps.rl_caps = dev_cap->rl_caps;
+
 	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
 		dev->caps.dmfs_high_rate_qpn_range;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index eda29dbbfcd2..69e4462e4ee4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -442,6 +442,11 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 			cmd->qp_context.param3 |= cpu_to_be32(MLX4_STRIP_VLAN);
 	}
 
+	if (attr & MLX4_UPDATE_QP_RATE_LIMIT) {
+		qp_mask |= 1ULL << MLX4_UPD_QP_MASK_RATE_LIMIT;
+		cmd->qp_context.rate_limit_params = cpu_to_be16((params->rate_unit << 14) | params->rate_val);
+	}
+
 	cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask);
 	cmd->qp_mask = cpu_to_be64(qp_mask);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index d43e25914d19..c258f8625aac 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2947,8 +2947,12 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
 	qp_type	= (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
 	optpar	= be32_to_cpu(*(__be32 *) inbox->buf);
 
-	if (slave != mlx4_master_func_num(dev))
+	if (slave != mlx4_master_func_num(dev)) {
 		qp_ctx->params2 &= ~MLX4_QP_BIT_FPP;
+		/* setting QP rate-limit is disallowed for VFs */
+		if (qp_ctx->rate_limit_params)
+			return -EPERM;
+	}
 
 	switch (qp_type) {
 	case MLX4_QP_ST_RC:
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1cc54822b931..4550c67b92e4 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -205,6 +205,7 @@ enum {
 	MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20,
 	MLX4_DEV_CAP_FLAG2_PORT_REMAP		= 1LL <<  21,
 	MLX4_DEV_CAP_FLAG2_QCN			= 1LL <<  22,
+	MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT	= 1LL <<  23
 };
 
 enum {
@@ -450,6 +451,21 @@ enum mlx4_module_id {
 	MLX4_MODULE_ID_QSFP28           = 0x11,
 };
 
+enum { /* rl */
+	MLX4_QP_RATE_LIMIT_NONE		= 0,
+	MLX4_QP_RATE_LIMIT_KBS		= 1,
+	MLX4_QP_RATE_LIMIT_MBS		= 2,
+	MLX4_QP_RATE_LIMIT_GBS		= 3
+};
+
+struct mlx4_rate_limit_caps {
+	u16	num_rates; /* Number of different rates */
+	u8	min_unit;
+	u16	min_val;
+	u8	max_unit;
+	u16	max_val;
+};
+
 static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
 {
 	return (major << 32) | (minor << 16) | subminor;
@@ -565,6 +581,7 @@ struct mlx4_caps {
 	u32			dmfs_high_rate_qpn_base;
 	u32			dmfs_high_rate_qpn_range;
 	u32			vf_caps;
+	struct mlx4_rate_limit_caps rl_caps;
 };
 
 struct mlx4_buf_list {
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 551f85456c11..1023ebe035b7 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -207,14 +207,16 @@ struct mlx4_qp_context {
 	__be32			msn;
 	__be16			rq_wqe_counter;
 	__be16			sq_wqe_counter;
-	u32			reserved3[2];
+	u32			reserved3;
+	__be16			rate_limit_params;
+	__be16			reserved4;
 	__be32			param3;
 	__be32			nummmcpeers_basemkey;
 	u8			log_page_size;
-	u8			reserved4[2];
+	u8			reserved5[2];
 	u8			mtt_base_addr_h;
 	__be32			mtt_base_addr_l;
-	u32			reserved5[10];
+	u32			reserved6[10];
 };
 
 struct mlx4_update_qp_context {
@@ -229,6 +231,7 @@ struct mlx4_update_qp_context {
 enum {
 	MLX4_UPD_QP_MASK_PM_STATE	= 32,
 	MLX4_UPD_QP_MASK_VSD		= 33,
+	MLX4_UPD_QP_MASK_RATE_LIMIT	= 35,
 };
 
 enum {
@@ -428,7 +431,8 @@ struct mlx4_wqe_inline_seg {
 enum mlx4_update_qp_attr {
 	MLX4_UPDATE_QP_SMAC		= 1 << 0,
 	MLX4_UPDATE_QP_VSD		= 1 << 1,
-	MLX4_UPDATE_QP_SUPPORTED_ATTRS	= (1 << 2) - 1
+	MLX4_UPDATE_QP_RATE_LIMIT	= 1 << 2,
+	MLX4_UPDATE_QP_SUPPORTED_ATTRS	= (1 << 3) - 1
 };
 
 enum mlx4_update_qp_params_flags {
@@ -438,6 +442,8 @@ enum mlx4_update_qp_params_flags {
 struct mlx4_update_qp_params {
 	u8	smac_index;
 	u32	flags;
+	u16	rate_unit;
+	u16	rate_val;
 };
 
 int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
-- 
cgit v1.2.3


From 6eada0110c8984477f5f1e57a0b7f7b2fc841e30 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 18 Mar 2015 14:05:33 -0700
Subject: netns: constify net_hash_mix() and various callers

const qualifiers ease code review by making clear
which objects are not written in a function.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h           |  2 +-
 include/net/inet_hashtables.h |  6 +++---
 include/net/netns/hash.h      |  4 ++--
 net/ipv4/devinet.c            |  2 +-
 net/ipv4/inet_hashtables.c    |  6 +++---
 net/ipv4/ping.c               |  6 +++---
 net/ipv4/udp.c                | 10 +++++-----
 net/ipv6/inet6_hashtables.c   | 10 +++++-----
 net/ipv6/udp.c                | 16 ++++++++--------
 9 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 247cfdcc4b08..87c094961bd5 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -34,7 +34,7 @@ static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb)
 
 #define UDP_HTABLE_SIZE_MIN		(CONFIG_BASE_SMALL ? 128 : 256)
 
-static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask)
+static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
 {
 	return (num + net_hash_mix(net)) & mask;
 }
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index bcd64756e5fe..eb1963af0ebd 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -221,8 +221,8 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
 void inet_bind_bucket_destroy(struct kmem_cache *cachep,
 			      struct inet_bind_bucket *tb);
 
-static inline int inet_bhashfn(struct net *net, const __u16 lport,
-			       const int bhash_size)
+static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
+			       const u32 bhash_size)
 {
 	return (lport + net_hash_mix(net)) & (bhash_size - 1);
 }
@@ -231,7 +231,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 		    const unsigned short snum);
 
 /* These can have wildcards, don't try too hard. */
-static inline int inet_lhashfn(struct net *net, const unsigned short num)
+static inline u32 inet_lhashfn(const struct net *net, const unsigned short num)
 {
 	return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1);
 }
diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h
index c06ac58ca107..69a6715d9f3f 100644
--- a/include/net/netns/hash.h
+++ b/include/net/netns/hash.h
@@ -5,7 +5,7 @@
 
 struct net;
 
-static inline unsigned int net_hash_mix(struct net *net)
+static inline u32 net_hash_mix(const struct net *net)
 {
 #ifdef CONFIG_NET_NS
 	/*
@@ -13,7 +13,7 @@ static inline unsigned int net_hash_mix(struct net *net)
 	 * always zeroed
 	 */
 
-	return (unsigned)(((unsigned long)net) >> L1_CACHE_SHIFT);
+	return (u32)(((unsigned long)net) >> L1_CACHE_SHIFT);
 #else
 	return 0;
 #endif
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5105759e4e00..375dc71b9a64 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -107,7 +107,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 
 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
 
-static u32 inet_addr_hash(struct net *net, __be32 addr)
+static u32 inet_addr_hash(const struct net *net, __be32 addr)
 {
 	u32 val = (__force u32) addr ^ net_hash_mix(net);
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c28bca4cc15b..330a08bcd1c1 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -24,9 +24,9 @@
 #include <net/secure_seq.h>
 #include <net/ip.h>
 
-static unsigned int inet_ehashfn(struct net *net, const __be32 laddr,
-				 const __u16 lport, const __be32 faddr,
-				 const __be16 fport)
+static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
+			const __u16 lport, const __be32 faddr,
+			const __be16 fport)
 {
 	static u32 inet_ehash_secret __read_mostly;
 
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index fd88f868776f..344e7cdfb8d4 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -64,11 +64,11 @@ EXPORT_SYMBOL_GPL(pingv6_ops);
 
 static u16 ping_port_rover;
 
-static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask)
+static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask)
 {
-	int res = (num + net_hash_mix(net)) & mask;
+	u32 res = (num + net_hash_mix(net)) & mask;
 
-	pr_debug("hash(%d) = %d\n", num, res);
+	pr_debug("hash(%u) = %u\n", num, res);
 	return res;
 }
 EXPORT_SYMBOL_GPL(ping_hash);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f27556e2158b..294af16633af 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -318,8 +318,8 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 		   inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
 }
 
-static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
-				       unsigned int port)
+static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
+			      unsigned int port)
 {
 	return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
 }
@@ -421,9 +421,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 	return score;
 }
 
-static unsigned int udp_ehashfn(struct net *net, const __be32 laddr,
-				 const __u16 lport, const __be32 faddr,
-				 const __be16 fport)
+static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
+		       const __u16 lport, const __be32 faddr,
+		       const __be16 fport)
 {
 	static u32 udp_ehash_secret __read_mostly;
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 051dffb49c90..df7df99d1d7e 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -23,11 +23,11 @@
 #include <net/secure_seq.h>
 #include <net/ip.h>
 
-static unsigned int inet6_ehashfn(struct net *net,
-				  const struct in6_addr *laddr,
-				  const u16 lport,
-				  const struct in6_addr *faddr,
-				  const __be16 fport)
+static u32 inet6_ehashfn(const struct net *net,
+			 const struct in6_addr *laddr,
+			 const u16 lport,
+			 const struct in6_addr *faddr,
+			 const __be16 fport)
 {
 	static u32 inet6_ehash_secret __read_mostly;
 	static u32 ipv6_hash_secret __read_mostly;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 70568a4548e4..7fe0329c0d37 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,11 +53,11 @@
 #include <trace/events/skb.h>
 #include "udp_impl.h"
 
-static unsigned int udp6_ehashfn(struct net *net,
-				  const struct in6_addr *laddr,
-				  const u16 lport,
-				  const struct in6_addr *faddr,
-				  const __be16 fport)
+static u32 udp6_ehashfn(const struct net *net,
+			const struct in6_addr *laddr,
+			const u16 lport,
+			const struct in6_addr *faddr,
+			const __be16 fport)
 {
 	static u32 udp6_ehash_secret __read_mostly;
 	static u32 udp_ipv6_hash_secret __read_mostly;
@@ -104,9 +104,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 	return 0;
 }
 
-static unsigned int udp6_portaddr_hash(struct net *net,
-				       const struct in6_addr *addr6,
-				       unsigned int port)
+static u32 udp6_portaddr_hash(const struct net *net,
+			      const struct in6_addr *addr6,
+			      unsigned int port)
 {
 	unsigned int hash, mix = net_hash_mix(net);
 
-- 
cgit v1.2.3


From 5b441f76f1b83591e8cd9d60ba1df3a2aacde27f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 18 Mar 2015 14:05:34 -0700
Subject: net: introduce sk_ehashfn() helper

Goal is to unify IPv4/IPv6 inet_hash handling, and use common helpers
for all kind of sockets (full sockets, timewait and request sockets)

inet_sk_ehashfn() becomes sk_ehashfn() but still only copes with IPv4

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h |  2 ++
 net/ipv4/inet_hashtables.c    | 17 ++++++-----------
 2 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index eb1963af0ebd..ef993ef571ea 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -383,6 +383,8 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
 				     iph->daddr, dport, inet_iif(skb));
 }
 
+u32 sk_ehashfn(const struct sock *sk);
+
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			struct sock *sk, u32 port_offset,
 			int (*check_established)(struct inet_timewait_death_row *,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 330a08bcd1c1..3a86dfd7ae33 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -37,16 +37,11 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
 }
 
 
-static unsigned int inet_sk_ehashfn(const struct sock *sk)
+u32 sk_ehashfn(const struct sock *sk)
 {
-	const struct inet_sock *inet = inet_sk(sk);
-	const __be32 laddr = inet->inet_rcv_saddr;
-	const __u16 lport = inet->inet_num;
-	const __be32 faddr = inet->inet_daddr;
-	const __be16 fport = inet->inet_dport;
-	struct net *net = sock_net(sk);
-
-	return inet_ehashfn(net, laddr, lport, faddr, fport);
+	return inet_ehashfn(sock_net(sk),
+			    sk->sk_rcv_saddr, sk->sk_num,
+			    sk->sk_daddr, sk->sk_dport);
 }
 
 /*
@@ -407,13 +402,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct hlist_nulls_head *list;
-	spinlock_t *lock;
 	struct inet_ehash_bucket *head;
+	spinlock_t *lock;
 	int twrefcnt = 0;
 
 	WARN_ON(!sk_unhashed(sk));
 
-	sk->sk_hash = inet_sk_ehashfn(sk);
+	sk->sk_hash = sk_ehashfn(sk);
 	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
 	list = &head->chain;
 	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-- 
cgit v1.2.3


From d1e559d0b1b0d02f76a6bd5b768a99dc834ae926 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 18 Mar 2015 14:05:35 -0700
Subject: inet: add IPv6 support to sk_ehashfn()

Intent is to converge IPv4 & IPv6 inet_hash functions to
factorize code.

IPv4 sockets initialize sk_rcv_saddr and sk_v6_daddr
in this patch, thanks to new sk_daddr_set() and sk_rcv_saddr_set()
helpers.

__inet6_hash can now use sk_ehashfn() instead of a private
inet6_sk_ehashfn() and will simply use __inet_hash() in a
following patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 19 +++++++++++++++++++
 net/dccp/ipv4.c               |  9 ++++-----
 net/dccp/ipv6.c               | 10 ++--------
 net/ipv4/inet_hashtables.c    | 11 ++++++++++-
 net/ipv4/tcp_ipv4.c           |  8 ++++----
 net/ipv6/inet6_hashtables.c   | 22 ++++------------------
 net/ipv6/tcp_ipv6.c           | 11 ++---------
 7 files changed, 45 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ef993ef571ea..06ad42182ec2 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -384,6 +384,25 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
 }
 
 u32 sk_ehashfn(const struct sock *sk);
+u32 inet6_ehashfn(const struct net *net,
+		  const struct in6_addr *laddr, const u16 lport,
+		  const struct in6_addr *faddr, const __be16 fport);
+
+static inline void sk_daddr_set(struct sock *sk, __be32 addr)
+{
+	sk->sk_daddr = addr; /* alias of inet_daddr */
+#if IS_ENABLED(CONFIG_IPV6)
+	ipv6_addr_set_v4mapped(addr, &sk->sk_v6_daddr);
+#endif
+}
+
+static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
+{
+	sk->sk_rcv_saddr = addr; /* alias of inet_rcv_saddr */
+#if IS_ENABLED(CONFIG_IPV6)
+	ipv6_addr_set_v4mapped(addr, &sk->sk_v6_rcv_saddr);
+#endif
+}
 
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			struct sock *sk, u32 port_offset,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index bf897829f4f0..f3f8906f482e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -89,10 +89,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	if (inet->inet_saddr == 0)
 		inet->inet_saddr = fl4->saddr;
-	inet->inet_rcv_saddr = inet->inet_saddr;
-
+	sk_rcv_saddr_set(sk, inet->inet_saddr);
 	inet->inet_dport = usin->sin_port;
-	inet->inet_daddr = daddr;
+	sk_daddr_set(sk, daddr);
 
 	inet_csk(sk)->icsk_ext_hdr_len = 0;
 	if (inet_opt)
@@ -408,8 +407,8 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newinet		   = inet_sk(newsk);
 	ireq		   = inet_rsk(req);
-	newinet->inet_daddr	= ireq->ir_rmt_addr;
-	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+	sk_daddr_set(newsk, ireq->ir_rmt_addr);
+	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
 	newinet->inet_saddr	= ireq->ir_loc_addr;
 	newinet->inet_opt	= ireq->opt;
 	ireq->opt	   = NULL;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d7e7c7b0a3f1..9216d173dd5f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -470,11 +470,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
-
-		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
-
-		newsk->sk_v6_rcv_saddr = newnp->saddr;
+		newnp->saddr = newsk->sk_v6_rcv_saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
@@ -917,9 +913,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			sk->sk_backlog_rcv = dccp_v6_do_rcv;
 			goto failure;
 		}
-		ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
-		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr);
-
+		np->saddr = sk->sk_v6_rcv_saddr;
 		return err;
 	}
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3a86dfd7ae33..ab7f677a97db 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -36,9 +36,18 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
 			      inet_ehash_secret + net_hash_mix(net));
 }
 
-
+/* This function handles inet_sock, but also timewait and request sockets
+ * for IPv4/IPv6.
+ */
 u32 sk_ehashfn(const struct sock *sk)
 {
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6 &&
+	    !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
+		return inet6_ehashfn(sock_net(sk),
+				     &sk->sk_v6_rcv_saddr, sk->sk_num,
+				     &sk->sk_v6_daddr, sk->sk_dport);
+#endif
 	return inet_ehashfn(sock_net(sk),
 			    sk->sk_rcv_saddr, sk->sk_num,
 			    sk->sk_daddr, sk->sk_dport);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 80067d5858b4..ca207df4af1c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -189,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	if (!inet->inet_saddr)
 		inet->inet_saddr = fl4->saddr;
-	inet->inet_rcv_saddr = inet->inet_saddr;
+	sk_rcv_saddr_set(sk, inet->inet_saddr);
 
 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
 		/* Reset inherited state */
@@ -204,7 +204,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		tcp_fetch_timewait_stamp(sk, &rt->dst);
 
 	inet->inet_dport = usin->sin_port;
-	inet->inet_daddr = daddr;
+	sk_daddr_set(sk, daddr);
 
 	inet_csk(sk)->icsk_ext_hdr_len = 0;
 	if (inet_opt)
@@ -1319,8 +1319,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newtp		      = tcp_sk(newsk);
 	newinet		      = inet_sk(newsk);
 	ireq		      = inet_rsk(req);
-	newinet->inet_daddr   = ireq->ir_rmt_addr;
-	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+	sk_daddr_set(newsk, ireq->ir_rmt_addr);
+	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
 	newinet->inet_saddr	      = ireq->ir_loc_addr;
 	inet_opt	      = ireq->opt;
 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index df7df99d1d7e..ed5787b20192 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -23,11 +23,9 @@
 #include <net/secure_seq.h>
 #include <net/ip.h>
 
-static u32 inet6_ehashfn(const struct net *net,
-			 const struct in6_addr *laddr,
-			 const u16 lport,
-			 const struct in6_addr *faddr,
-			 const __be16 fport)
+u32 inet6_ehashfn(const struct net *net,
+		  const struct in6_addr *laddr, const u16 lport,
+		  const struct in6_addr *faddr, const __be16 fport)
 {
 	static u32 inet6_ehash_secret __read_mostly;
 	static u32 ipv6_hash_secret __read_mostly;
@@ -44,18 +42,6 @@ static u32 inet6_ehashfn(const struct net *net,
 			       inet6_ehash_secret + net_hash_mix(net));
 }
 
-static int inet6_sk_ehashfn(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
-	const struct in6_addr *faddr = &sk->sk_v6_daddr;
-	const __u16 lport = inet->inet_num;
-	const __be16 fport = inet->inet_dport;
-	struct net *net = sock_net(sk);
-
-	return inet6_ehashfn(net, laddr, lport, faddr, fport);
-}
-
 int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
@@ -75,7 +61,7 @@ int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
 		struct hlist_nulls_head *list;
 		spinlock_t *lock;
 
-		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
+		sk->sk_hash = hash = sk_ehashfn(sk);
 		list = &inet_ehash_bucket(hashinfo, hash)->chain;
 		lock = inet_ehash_lockp(hashinfo, hash);
 		spin_lock(lock);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e4761b22307b..5546df074583 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -233,11 +233,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			tp->af_specific = &tcp_sock_ipv6_specific;
 #endif
 			goto failure;
-		} else {
-			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
-			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
-					       &sk->sk_v6_rcv_saddr);
 		}
+		np->saddr = sk->sk_v6_rcv_saddr;
 
 		return err;
 	}
@@ -1078,11 +1075,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
-
-		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
-
-		newsk->sk_v6_rcv_saddr = newnp->saddr;
+		newnp->saddr = newsk->sk_v6_rcv_saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
-- 
cgit v1.2.3


From 77a6a471bc18763cb2e80a8cc92f4c04eae37d32 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 18 Mar 2015 14:05:36 -0700
Subject: ipv6: get rid of __inet6_hash()

We can now use inet_hash() and __inet_hash() instead of private
functions.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_hashtables.h |  2 --
 include/net/inet_hashtables.h  |  1 +
 net/dccp/ipv6.c                | 17 ++---------------
 net/ipv4/inet_hashtables.c     | 12 ++++++------
 net/ipv6/inet6_hashtables.c    | 38 +-------------------------------------
 net/ipv6/tcp_ipv6.c            | 17 ++---------------
 6 files changed, 12 insertions(+), 75 deletions(-)

(limited to 'include')

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 9201afe083fa..7ff588ca6817 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -38,8 +38,6 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash,
 	return jhash_3words(lhash, fhash, ports, initval);
 }
 
-int __inet6_hash(struct sock *sk, struct inet_timewait_sock *twp);
-
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
  * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 06ad42182ec2..eee6c3399990 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -249,6 +249,7 @@ void inet_put_port(struct sock *sk);
 void inet_hashinfo_init(struct inet_hashinfo *h);
 
 int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw);
+int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw);
 void inet_hash(struct sock *sk);
 void inet_unhash(struct sock *sk);
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9216d173dd5f..c655de5f67c9 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -40,19 +40,6 @@
 static const struct inet_connection_sock_af_ops dccp_ipv6_mapped;
 static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
 
-static void dccp_v6_hash(struct sock *sk)
-{
-	if (sk->sk_state != DCCP_CLOSED) {
-		if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) {
-			inet_hash(sk);
-			return;
-		}
-		local_bh_disable();
-		__inet6_hash(sk, NULL);
-		local_bh_enable();
-	}
-}
-
 /* add pseudo-header to DCCP checksum stored in skb->csum */
 static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
 				      const struct in6_addr *saddr,
@@ -588,7 +575,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		dccp_done(newsk);
 		goto out;
 	}
-	__inet6_hash(newsk, NULL);
+	__inet_hash(newsk, NULL);
 
 	return newsk;
 
@@ -1056,7 +1043,7 @@ static struct proto dccp_v6_prot = {
 	.sendmsg	   = dccp_sendmsg,
 	.recvmsg	   = dccp_recvmsg,
 	.backlog_rcv	   = dccp_v6_do_rcv,
-	.hash		   = dccp_v6_hash,
+	.hash		   = inet_hash,
 	.unhash		   = inet_unhash,
 	.accept		   = inet_csk_accept,
 	.get_port	   = inet_csk_get_port,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ab7f677a97db..82753bd57e79 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -434,15 +434,13 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
 
-static void __inet_hash(struct sock *sk)
+int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct inet_listen_hashbucket *ilb;
 
-	if (sk->sk_state != TCP_LISTEN) {
-		__inet_hash_nolisten(sk, NULL);
-		return;
-	}
+	if (sk->sk_state != TCP_LISTEN)
+		return __inet_hash_nolisten(sk, tw);
 
 	WARN_ON(!sk_unhashed(sk));
 	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -451,13 +449,15 @@ static void __inet_hash(struct sock *sk)
 	__sk_nulls_add_node_rcu(sk, &ilb->head);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	spin_unlock(&ilb->lock);
+	return 0;
 }
+EXPORT_SYMBOL(__inet_hash);
 
 void inet_hash(struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
 		local_bh_disable();
-		__inet_hash(sk);
+		__inet_hash(sk, NULL);
 		local_bh_enable();
 	}
 }
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index ed5787b20192..b86b429f5f81 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -42,42 +42,6 @@ u32 inet6_ehashfn(const struct net *net,
 			       inet6_ehash_secret + net_hash_mix(net));
 }
 
-int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
-{
-	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	int twrefcnt = 0;
-
-	WARN_ON(!sk_unhashed(sk));
-
-	if (sk->sk_state == TCP_LISTEN) {
-		struct inet_listen_hashbucket *ilb;
-
-		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
-		spin_lock(&ilb->lock);
-		__sk_nulls_add_node_rcu(sk, &ilb->head);
-		spin_unlock(&ilb->lock);
-	} else {
-		unsigned int hash;
-		struct hlist_nulls_head *list;
-		spinlock_t *lock;
-
-		sk->sk_hash = hash = sk_ehashfn(sk);
-		list = &inet_ehash_bucket(hashinfo, hash)->chain;
-		lock = inet_ehash_lockp(hashinfo, hash);
-		spin_lock(lock);
-		__sk_nulls_add_node_rcu(sk, list);
-		if (tw) {
-			WARN_ON(sk->sk_hash != tw->tw_hash);
-			twrefcnt = inet_twsk_unhash(tw);
-		}
-		spin_unlock(lock);
-	}
-
-	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	return twrefcnt;
-}
-EXPORT_SYMBOL(__inet6_hash);
-
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
  * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
@@ -306,6 +270,6 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 		       struct sock *sk)
 {
 	return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
-			__inet6_check_established, __inet6_hash);
+			__inet6_check_established, __inet_hash_nolisten);
 }
 EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5546df074583..720676d073d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -104,19 +104,6 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
-static void tcp_v6_hash(struct sock *sk)
-{
-	if (sk->sk_state != TCP_CLOSE) {
-		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
-			tcp_prot.hash(sk);
-			return;
-		}
-		local_bh_disable();
-		__inet6_hash(sk, NULL);
-		local_bh_enable();
-	}
-}
-
 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
 {
 	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
@@ -1224,7 +1211,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		tcp_done(newsk);
 		goto out;
 	}
-	__inet6_hash(newsk, NULL);
+	__inet_hash(newsk, NULL);
 
 	return newsk;
 
@@ -1883,7 +1870,7 @@ struct proto tcpv6_prot = {
 	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v6_do_rcv,
 	.release_cb		= tcp_release_cb,
-	.hash			= tcp_v6_hash,
+	.hash			= inet_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
 	.enter_memory_pressure	= tcp_enter_memory_pressure,
-- 
cgit v1.2.3


From b4d6444ea3b50bf368639432657bcf2b4e5e1062 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 18 Mar 2015 14:05:37 -0700
Subject: inet: get rid of last __inet_hash_connect() argument

We now always call __inet_hash_nolisten(), no need to pass it
as an argument.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 4 +---
 net/ipv4/inet_hashtables.c    | 9 ++++-----
 net/ipv6/inet6_hashtables.c   | 2 +-
 3 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index eee6c3399990..73fe0f9525d9 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -409,9 +409,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			struct sock *sk, u32 port_offset,
 			int (*check_established)(struct inet_timewait_death_row *,
 						 struct sock *, __u16,
-						 struct inet_timewait_sock **),
-			int (*hash)(struct sock *sk,
-				    struct inet_timewait_sock *twp));
+						 struct inet_timewait_sock **));
 
 int inet_hash_connect(struct inet_timewait_death_row *death_row,
 		      struct sock *sk);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 82753bd57e79..0fb841b9d834 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -488,8 +488,7 @@ EXPORT_SYMBOL_GPL(inet_unhash);
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		struct sock *sk, u32 port_offset,
 		int (*check_established)(struct inet_timewait_death_row *,
-			struct sock *, __u16, struct inet_timewait_sock **),
-		int (*hash)(struct sock *sk, struct inet_timewait_sock *twp))
+			struct sock *, __u16, struct inet_timewait_sock **))
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	const unsigned short snum = inet_sk(sk)->inet_num;
@@ -559,7 +558,7 @@ ok:
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->inet_sport = htons(port);
-			twrefcnt += hash(sk, tw);
+			twrefcnt += __inet_hash_nolisten(sk, tw);
 		}
 		if (tw)
 			twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
@@ -581,7 +580,7 @@ ok:
 	tb  = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		hash(sk, NULL);
+		__inet_hash_nolisten(sk, NULL);
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {
@@ -601,7 +600,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
 		      struct sock *sk)
 {
 	return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
-			__inet_check_established, __inet_hash_nolisten);
+				   __inet_check_established);
 }
 EXPORT_SYMBOL_GPL(inet_hash_connect);
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b86b429f5f81..033f17816ef4 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -270,6 +270,6 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 		       struct sock *sk)
 {
 	return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk),
-			__inet6_check_established, __inet_hash_nolisten);
+				   __inet6_check_established);
 }
 EXPORT_SYMBOL_GPL(inet6_hash_connect);
-- 
cgit v1.2.3


From 08d2cc3b26554cae21f279b520ae5c2a3b2be421 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 18 Mar 2015 14:05:38 -0700
Subject: inet: request sock should init IPv6/IPv4 addresses

In order to be able to use sk_ehashfn() for request socks,
we need to initialize their IPv6/IPv4 addresses.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h | 5 +++++
 net/dccp/ipv4.c            | 4 ++--
 net/ipv4/inet_diag.c       | 4 ++--
 net/ipv4/syncookies.c      | 4 ++--
 net/ipv4/tcp_ipv4.c        | 8 ++++----
 5 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 3fa4f824900a..e7ef86340514 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -91,6 +91,11 @@ static inline struct request_sock *inet_reqsk(struct sock *sk)
 	return (struct request_sock *)sk;
 }
 
+static inline struct sock *req_to_sk(struct request_sock *req)
+{
+	return (struct sock *)req;
+}
+
 static inline void reqsk_free(struct request_sock *req)
 {
 	/* temporary debugging */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f3f8906f482e..e7ad291cd96b 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -638,8 +638,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq = inet_rsk(req);
-	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
-	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
+	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
 	ireq->ireq_family = AF_INET;
 	ireq->ir_iif = sk->sk_bound_dev_if;
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e7ba59038c8d..74c39c9f3e11 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -742,14 +742,14 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 			if (bc) {
 				/* Note: entry.sport and entry.userlocks are already set */
-				entry_fill_addrs(&entry, (struct sock *)req);
+				entry_fill_addrs(&entry, req_to_sk(req));
 				entry.dport = ntohs(ireq->ir_rmt_port);
 
 				if (!inet_diag_bc_run(bc, &entry))
 					continue;
 			}
 
-			err = inet_req_diag_fill((struct sock *)req, skb,
+			err = inet_req_diag_fill(req_to_sk(req), skb,
 						 NETLINK_CB(cb->skb).portid,
 						 cb->nlh->nlmsg_seq,
 						 NLM_F_MULTI, cb->nlh);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 34e755403715..ef01d8570358 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -337,8 +337,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	req->mss		= mss;
 	ireq->ir_num		= ntohs(th->dest);
 	ireq->ir_rmt_port	= th->source;
-	ireq->ir_loc_addr	= ip_hdr(skb)->daddr;
-	ireq->ir_rmt_addr	= ip_hdr(skb)->saddr;
+	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
 	ireq->ir_mark		= inet_request_mark(sk, skb);
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ca207df4af1c..ddd0b1f25b96 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1219,14 +1219,14 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
 
 #endif
 
-static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
+static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener,
 			    struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 
-	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
-	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
-	ireq->no_srccheck = inet_sk(sk)->transparent;
+	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+	ireq->no_srccheck = inet_sk(sk_listener)->transparent;
 	ireq->opt = tcp_v4_save_options(skb);
 	ireq->ireq_family = AF_INET;
 }
-- 
cgit v1.2.3


From 54ff9ef36bdf84d469a098cbf8e2a103fbc77054 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Wed, 18 Mar 2015 14:50:43 -0300
Subject: ipv4, ipv6: kill ip_mc_{join, leave}_group and ipv6_sock_mc_{join,
 drop}

in favor of their inner __ ones, which doesn't grab rtnl.

As these functions need to operate on a locked socket, we can't be
grabbing rtnl by then. It's too late and doing so causes reversed
locking.

So this patch:
- move rtnl handling to callers instead while already fixing some
  reversed locking situations, like on vxlan and ipvs code.
- renames __ ones to not have the __ mark:
  __ip_mc_{join,leave}_group -> ip_mc_{join,leave}_group
  __ipv6_sock_mc_{join,drop} -> ipv6_sock_mc_{join,drop}

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c             |  5 ++++-
 include/linux/igmp.h            |  2 --
 include/net/ipv6.h              |  4 ----
 net/ipv4/devinet.c              |  4 ++--
 net/ipv4/igmp.c                 | 41 ++++++++---------------------------------
 net/ipv4/ip_sockglue.c          | 21 +++++++++++++++------
 net/ipv6/addrconf.c             |  4 ++--
 net/ipv6/ipv6_sockglue.c        | 21 +++++++++++++--------
 net/ipv6/mcast.c                | 30 +++---------------------------
 net/netfilter/ipvs/ip_vs_sync.c |  2 ++
 net/tipc/udp_media.c            |  4 ++--
 11 files changed, 51 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 25d92d4fc625..8b8ca7492d56 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1097,7 +1097,6 @@ EXPORT_SYMBOL_GPL(vxlan_sock_release);
 
 /* Callback to update multicast group membership when first VNI on
  * multicast asddress is brought up
- * Done as workqueue because ip_mc_join_group acquires RTNL.
  */
 static void vxlan_igmp_join(struct work_struct *work)
 {
@@ -1107,6 +1106,7 @@ static void vxlan_igmp_join(struct work_struct *work)
 	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
 	int ifindex = vxlan->default_dst.remote_ifindex;
 
+	rtnl_lock();
 	lock_sock(sk);
 	if (ip->sa.sa_family == AF_INET) {
 		struct ip_mreqn mreq = {
@@ -1122,6 +1122,7 @@ static void vxlan_igmp_join(struct work_struct *work)
 #endif
 	}
 	release_sock(sk);
+	rtnl_unlock();
 
 	vxlan_sock_release(vs);
 	dev_put(vxlan->dev);
@@ -1136,6 +1137,7 @@ static void vxlan_igmp_leave(struct work_struct *work)
 	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
 	int ifindex = vxlan->default_dst.remote_ifindex;
 
+	rtnl_lock();
 	lock_sock(sk);
 	if (ip->sa.sa_family == AF_INET) {
 		struct ip_mreqn mreq = {
@@ -1152,6 +1154,7 @@ static void vxlan_igmp_leave(struct work_struct *work)
 	}
 
 	release_sock(sk);
+	rtnl_unlock();
 
 	vxlan_sock_release(vs);
 	dev_put(vxlan->dev);
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index b5a6470e686c..2c677afeea47 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -111,9 +111,7 @@ struct ip_mc_list {
 
 extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto);
 extern int igmp_rcv(struct sk_buff *);
-extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
 extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
-extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr);
 extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr);
 extern void ip_mc_drop_socket(struct sock *sk);
 extern int ip_mc_source(int add, int omode, struct sock *sk,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b7673065c074..e7ba9758a345 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -942,10 +942,6 @@ void ipv6_sysctl_unregister(void);
 
 int ipv6_sock_mc_join(struct sock *sk, int ifindex,
 		      const struct in6_addr *addr);
-int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
-			const struct in6_addr *addr);
 int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
 		      const struct in6_addr *addr);
-int __ipv6_sock_mc_drop(struct sock *sk, int ifindex,
-			const struct in6_addr *addr);
 #endif /* _NET_IPV6_H */
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 375dc71b9a64..975ee5e30c64 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -560,9 +560,9 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
 
 	lock_sock(sk);
 	if (join)
-		ret = __ip_mc_join_group(sk, &mreq);
+		ret = ip_mc_join_group(sk, &mreq);
 	else
-		ret = __ip_mc_leave_group(sk, &mreq);
+		ret = ip_mc_leave_group(sk, &mreq);
 	release_sock(sk);
 
 	return ret;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5cb1ef4ce292..ad3f866085de 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1850,7 +1850,10 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
 	pmc->sfcount[MCAST_EXCLUDE] = 1;
 }
 
-int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+/* Join a multicast group
+ */
+
+int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
 {
 	__be32 addr = imr->imr_multiaddr.s_addr;
 	struct ip_mc_socklist *iml, *i;
@@ -1898,20 +1901,6 @@ int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
 done:
 	return err;
 }
-EXPORT_SYMBOL(__ip_mc_join_group);
-
-/* Join a multicast group
- */
-int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
-{
-	int ret;
-
-	rtnl_lock();
-	ret = __ip_mc_join_group(sk, imr);
-	rtnl_unlock();
-
-	return ret;
-}
 EXPORT_SYMBOL(ip_mc_join_group);
 
 static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
@@ -1934,7 +1923,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 	return err;
 }
 
-int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
+int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_mc_socklist *iml;
@@ -1979,18 +1968,6 @@ int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 out:
 	return ret;
 }
-EXPORT_SYMBOL(__ip_mc_leave_group);
-
-int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
-{
-	int ret;
-
-	rtnl_lock();
-	ret = __ip_mc_leave_group(sk, imr);
-	rtnl_unlock();
-
-	return ret;
-}
 EXPORT_SYMBOL(ip_mc_leave_group);
 
 int ip_mc_source(int add, int omode, struct sock *sk, struct
@@ -2010,7 +1987,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	rtnl_lock();
+	ASSERT_RTNL();
 
 	imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
 	imr.imr_address.s_addr = mreqs->imr_interface;
@@ -2124,9 +2101,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1,
 		&mreqs->imr_sourceaddr, 1);
 done:
-	rtnl_unlock();
 	if (leavegroup)
-		return ip_mc_leave_group(sk, &imr);
+		err = ip_mc_leave_group(sk, &imr);
 	return err;
 }
 
@@ -2148,7 +2124,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	    msf->imsf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	rtnl_lock();
+	ASSERT_RTNL();
 
 	imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
 	imr.imr_address.s_addr = msf->imsf_interface;
@@ -2210,7 +2186,6 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	pmc->sfmode = msf->imsf_fmode;
 	err = 0;
 done:
-	rtnl_unlock();
 	if (leavegroup)
 		err = ip_mc_leave_group(sk, &imr);
 	return err;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5171709199f4..f6a0d54b308a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -541,9 +541,18 @@ static bool setsockopt_needs_rtnl(int optname)
 	switch (optname) {
 	case IP_ADD_MEMBERSHIP:
 	case IP_ADD_SOURCE_MEMBERSHIP:
+	case IP_BLOCK_SOURCE:
 	case IP_DROP_MEMBERSHIP:
+	case IP_DROP_SOURCE_MEMBERSHIP:
+	case IP_MSFILTER:
+	case IP_UNBLOCK_SOURCE:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_MSFILTER:
 	case MCAST_JOIN_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
 	case MCAST_LEAVE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+	case MCAST_UNBLOCK_SOURCE:
 		return true;
 	}
 	return false;
@@ -861,9 +870,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		}
 
 		if (optname == IP_ADD_MEMBERSHIP)
-			err = __ip_mc_join_group(sk, &mreq);
+			err = ip_mc_join_group(sk, &mreq);
 		else
-			err = __ip_mc_leave_group(sk, &mreq);
+			err = ip_mc_leave_group(sk, &mreq);
 		break;
 	}
 	case IP_MSFILTER:
@@ -928,7 +937,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
 			mreq.imr_address.s_addr = mreqs.imr_interface;
 			mreq.imr_ifindex = 0;
-			err = __ip_mc_join_group(sk, &mreq);
+			err = ip_mc_join_group(sk, &mreq);
 			if (err && err != -EADDRINUSE)
 				break;
 			omode = MCAST_INCLUDE;
@@ -960,9 +969,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		mreq.imr_ifindex = greq.gr_interface;
 
 		if (optname == MCAST_JOIN_GROUP)
-			err = __ip_mc_join_group(sk, &mreq);
+			err = ip_mc_join_group(sk, &mreq);
 		else
-			err = __ip_mc_leave_group(sk, &mreq);
+			err = ip_mc_leave_group(sk, &mreq);
 		break;
 	}
 	case MCAST_JOIN_SOURCE_GROUP:
@@ -1005,7 +1014,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			mreq.imr_multiaddr = psin->sin_addr;
 			mreq.imr_address.s_addr = 0;
 			mreq.imr_ifindex = greqs.gsr_interface;
-			err = __ip_mc_join_group(sk, &mreq);
+			err = ip_mc_join_group(sk, &mreq);
 			if (err && err != -EADDRINUSE)
 				break;
 			greqs.gsr_interface = mreq.imr_ifindex;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 88d2cf0cae52..158378e73f0a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2473,9 +2473,9 @@ static int ipv6_mc_config(struct sock *sk, bool join,
 
 	lock_sock(sk);
 	if (join)
-		ret = __ipv6_sock_mc_join(sk, ifindex, addr);
+		ret = ipv6_sock_mc_join(sk, ifindex, addr);
 	else
-		ret = __ipv6_sock_mc_drop(sk, ifindex, addr);
+		ret = ipv6_sock_mc_drop(sk, ifindex, addr);
 	release_sock(sk);
 
 	return ret;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index f2b731df8d77..cc5883791bac 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -124,6 +124,11 @@ static bool setsockopt_needs_rtnl(int optname)
 	case IPV6_DROP_MEMBERSHIP:
 	case MCAST_JOIN_GROUP:
 	case MCAST_LEAVE_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+	case MCAST_MSFILTER:
 		return true;
 	}
 	return false;
@@ -597,9 +602,9 @@ done:
 			break;
 
 		if (optname == IPV6_ADD_MEMBERSHIP)
-			retv = __ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
+			retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
 		else
-			retv = __ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
+			retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
 		break;
 	}
 	case IPV6_JOIN_ANYCAST:
@@ -638,11 +643,11 @@ done:
 		}
 		psin6 = (struct sockaddr_in6 *)&greq.gr_group;
 		if (optname == MCAST_JOIN_GROUP)
-			retv = __ipv6_sock_mc_join(sk, greq.gr_interface,
-						   &psin6->sin6_addr);
+			retv = ipv6_sock_mc_join(sk, greq.gr_interface,
+						 &psin6->sin6_addr);
 		else
-			retv = __ipv6_sock_mc_drop(sk, greq.gr_interface,
-						   &psin6->sin6_addr);
+			retv = ipv6_sock_mc_drop(sk, greq.gr_interface,
+						 &psin6->sin6_addr);
 		break;
 	}
 	case MCAST_JOIN_SOURCE_GROUP:
@@ -674,8 +679,8 @@ done:
 			struct sockaddr_in6 *psin6;
 
 			psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
-			retv = __ipv6_sock_mc_join(sk, greqs.gsr_interface,
-						   &psin6->sin6_addr);
+			retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
+						 &psin6->sin6_addr);
 			/* prior join w/ different source is ok */
 			if (retv && retv != -EADDRINUSE)
 				break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1dd1fedff9f4..cbb66fd3da6d 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -132,7 +132,7 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
 	return iv > 0 ? iv : 1;
 }
 
-int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct net_device *dev = NULL;
 	struct ipv6_mc_socklist *mc_lst;
@@ -199,24 +199,12 @@ int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *add
 
 	return 0;
 }
-EXPORT_SYMBOL(__ipv6_sock_mc_join);
-
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
-{
-	int ret;
-
-	rtnl_lock();
-	ret = __ipv6_sock_mc_join(sk, ifindex, addr);
-	rtnl_unlock();
-
-	return ret;
-}
 EXPORT_SYMBOL(ipv6_sock_mc_join);
 
 /*
  *	socket leave on multicast group
  */
-int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6_mc_socklist *mc_lst;
@@ -255,18 +243,6 @@ int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *add
 
 	return -EADDRNOTAVAIL;
 }
-EXPORT_SYMBOL(__ipv6_sock_mc_drop);
-
-int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
-{
-	int ret;
-
-	rtnl_lock();
-	ret = __ipv6_sock_mc_drop(sk, ifindex, addr);
-	rtnl_unlock();
-
-	return ret;
-}
 EXPORT_SYMBOL(ipv6_sock_mc_drop);
 
 /* called with rcu_read_lock() */
@@ -460,7 +436,7 @@ done:
 	read_unlock_bh(&idev->lock);
 	rcu_read_unlock();
 	if (leavegroup)
-		return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
+		err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
 	return err;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 08d95559b6f7..19b9cce6c210 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1405,9 +1405,11 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 
 	mreq.imr_ifindex = dev->ifindex;
 
+	rtnl_lock();
 	lock_sock(sk);
 	ret = ip_mc_join_group(sk, &mreq);
 	release_sock(sk);
+	rtnl_unlock();
 
 	return ret;
 }
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index fc2fb11a354d..04836dd70c2b 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -246,11 +246,11 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
 			return 0;
 		mreqn.imr_multiaddr = remote->ipv4;
 		mreqn.imr_ifindex = ub->ifindex;
-		err = __ip_mc_join_group(sk, &mreqn);
+		err = ip_mc_join_group(sk, &mreqn);
 	} else {
 		if (!ipv6_addr_is_multicast(&remote->ipv6))
 			return 0;
-		err = __ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6);
+		err = ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6);
 	}
 	return err;
 }
-- 
cgit v1.2.3


From db24a9044ee191c397dcd1c6574f56d67d7c8df5 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 17 Mar 2015 20:23:15 -0600
Subject: net: add support for phys_port_name

Similar to port id allow netdevices to specify port names and export
the name via sysfs. Drivers can implement the netdevice operation to
assist udev in having sane default names for the devices using the
rule:

$ cat /etc/udev/rules.d/80-net-setup-link.rules
SUBSYSTEM=="net", ACTION=="add", ATTR{phys_port_name}!="",
NAME="$attr{phys_port_name}"

Use of phys_name versus phys_id was suggested-by Jiri Pirko.

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net |  8 ++++++++
 include/linux/netdevice.h                 |  4 ++++
 include/uapi/linux/if_link.h              |  1 +
 net/core/dev.c                            | 18 ++++++++++++++++++
 net/core/net-sysfs.c                      | 23 +++++++++++++++++++++++
 net/core/rtnetlink.c                      | 21 +++++++++++++++++++++
 6 files changed, 75 insertions(+)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index beb8ec4dabbc..5ecfd72ba684 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -188,6 +188,14 @@ Description:
 		Indicates the interface unique physical port identifier within
 		the NIC, as a string.
 
+What:		/sys/class/net/<iface>/phys_port_name
+Date:		March 2015
+KernelVersion:	4.0
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the interface physical port name within the NIC,
+		as a string.
+
 What:		/sys/class/net/<iface>/speed
 Date:		October 2009
 KernelVersion:	2.6.33
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 76c5de4978a8..ec8f9b5f6500 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1164,6 +1164,8 @@ struct net_device_ops {
 						      bool new_carrier);
 	int			(*ndo_get_phys_port_id)(struct net_device *dev,
 							struct netdev_phys_item_id *ppid);
+	int			(*ndo_get_phys_port_name)(struct net_device *dev,
+							  char *name, size_t len);
 	void			(*ndo_add_vxlan_port)(struct  net_device *dev,
 						      sa_family_t sa_family,
 						      __be16 port);
@@ -2947,6 +2949,8 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_item_id *ppid);
+int dev_get_phys_port_name(struct net_device *dev,
+			   char *name, size_t len);
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 756436e1ce89..7158fd00a109 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -147,6 +147,7 @@ enum {
 	IFLA_CARRIER_CHANGES,
 	IFLA_PHYS_SWITCH_ID,
 	IFLA_LINK_NETNSID,
+	IFLA_PHYS_PORT_NAME,
 	__IFLA_MAX
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 39fe369b46ad..a1f24151db5b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5911,6 +5911,24 @@ int dev_get_phys_port_id(struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_get_phys_port_id);
 
+/**
+ *	dev_get_phys_port_name - Get device physical port name
+ *	@dev: device
+ *	@name: port name
+ *
+ *	Get device physical port name
+ */
+int dev_get_phys_port_name(struct net_device *dev,
+			   char *name, size_t len)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (!ops->ndo_get_phys_port_name)
+		return -EOPNOTSUPP;
+	return ops->ndo_get_phys_port_name(dev, name, len);
+}
+EXPORT_SYMBOL(dev_get_phys_port_name);
+
 /**
  *	dev_new_index	-	allocate an ifindex
  *	@net: the applicable net namespace
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7e58bd7ec232..cc5cf689809c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -418,6 +418,28 @@ static ssize_t phys_port_id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(phys_port_id);
 
+static ssize_t phys_port_name_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	ssize_t ret = -EINVAL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (dev_isalive(netdev)) {
+		char name[IFNAMSIZ];
+
+		ret = dev_get_phys_port_name(netdev, name, sizeof(name));
+		if (!ret)
+			ret = sprintf(buf, "%s\n", name);
+	}
+	rtnl_unlock();
+
+	return ret;
+}
+static DEVICE_ATTR_RO(phys_port_name);
+
 static ssize_t phys_switch_id_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
@@ -465,6 +487,7 @@ static struct attribute *net_class_attrs[] = {
 	&dev_attr_tx_queue_len.attr,
 	&dev_attr_gro_flush_timeout.attr,
 	&dev_attr_phys_port_id.attr,
+	&dev_attr_phys_port_name.attr,
 	&dev_attr_phys_switch_id.attr,
 	NULL,
 };
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 25b4b5d23485..6abe634c666c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -982,6 +982,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
+static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	char name[IFNAMSIZ];
+	int err;
+
+	err = dev_get_phys_port_name(dev, name, sizeof(name));
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+
+	if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 {
 	int err;
@@ -1072,6 +1090,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (rtnl_phys_port_id_fill(skb, dev))
 		goto nla_put_failure;
 
+	if (rtnl_phys_port_name_fill(skb, dev))
+		goto nla_put_failure;
+
 	if (rtnl_phys_switch_id_fill(skb, dev))
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From 99c4a26a159b28fa46a3e746a9b41b297e73d261 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 18 Mar 2015 22:52:33 -0400
Subject: net: Fix high overhead of vlan sub-device teardown.

When a networking device is taken down that has a non-trivial number
of VLAN devices configured under it, we eat a full synchronize_net()
for every such VLAN device.

This is because of the call chain:

	NETDEV_DOWN notifier
	--> vlan_device_event()
		--> dev_change_flags()
		--> __dev_change_flags()
		--> __dev_close()
		--> __dev_close_many()
		--> dev_deactivate_many()
			--> synchronize_net()

This is kind of rediculous because we already have infrastructure for
batching doing operation X to a list of net devices so that we only
incur one sync.

So make use of that by exporting dev_close_many() and adjusting it's
interfaace so that the caller can fully manage the batch list.  Use
this in vlan_device_event() and all the overhead goes away.

Reported-by: Salam Noureddine <noureddine@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/8021q/vlan.c          | 16 +++++++++++++---
 net/core/dev.c            | 10 ++++++----
 3 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ec8f9b5f6500..76951c5fbedf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2156,6 +2156,7 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name);
 int dev_alloc_name(struct net_device *dev, const char *name);
 int dev_open(struct net_device *dev);
 int dev_close(struct net_device *dev);
+int dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 64c6bed4a3d3..98a30a5b8664 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -413,7 +413,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			vlan_transfer_features(dev, vlandev);
 		break;
 
-	case NETDEV_DOWN:
+	case NETDEV_DOWN: {
+		struct net_device *tmp;
+		LIST_HEAD(close_list);
+
 		if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
 			vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
 
@@ -425,11 +428,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 			vlan = vlan_dev_priv(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
-				dev_change_flags(vlandev, flgs & ~IFF_UP);
+				list_add(&vlandev->close_list, &close_list);
+		}
+
+		dev_close_many(&close_list, false);
+
+		list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) {
 			netif_stacked_transfer_operstate(dev, vlandev);
+			list_del_init(&vlandev->close_list);
 		}
+		list_del(&close_list);
 		break;
-
+	}
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too.  */
 		vlan_group_for_each_dev(grp, i, vlandev) {
diff --git a/net/core/dev.c b/net/core/dev.c
index a1f24151db5b..5d43e010ef87 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1385,7 +1385,7 @@ static int __dev_close(struct net_device *dev)
 	return retval;
 }
 
-static int dev_close_many(struct list_head *head)
+int dev_close_many(struct list_head *head, bool unlink)
 {
 	struct net_device *dev, *tmp;
 
@@ -1399,11 +1399,13 @@ static int dev_close_many(struct list_head *head)
 	list_for_each_entry_safe(dev, tmp, head, close_list) {
 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
 		call_netdevice_notifiers(NETDEV_DOWN, dev);
-		list_del_init(&dev->close_list);
+		if (unlink)
+			list_del_init(&dev->close_list);
 	}
 
 	return 0;
 }
+EXPORT_SYMBOL(dev_close_many);
 
 /**
  *	dev_close - shutdown an interface.
@@ -1420,7 +1422,7 @@ int dev_close(struct net_device *dev)
 		LIST_HEAD(single);
 
 		list_add(&dev->close_list, &single);
-		dev_close_many(&single);
+		dev_close_many(&single, true);
 		list_del(&single);
 	}
 	return 0;
@@ -5986,7 +5988,7 @@ static void rollback_registered_many(struct list_head *head)
 	/* If device is running, close it first. */
 	list_for_each_entry(dev, head, unreg_list)
 		list_add_tail(&dev->close_list, &close_head);
-	dev_close_many(&close_head);
+	dev_close_many(&close_head, true);
 
 	list_for_each_entry(dev, head, unreg_list) {
 		/* And unlink it from device chain. */
-- 
cgit v1.2.3


From af615762e972be0c66cf1d156ca4fac13b93c0b0 Mon Sep 17 00:00:00 2001
From: Jörg Thalheim <joerg@higgsboson.tk>
Date: Wed, 18 Mar 2015 10:06:58 +0100
Subject: bridge: add ageing_time, stp_state, priority over netlink
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jörg Thalheim <joerg@higgsboson.tk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h |  3 +++
 net/bridge/br_netlink.c      | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 7158fd00a109..f5f5edd5ae5f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -225,6 +225,9 @@ enum {
 	IFLA_BR_FORWARD_DELAY,
 	IFLA_BR_HELLO_TIME,
 	IFLA_BR_MAX_AGE,
+	IFLA_BR_AGEING_TIME,
+	IFLA_BR_STP_STATE,
+	IFLA_BR_PRIORITY,
 	__IFLA_BR_MAX,
 };
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8bc6b67457dc..e1115a224a95 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -733,6 +733,9 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
 	[IFLA_BR_FORWARD_DELAY]	= { .type = NLA_U32 },
 	[IFLA_BR_HELLO_TIME]	= { .type = NLA_U32 },
 	[IFLA_BR_MAX_AGE]	= { .type = NLA_U32 },
+	[IFLA_BR_AGEING_TIME] = { .type = NLA_U32 },
+	[IFLA_BR_STP_STATE] = { .type = NLA_U32 },
+	[IFLA_BR_PRIORITY] = { .type = NLA_U16 },
 };
 
 static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -762,6 +765,24 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
 			return err;
 	}
 
+	if (data[IFLA_BR_AGEING_TIME]) {
+		u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]);
+
+		br->ageing_time = clock_t_to_jiffies(ageing_time);
+	}
+
+	if (data[IFLA_BR_STP_STATE]) {
+		u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]);
+
+		br_stp_set_enabled(br, stp_enabled);
+	}
+
+	if (data[IFLA_BR_PRIORITY]) {
+		u32 priority = nla_get_u16(data[IFLA_BR_PRIORITY]);
+
+		br_stp_set_bridge_priority(br, priority);
+	}
+
 	return 0;
 }
 
@@ -770,6 +791,9 @@ static size_t br_get_size(const struct net_device *brdev)
 	return nla_total_size(sizeof(u32)) +	/* IFLA_BR_FORWARD_DELAY  */
 	       nla_total_size(sizeof(u32)) +	/* IFLA_BR_HELLO_TIME */
 	       nla_total_size(sizeof(u32)) +	/* IFLA_BR_MAX_AGE */
+	       nla_total_size(sizeof(u32)) +    /* IFLA_BR_AGEING_TIME */
+	       nla_total_size(sizeof(u32)) +    /* IFLA_BR_STP_STATE */
+	       nla_total_size(sizeof(u16)) +    /* IFLA_BR_PRIORITY */
 	       0;
 }
 
@@ -779,10 +803,16 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 	u32 forward_delay = jiffies_to_clock_t(br->forward_delay);
 	u32 hello_time = jiffies_to_clock_t(br->hello_time);
 	u32 age_time = jiffies_to_clock_t(br->max_age);
+	u32 ageing_time = jiffies_to_clock_t(br->ageing_time);
+	u32 stp_enabled = br->stp_enabled;
+	u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
 
 	if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
 	    nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
-	    nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time))
+	    nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) ||
+	    nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) ||
+	    nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) ||
+	    nla_put_u16(skb, IFLA_BR_PRIORITY, priority))
 		return -EMSGSIZE;
 
 	return 0;
-- 
cgit v1.2.3


From 5067c0469c643512f24786990e315f9c15cc7d24 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 12 Mar 2015 10:32:18 -0700
Subject: ata: Add a new flag to destinguish sas controller

SAS controller has its own tag allocation, which doesn't directly match to ATA
tag, so SAS and SATA have different code path for ata tags. Originally we use
port->scsi_host (98bd4be1) to destinguish SAS controller, but libsas set
->scsi_host too, so we can't use it for the destinguish, we add a new flag for
this purpose.

Without this patch, the following oops can happen because scsi-mq uses
a host-wide tag map shared among all devices with some integer tag
values >= ATA_MAX_QUEUE.  These unexpectedly high tag values cause
__ata_qc_from_tag() to return NULL, which is then dereferenced in
ata_qc_new_init().

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
  IP: [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
  PGD 32adf0067 PUD 32adf1067 PMD 0
  Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
  Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb
  i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp
  eeprom w83795 i2c_i801
  CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1
  Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b       05/04/12
  task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000
  RIP: 0010:[<ffffffff804fd46e>]  [<ffffffff804fd46e>] ata_qc_new_init+0x3e/0x120
  RSP: 0018:ffff88032a067858  EFLAGS: 00010046
  RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a
  RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230
  RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001
  R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000
  R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000
  FS:  0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
  CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0
  Stack:
   ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad
   ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978
   ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909
  Call Trace:
   [<ffffffff80502dad>] ata_scsi_translate+0x3d/0x1b0
   [<ffffffff80506909>] ata_sas_queuecmd+0x149/0x2a0
   [<ffffffffa0046650>] sas_queuecommand+0xa0/0x1f0 [libsas]
   [<ffffffff804ea544>] scsi_dispatch_cmd+0xd4/0x1a0
   [<ffffffff804eb50f>] scsi_queue_rq+0x66f/0x7f0
   [<ffffffff803e5098>] __blk_mq_run_hw_queue+0x208/0x3f0
   [<ffffffff803e54b8>] blk_mq_run_hw_queue+0x88/0xc0
   [<ffffffff803e5c74>] blk_mq_insert_request+0xc4/0x130
   [<ffffffff803e0b63>] blk_execute_rq_nowait+0x73/0x160
   [<ffffffffa0023fca>] sg_common_write+0x3da/0x720 [sg]
   [<ffffffffa0025100>] sg_new_write+0x250/0x360 [sg]
   [<ffffffffa0025feb>] sg_write+0x13b/0x450 [sg]
   [<ffffffff8032ec91>] vfs_write+0xd1/0x1b0
   [<ffffffff8032ee54>] SyS_write+0x54/0xc0
   [<ffffffff80689932>] system_call_fastpath+0x12/0x17

tj: updated description.

Fixes: 12cb5ce101ab ("libata: use blk taging")
Reported-and-tested-by: Tony Battersby <tonyb@cybernetics.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-core.c     | 4 ++--
 drivers/scsi/ipr.c            | 3 ++-
 drivers/scsi/libsas/sas_ata.c | 3 ++-
 include/linux/libata.h        | 1 +
 4 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4c35f0822d06..ef150ebb4c30 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
 		return NULL;
 
 	/* libsas case */
-	if (!ap->scsi_host) {
+	if (ap->flags & ATA_FLAG_SAS_HOST) {
 		tag = ata_sas_allocate_tag(ap);
 		if (tag < 0)
 			return NULL;
@@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
 	tag = qc->tag;
 	if (likely(ata_tag_valid(tag))) {
 		qc->tag = ATA_TAG_POISON;
-		if (!ap->scsi_host)
+		if (ap->flags & ATA_FLAG_SAS_HOST)
 			ata_sas_free_tag(tag, ap);
 	}
 }
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9219953ee949..d9afc51af7d3 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
+	.flags		= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
+			  ATA_FLAG_SAS_HOST,
 	.pio_mask	= ATA_PIO4_ONLY,
 	.mwdma_mask	= ATA_MWDMA2,
 	.udma_mask	= ATA_UDMA6,
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 932d9cc98d2f..9c706d8c1441 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
+	.flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
+		 ATA_FLAG_SAS_HOST,
 	.pio_mask = ATA_PIO4,
 	.mwdma_mask = ATA_MWDMA2,
 	.udma_mask = ATA_UDMA6,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index fc03efa64ffe..6b08cc106c21 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -232,6 +232,7 @@ enum {
 					      * led */
 	ATA_FLAG_NO_DIPM	= (1 << 23), /* host not happy with DIPM */
 	ATA_FLAG_LOWTAG		= (1 << 24), /* host wants lowest available tag */
+	ATA_FLAG_SAS_HOST	= (1 << 25), /* SAS host */
 
 	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
 
-- 
cgit v1.2.3


From c6b570d97c0e77f570bb6b2ed30d372b2b1e9aae Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Mon, 9 Mar 2015 12:20:13 +0100
Subject: regmap: introduce regmap_name to fix syscon regmap trace events

This patch fixes a NULL pointer dereference when enabling regmap event
tracing in the presence of a syscon regmap, introduced by commit bdb0066df96e
("mfd: syscon: Decouple syscon interface from platform devices").
That patch introduced syscon regmaps that have their dev field set to NULL.
The regmap trace events expect it to point to a valid struct device and feed
it to dev_name():

  $ echo 1 > /sys/kernel/debug/tracing/events/regmap/enable

  Unable to handle kernel NULL pointer dereference at virtual address 0000002c
  pgd = 80004000
  [0000002c] *pgd=00000000
  Internal error: Oops: 17 [#1] SMP ARM
  Modules linked in: coda videobuf2_vmalloc
  CPU: 0 PID: 304 Comm: kworker/0:2 Not tainted 4.0.0-rc2+ #9197
  Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
  Workqueue: events_freezable thermal_zone_device_check
  task: 9f25a200 ti: 9f1ee000 task.ti: 9f1ee000
  PC is at ftrace_raw_event_regmap_block+0x3c/0xe4
  LR is at _regmap_raw_read+0x1bc/0x1cc
  pc : [<803636e8>]    lr : [<80365f2c>]    psr: 600f0093
  sp : 9f1efd78  ip : 9f1efdb8  fp : 9f1efdb4
  r10: 00000004  r9 : 00000001  r8 : 00000001
  r7 : 00000180  r6 : 00000000  r5 : 9f00e3c0  r4 : 00000003
  r3 : 00000001  r2 : 00000180  r1 : 00000000  r0 : 9f00e3c0
  Flags: nZCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment kernel
  Control: 10c5387d  Table: 2d91004a  DAC: 00000015
  Process kworker/0:2 (pid: 304, stack limit = 0x9f1ee210)
  Stack: (0x9f1efd78 to 0x9f1f0000)
  fd60:                                                       9f1efda4 9f1efd88
  fd80: 800708c0 805f9510 80927140 800f0013 9f1fc800 9eb2f490 00000000 00000180
  fda0: 808e3840 00000001 9f1efdfc 9f1efdb8 80365f2c 803636b8 805f8958 800708e0
  fdc0: a00f0013 803636ac 9f16de00 00000180 80927140 9f1fc800 9f1fc800 9f1efe6c
  fde0: 9f1efe6c 9f732400 00000000 00000000 9f1efe1c 9f1efe00 80365f70 80365d7c
  fe00: 80365f3c 9f1fc800 9f1fc800 00000180 9f1efe44 9f1efe20 803656a4 80365f48
  fe20: 9f1fc800 00000180 9f1efe6c 9f1efe6c 9f732400 00000000 9f1efe64 9f1efe48
  fe40: 803657bc 80365634 00000001 9e95f910 9f1fc800 9f1efeb4 9f1efe8c 9f1efe68
  fe60: 80452ac0 80365778 9f1efe8c 9f1efe78 9e93d400 9e93d5e8 9f1efeb4 9f72ef40
  fe80: 9f1efeac 9f1efe90 8044e11c 80452998 8045298c 9e93d608 9e93d400 808e1978
  fea0: 9f1efecc 9f1efeb0 8044fd14 8044e0d0 ffffffff 9f25a200 9e93d608 9e481380
  fec0: 9f1efedc 9f1efed0 8044fde8 8044fcec 9f1eff1c 9f1efee0 80038d50 8044fdd8
  fee0: 9f1ee020 9f72ef40 9e481398 00000000 00000008 9f72ef54 9f1ee020 9f72ef40
  ff00: 9e481398 9e481380 00000008 9f72ef40 9f1eff5c 9f1eff20 80039754 80038bfc
  ff20: 00000000 9e481380 80894100 808e1662 00000000 9e4f2ec0 00000000 9e481380
  ff40: 800396f8 00000000 00000000 00000000 9f1effac 9f1eff60 8003e020 80039704
  ff60: ffffffff 00000000 ffffffff 9e481380 00000000 00000000 9f1eff78 9f1eff78
  ff80: 00000000 00000000 9f1eff88 9f1eff88 9e4f2ec0 8003df30 00000000 00000000
  ffa0: 00000000 9f1effb0 8000eb60 8003df3c 00000000 00000000 00000000 00000000
  ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
  ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 ffffffff ffffffff
  Backtrace:
  [<803636ac>] (ftrace_raw_event_regmap_block) from [<80365f2c>] (_regmap_raw_read+0x1bc/0x1cc)
   r9:00000001 r8:808e3840 r7:00000180 r6:00000000 r5:9eb2f490 r4:9f1fc800
  [<80365d70>] (_regmap_raw_read) from [<80365f70>] (_regmap_bus_read+0x34/0x6c)
   r10:00000000 r9:00000000 r8:9f732400 r7:9f1efe6c r6:9f1efe6c r5:9f1fc800
   r4:9f1fc800
  [<80365f3c>] (_regmap_bus_read) from [<803656a4>] (_regmap_read+0x7c/0x144)
   r6:00000180 r5:9f1fc800 r4:9f1fc800 r3:80365f3c
  [<80365628>] (_regmap_read) from [<803657bc>] (regmap_read+0x50/0x70)
   r9:00000000 r8:9f732400 r7:9f1efe6c r6:9f1efe6c r5:00000180 r4:9f1fc800
  [<8036576c>] (regmap_read) from [<80452ac0>] (imx_get_temp+0x134/0x1a4)
   r6:9f1efeb4 r5:9f1fc800 r4:9e95f910 r3:00000001
  [<8045298c>] (imx_get_temp) from [<8044e11c>] (thermal_zone_get_temp+0x58/0x74)
   r7:9f72ef40 r6:9f1efeb4 r5:9e93d5e8 r4:9e93d400
  [<8044e0c4>] (thermal_zone_get_temp) from [<8044fd14>] (thermal_zone_device_update+0x34/0xec)
   r6:808e1978 r5:9e93d400 r4:9e93d608 r3:8045298c
  [<8044fce0>] (thermal_zone_device_update) from [<8044fde8>] (thermal_zone_device_check+0x1c/0x20)
   r5:9e481380 r4:9e93d608
  [<8044fdcc>] (thermal_zone_device_check) from [<80038d50>] (process_one_work+0x160/0x3d4)
  [<80038bf0>] (process_one_work) from [<80039754>] (worker_thread+0x5c/0x4f4)
   r10:9f72ef40 r9:00000008 r8:9e481380 r7:9e481398 r6:9f72ef40 r5:9f1ee020
   r4:9f72ef54
  [<800396f8>] (worker_thread) from [<8003e020>] (kthread+0xf0/0x108)
   r10:00000000 r9:00000000 r8:00000000 r7:800396f8 r6:9e481380 r5:00000000
   r4:9e4f2ec0
  [<8003df30>] (kthread) from [<8000eb60>] (ret_from_fork+0x14/0x34)
   r7:00000000 r6:00000000 r5:8003df30 r4:9e4f2ec0
  Code: e3140040 1a00001a e3140020 1a000016 (e596002c)
  ---[ end trace 193c15c2494ec960 ]---

Fixes: bdb0066df96e (mfd: syscon: Decouple syscon interface from platform devices)
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/base/regmap/internal.h |   8 +++
 drivers/base/regmap/regcache.c |  16 +++---
 drivers/base/regmap/regmap.c   |  32 +++++------
 include/trace/events/regmap.h  | 123 ++++++++++++++++++++---------------------
 4 files changed, 91 insertions(+), 88 deletions(-)

(limited to 'include')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index beb8b27d4621..a13587b5c2be 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -243,4 +243,12 @@ extern struct regcache_ops regcache_rbtree_ops;
 extern struct regcache_ops regcache_lzo_ops;
 extern struct regcache_ops regcache_flat_ops;
 
+static inline const char *regmap_name(const struct regmap *map)
+{
+	if (map->dev)
+		return dev_name(map->dev);
+
+	return map->name;
+}
+
 #endif
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index f373c35f9e1d..f5db662e951e 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -218,7 +218,7 @@ int regcache_read(struct regmap *map,
 		ret = map->cache_ops->read(map, reg, value);
 
 		if (ret == 0)
-			trace_regmap_reg_read_cache(map->dev, reg, *value);
+			trace_regmap_reg_read_cache(map, reg, *value);
 
 		return ret;
 	}
@@ -311,7 +311,7 @@ int regcache_sync(struct regmap *map)
 	dev_dbg(map->dev, "Syncing %s cache\n",
 		map->cache_ops->name);
 	name = map->cache_ops->name;
-	trace_regcache_sync(map->dev, name, "start");
+	trace_regcache_sync(map, name, "start");
 
 	if (!map->cache_dirty)
 		goto out;
@@ -346,7 +346,7 @@ out:
 
 	regmap_async_complete(map);
 
-	trace_regcache_sync(map->dev, name, "stop");
+	trace_regcache_sync(map, name, "stop");
 
 	return ret;
 }
@@ -381,7 +381,7 @@ int regcache_sync_region(struct regmap *map, unsigned int min,
 	name = map->cache_ops->name;
 	dev_dbg(map->dev, "Syncing %s cache from %d-%d\n", name, min, max);
 
-	trace_regcache_sync(map->dev, name, "start region");
+	trace_regcache_sync(map, name, "start region");
 
 	if (!map->cache_dirty)
 		goto out;
@@ -401,7 +401,7 @@ out:
 
 	regmap_async_complete(map);
 
-	trace_regcache_sync(map->dev, name, "stop region");
+	trace_regcache_sync(map, name, "stop region");
 
 	return ret;
 }
@@ -428,7 +428,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min,
 
 	map->lock(map->lock_arg);
 
-	trace_regcache_drop_region(map->dev, min, max);
+	trace_regcache_drop_region(map, min, max);
 
 	ret = map->cache_ops->drop(map, min, max);
 
@@ -455,7 +455,7 @@ void regcache_cache_only(struct regmap *map, bool enable)
 	map->lock(map->lock_arg);
 	WARN_ON(map->cache_bypass && enable);
 	map->cache_only = enable;
-	trace_regmap_cache_only(map->dev, enable);
+	trace_regmap_cache_only(map, enable);
 	map->unlock(map->lock_arg);
 }
 EXPORT_SYMBOL_GPL(regcache_cache_only);
@@ -493,7 +493,7 @@ void regcache_cache_bypass(struct regmap *map, bool enable)
 	map->lock(map->lock_arg);
 	WARN_ON(map->cache_only && enable);
 	map->cache_bypass = enable;
-	trace_regmap_cache_bypass(map->dev, enable);
+	trace_regmap_cache_bypass(map, enable);
 	map->unlock(map->lock_arg);
 }
 EXPORT_SYMBOL_GPL(regcache_cache_bypass);
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index f99b098ddabf..dbfe6a69c3da 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1281,7 +1281,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
 	if (map->async && map->bus->async_write) {
 		struct regmap_async *async;
 
-		trace_regmap_async_write_start(map->dev, reg, val_len);
+		trace_regmap_async_write_start(map, reg, val_len);
 
 		spin_lock_irqsave(&map->async_lock, flags);
 		async = list_first_entry_or_null(&map->async_free,
@@ -1339,8 +1339,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
 		return ret;
 	}
 
-	trace_regmap_hw_write_start(map->dev, reg,
-				    val_len / map->format.val_bytes);
+	trace_regmap_hw_write_start(map, reg, val_len / map->format.val_bytes);
 
 	/* If we're doing a single register write we can probably just
 	 * send the work_buf directly, otherwise try to do a gather
@@ -1372,8 +1371,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
 		kfree(buf);
 	}
 
-	trace_regmap_hw_write_done(map->dev, reg,
-				   val_len / map->format.val_bytes);
+	trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes);
 
 	return ret;
 }
@@ -1407,12 +1405,12 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 
 	map->format.format_write(map, reg, val);
 
-	trace_regmap_hw_write_start(map->dev, reg, 1);
+	trace_regmap_hw_write_start(map, reg, 1);
 
 	ret = map->bus->write(map->bus_context, map->work_buf,
 			      map->format.buf_size);
 
-	trace_regmap_hw_write_done(map->dev, reg, 1);
+	trace_regmap_hw_write_done(map, reg, 1);
 
 	return ret;
 }
@@ -1470,7 +1468,7 @@ int _regmap_write(struct regmap *map, unsigned int reg,
 		dev_info(map->dev, "%x <= %x\n", reg, val);
 #endif
 
-	trace_regmap_reg_write(map->dev, reg, val);
+	trace_regmap_reg_write(map, reg, val);
 
 	return map->reg_write(context, reg, val);
 }
@@ -1773,7 +1771,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
 	for (i = 0; i < num_regs; i++) {
 		int reg = regs[i].reg;
 		int val = regs[i].def;
-		trace_regmap_hw_write_start(map->dev, reg, 1);
+		trace_regmap_hw_write_start(map, reg, 1);
 		map->format.format_reg(u8, reg, map->reg_shift);
 		u8 += reg_bytes + pad_bytes;
 		map->format.format_val(u8, val, 0);
@@ -1788,7 +1786,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
 
 	for (i = 0; i < num_regs; i++) {
 		int reg = regs[i].reg;
-		trace_regmap_hw_write_done(map->dev, reg, 1);
+		trace_regmap_hw_write_done(map, reg, 1);
 	}
 	return ret;
 }
@@ -2059,15 +2057,13 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 	 */
 	u8[0] |= map->read_flag_mask;
 
-	trace_regmap_hw_read_start(map->dev, reg,
-				   val_len / map->format.val_bytes);
+	trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes);
 
 	ret = map->bus->read(map->bus_context, map->work_buf,
 			     map->format.reg_bytes + map->format.pad_bytes,
 			     val, val_len);
 
-	trace_regmap_hw_read_done(map->dev, reg,
-				  val_len / map->format.val_bytes);
+	trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes);
 
 	return ret;
 }
@@ -2123,7 +2119,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg,
 			dev_info(map->dev, "%x => %x\n", reg, *val);
 #endif
 
-		trace_regmap_reg_read(map->dev, reg, *val);
+		trace_regmap_reg_read(map, reg, *val);
 
 		if (!map->cache_bypass)
 			regcache_write(map, reg, *val);
@@ -2480,7 +2476,7 @@ void regmap_async_complete_cb(struct regmap_async *async, int ret)
 	struct regmap *map = async->map;
 	bool wake;
 
-	trace_regmap_async_io_complete(map->dev);
+	trace_regmap_async_io_complete(map);
 
 	spin_lock(&map->async_lock);
 	list_move(&async->list, &map->async_free);
@@ -2525,7 +2521,7 @@ int regmap_async_complete(struct regmap *map)
 	if (!map->bus || !map->bus->async_write)
 		return 0;
 
-	trace_regmap_async_complete_start(map->dev);
+	trace_regmap_async_complete_start(map);
 
 	wait_event(map->async_waitq, regmap_async_is_done(map));
 
@@ -2534,7 +2530,7 @@ int regmap_async_complete(struct regmap *map)
 	map->async_ret = 0;
 	spin_unlock_irqrestore(&map->async_lock, flags);
 
-	trace_regmap_async_complete_done(map->dev);
+	trace_regmap_async_complete_done(map);
 
 	return ret;
 }
diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h
index 23d561512f64..22317d2b52ab 100644
--- a/include/trace/events/regmap.h
+++ b/include/trace/events/regmap.h
@@ -7,27 +7,26 @@
 #include <linux/ktime.h>
 #include <linux/tracepoint.h>
 
-struct device;
-struct regmap;
+#include "../../../drivers/base/regmap/internal.h"
 
 /*
  * Log register events
  */
 DECLARE_EVENT_CLASS(regmap_reg,
 
-	TP_PROTO(struct device *dev, unsigned int reg,
+	TP_PROTO(struct regmap *map, unsigned int reg,
 		 unsigned int val),
 
-	TP_ARGS(dev, reg, val),
+	TP_ARGS(map, reg, val),
 
 	TP_STRUCT__entry(
-		__string(	name,		dev_name(dev)	)
-		__field(	unsigned int,	reg		)
-		__field(	unsigned int,	val		)
+		__string(	name,		regmap_name(map)	)
+		__field(	unsigned int,	reg			)
+		__field(	unsigned int,	val			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, dev_name(dev));
+		__assign_str(name, regmap_name(map));
 		__entry->reg = reg;
 		__entry->val = val;
 	),
@@ -39,45 +38,45 @@ DECLARE_EVENT_CLASS(regmap_reg,
 
 DEFINE_EVENT(regmap_reg, regmap_reg_write,
 
-	TP_PROTO(struct device *dev, unsigned int reg,
+	TP_PROTO(struct regmap *map, unsigned int reg,
 		 unsigned int val),
 
-	TP_ARGS(dev, reg, val)
+	TP_ARGS(map, reg, val)
 
 );
 
 DEFINE_EVENT(regmap_reg, regmap_reg_read,
 
-	TP_PROTO(struct device *dev, unsigned int reg,
+	TP_PROTO(struct regmap *map, unsigned int reg,
 		 unsigned int val),
 
-	TP_ARGS(dev, reg, val)
+	TP_ARGS(map, reg, val)
 
 );
 
 DEFINE_EVENT(regmap_reg, regmap_reg_read_cache,
 
-	TP_PROTO(struct device *dev, unsigned int reg,
+	TP_PROTO(struct regmap *map, unsigned int reg,
 		 unsigned int val),
 
-	TP_ARGS(dev, reg, val)
+	TP_ARGS(map, reg, val)
 
 );
 
 DECLARE_EVENT_CLASS(regmap_block,
 
-	TP_PROTO(struct device *dev, unsigned int reg, int count),
+	TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-	TP_ARGS(dev, reg, count),
+	TP_ARGS(map, reg, count),
 
 	TP_STRUCT__entry(
-		__string(	name,		dev_name(dev)	)
-		__field(	unsigned int,	reg		)
-		__field(	int,		count		)
+		__string(	name,		regmap_name(map)	)
+		__field(	unsigned int,	reg			)
+		__field(	int,		count			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, dev_name(dev));
+		__assign_str(name, regmap_name(map));
 		__entry->reg = reg;
 		__entry->count = count;
 	),
@@ -89,48 +88,48 @@ DECLARE_EVENT_CLASS(regmap_block,
 
 DEFINE_EVENT(regmap_block, regmap_hw_read_start,
 
-	TP_PROTO(struct device *dev, unsigned int reg, int count),
+	TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-	TP_ARGS(dev, reg, count)
+	TP_ARGS(map, reg, count)
 );
 
 DEFINE_EVENT(regmap_block, regmap_hw_read_done,
 
-	TP_PROTO(struct device *dev, unsigned int reg, int count),
+	TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-	TP_ARGS(dev, reg, count)
+	TP_ARGS(map, reg, count)
 );
 
 DEFINE_EVENT(regmap_block, regmap_hw_write_start,
 
-	TP_PROTO(struct device *dev, unsigned int reg, int count),
+	TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-	TP_ARGS(dev, reg, count)
+	TP_ARGS(map, reg, count)
 );
 
 DEFINE_EVENT(regmap_block, regmap_hw_write_done,
 
-	TP_PROTO(struct device *dev, unsigned int reg, int count),
+	TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-	TP_ARGS(dev, reg, count)
+	TP_ARGS(map, reg, count)
 );
 
 TRACE_EVENT(regcache_sync,
 
-	TP_PROTO(struct device *dev, const char *type,
+	TP_PROTO(struct regmap *map, const char *type,
 		 const char *status),
 
-	TP_ARGS(dev, type, status),
+	TP_ARGS(map, type, status),
 
 	TP_STRUCT__entry(
-		__string(       name,           dev_name(dev)   )
-		__string(	status,		status		)
-		__string(	type,		type		)
-		__field(	int,		type		)
+		__string(       name,           regmap_name(map)	)
+		__string(	status,		status			)
+		__string(	type,		type			)
+		__field(	int,		type			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, dev_name(dev));
+		__assign_str(name, regmap_name(map));
 		__assign_str(status, status);
 		__assign_str(type, type);
 	),
@@ -141,17 +140,17 @@ TRACE_EVENT(regcache_sync,
 
 DECLARE_EVENT_CLASS(regmap_bool,
 
-	TP_PROTO(struct device *dev, bool flag),
+	TP_PROTO(struct regmap *map, bool flag),
 
-	TP_ARGS(dev, flag),
+	TP_ARGS(map, flag),
 
 	TP_STRUCT__entry(
-		__string(	name,		dev_name(dev)	)
-		__field(	int,		flag		)
+		__string(	name,		regmap_name(map)	)
+		__field(	int,		flag			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, dev_name(dev));
+		__assign_str(name, regmap_name(map));
 		__entry->flag = flag;
 	),
 
@@ -161,32 +160,32 @@ DECLARE_EVENT_CLASS(regmap_bool,
 
 DEFINE_EVENT(regmap_bool, regmap_cache_only,
 
-	TP_PROTO(struct device *dev, bool flag),
+	TP_PROTO(struct regmap *map, bool flag),
 
-	TP_ARGS(dev, flag)
+	TP_ARGS(map, flag)
 
 );
 
 DEFINE_EVENT(regmap_bool, regmap_cache_bypass,
 
-	TP_PROTO(struct device *dev, bool flag),
+	TP_PROTO(struct regmap *map, bool flag),
 
-	TP_ARGS(dev, flag)
+	TP_ARGS(map, flag)
 
 );
 
 DECLARE_EVENT_CLASS(regmap_async,
 
-	TP_PROTO(struct device *dev),
+	TP_PROTO(struct regmap *map),
 
-	TP_ARGS(dev),
+	TP_ARGS(map),
 
 	TP_STRUCT__entry(
-		__string(	name,		dev_name(dev)	)
+		__string(	name,		regmap_name(map)	)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, dev_name(dev));
+		__assign_str(name, regmap_name(map));
 	),
 
 	TP_printk("%s", __get_str(name))
@@ -194,50 +193,50 @@ DECLARE_EVENT_CLASS(regmap_async,
 
 DEFINE_EVENT(regmap_block, regmap_async_write_start,
 
-	TP_PROTO(struct device *dev, unsigned int reg, int count),
+	TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-	TP_ARGS(dev, reg, count)
+	TP_ARGS(map, reg, count)
 );
 
 DEFINE_EVENT(regmap_async, regmap_async_io_complete,
 
-	TP_PROTO(struct device *dev),
+	TP_PROTO(struct regmap *map),
 
-	TP_ARGS(dev)
+	TP_ARGS(map)
 
 );
 
 DEFINE_EVENT(regmap_async, regmap_async_complete_start,
 
-	TP_PROTO(struct device *dev),
+	TP_PROTO(struct regmap *map),
 
-	TP_ARGS(dev)
+	TP_ARGS(map)
 
 );
 
 DEFINE_EVENT(regmap_async, regmap_async_complete_done,
 
-	TP_PROTO(struct device *dev),
+	TP_PROTO(struct regmap *map),
 
-	TP_ARGS(dev)
+	TP_ARGS(map)
 
 );
 
 TRACE_EVENT(regcache_drop_region,
 
-	TP_PROTO(struct device *dev, unsigned int from,
+	TP_PROTO(struct regmap *map, unsigned int from,
 		 unsigned int to),
 
-	TP_ARGS(dev, from, to),
+	TP_ARGS(map, from, to),
 
 	TP_STRUCT__entry(
-		__string(       name,           dev_name(dev)   )
-		__field(	unsigned int,	from		)
-		__field(	unsigned int,	to		)
+		__string(       name,           regmap_name(map)	)
+		__field(	unsigned int,	from			)
+		__field(	unsigned int,	to			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, dev_name(dev));
+		__assign_str(name, regmap_name(map));
 		__entry->from = from;
 		__entry->to = to;
 	),
-- 
cgit v1.2.3


From 25911556283e5093fca235d7ba03faae7ed5dbf2 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <arend@broadcom.com>
Date: Wed, 18 Mar 2015 13:25:26 +0100
Subject: brcmfmac: add support for BCM43430 SDIO chipset

This patch added support for the BCM43430 802.11n SDIO chipset.

Reviewed-by: Hante Meuleman <meuleman@broadcom.com>
Reviewed-by: Daniel (Deognyoun) Kim <dekim@broadcom.com>
Reviewed-by: Franky (Zhenhui) Lin <frankyl@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c     |  1 +
 drivers/net/wireless/brcm80211/brcmfmac/chip.c       | 18 ++++++++++++++++++
 drivers/net/wireless/brcm80211/brcmfmac/sdio.c       |  5 +++++
 drivers/net/wireless/brcm80211/include/brcm_hw_ids.h |  1 +
 drivers/net/wireless/brcm80211/include/chipcommon.h  |  9 ++++++++-
 include/linux/mmc/sdio_ids.h                         |  1 +
 6 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
index 9667b6aabc00..9b508bd3b839 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
@@ -1098,6 +1098,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339),
+	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354),
 	{ /* end: all zeroes */ }
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/brcm80211/brcmfmac/chip.c
index 734172570054..ab2fac8b2760 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/chip.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/chip.c
@@ -600,6 +600,12 @@ static void brcmf_chip_socram_ramsize(struct brcmf_core_priv *sr, u32 *ramsize,
 		if (sr->chip->pub.chiprev < 2)
 			*srsize = (32 * 1024);
 		break;
+	case BRCM_CC_43430_CHIP_ID:
+		/* assume sr for now as we can not check
+		 * firmware sr capability at this point.
+		 */
+		*srsize = (64 * 1024);
+		break;
 	default:
 		break;
 	}
@@ -1072,6 +1078,7 @@ static void
 brcmf_chip_cm3_set_passive(struct brcmf_chip_priv *chip)
 {
 	struct brcmf_core *core;
+	struct brcmf_core_priv *sr;
 
 	brcmf_chip_disable_arm(chip, BCMA_CORE_ARM_CM3);
 	core = brcmf_chip_get_core(&chip->pub, BCMA_CORE_80211);
@@ -1081,6 +1088,13 @@ brcmf_chip_cm3_set_passive(struct brcmf_chip_priv *chip)
 			     D11_BCMA_IOCTL_PHYCLOCKEN);
 	core = brcmf_chip_get_core(&chip->pub, BCMA_CORE_INTERNAL_MEM);
 	brcmf_chip_resetcore(core, 0, 0, 0);
+
+	/* disable bank #3 remap for this device */
+	if (chip->pub.chip == BRCM_CC_43430_CHIP_ID) {
+		sr = container_of(core, struct brcmf_core_priv, pub);
+		brcmf_chip_core_write32(sr, SOCRAMREGOFFS(bankidx), 3);
+		brcmf_chip_core_write32(sr, SOCRAMREGOFFS(bankpda), 0);
+	}
 }
 
 static bool brcmf_chip_cm3_set_active(struct brcmf_chip_priv *chip)
@@ -1188,6 +1202,10 @@ bool brcmf_chip_sr_capable(struct brcmf_chip *pub)
 		addr = CORE_CC_REG(base, chipcontrol_data);
 		reg = chip->ops->read32(chip->ctx, addr);
 		return (reg & pmu_cc3_mask) != 0;
+	case BRCM_CC_43430_CHIP_ID:
+		addr = CORE_CC_REG(base, sr_control1);
+		reg = chip->ops->read32(chip->ctx, addr);
+		return reg != 0;
 	default:
 		addr = CORE_CC_REG(base, pmucapabilities_ext);
 		reg = chip->ops->read32(chip->ctx, addr);
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c
index 38fa0e8f859d..e162860b265c 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c
@@ -615,6 +615,8 @@ static const struct sdiod_drive_str sdiod_drvstr_tab2_3v3[] = {
 #define BCM43362_NVRAM_NAME		"brcm/brcmfmac43362-sdio.txt"
 #define BCM4339_FIRMWARE_NAME		"brcm/brcmfmac4339-sdio.bin"
 #define BCM4339_NVRAM_NAME		"brcm/brcmfmac4339-sdio.txt"
+#define BCM43430_FIRMWARE_NAME		"brcm/brcmfmac43430-sdio.bin"
+#define BCM43430_NVRAM_NAME		"brcm/brcmfmac43430-sdio.txt"
 #define BCM4345_FIRMWARE_NAME		"brcm/brcmfmac4345-sdio.bin"
 #define BCM4345_NVRAM_NAME		"brcm/brcmfmac4345-sdio.txt"
 #define BCM4354_FIRMWARE_NAME		"brcm/brcmfmac4354-sdio.bin"
@@ -640,6 +642,8 @@ MODULE_FIRMWARE(BCM43362_FIRMWARE_NAME);
 MODULE_FIRMWARE(BCM43362_NVRAM_NAME);
 MODULE_FIRMWARE(BCM4339_FIRMWARE_NAME);
 MODULE_FIRMWARE(BCM4339_NVRAM_NAME);
+MODULE_FIRMWARE(BCM43430_FIRMWARE_NAME);
+MODULE_FIRMWARE(BCM43430_NVRAM_NAME);
 MODULE_FIRMWARE(BCM4345_FIRMWARE_NAME);
 MODULE_FIRMWARE(BCM4345_NVRAM_NAME);
 MODULE_FIRMWARE(BCM4354_FIRMWARE_NAME);
@@ -671,6 +675,7 @@ static const struct brcmf_firmware_names brcmf_fwname_data[] = {
 	{ BRCM_CC_4335_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4335) },
 	{ BRCM_CC_43362_CHIP_ID, 0xFFFFFFFE, BRCMF_FIRMWARE_NVRAM(BCM43362) },
 	{ BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4339) },
+	{ BRCM_CC_43430_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM43430) },
 	{ BRCM_CC_4345_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4345) },
 	{ BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4354) }
 };
diff --git a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h
index b599e7e41148..4efdd51af9c8 100644
--- a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h
+++ b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h
@@ -37,6 +37,7 @@
 #define BRCM_CC_43362_CHIP_ID		43362
 #define BRCM_CC_4335_CHIP_ID		0x4335
 #define BRCM_CC_4339_CHIP_ID		0x4339
+#define BRCM_CC_43430_CHIP_ID		43430
 #define BRCM_CC_4345_CHIP_ID		0x4345
 #define BRCM_CC_4354_CHIP_ID		0x4354
 #define BRCM_CC_4356_CHIP_ID		0x4356
diff --git a/drivers/net/wireless/brcm80211/include/chipcommon.h b/drivers/net/wireless/brcm80211/include/chipcommon.h
index d242333b7559..e1fd499930a0 100644
--- a/drivers/net/wireless/brcm80211/include/chipcommon.h
+++ b/drivers/net/wireless/brcm80211/include/chipcommon.h
@@ -183,7 +183,14 @@ struct chipcregs {
 	u8 uart1lsr;
 	u8 uart1msr;
 	u8 uart1scratch;
-	u32 PAD[126];
+	u32 PAD[62];
+
+	/* save/restore, corerev >= 48 */
+	u32 sr_capability;          /* 0x500 */
+	u32 sr_control0;            /* 0x504 */
+	u32 sr_control1;            /* 0x508 */
+	u32 gpio_control;           /* 0x50C */
+	u32 PAD[60];
 
 	/* PMU registers (corerev >= 20) */
 	u32 pmucontrol;	/* 0x600 */
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 91397858dc95..83430f2ea757 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -33,6 +33,7 @@
 #define SDIO_DEVICE_ID_BROADCOM_43341		0xa94d
 #define SDIO_DEVICE_ID_BROADCOM_4335_4339	0x4335
 #define SDIO_DEVICE_ID_BROADCOM_43362		0xa962
+#define SDIO_DEVICE_ID_BROADCOM_43430		0xa9a6
 #define SDIO_DEVICE_ID_BROADCOM_4345		0x4345
 #define SDIO_DEVICE_ID_BROADCOM_4354		0x4354
 
-- 
cgit v1.2.3


From 52452c542559ac980b48dbf22a30ee7fa0af507c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 19 Mar 2015 19:04:19 -0700
Subject: inet: drop prev pointer handling in request sock

When request sock are put in ehash table, the whole notion
of having a previous request to update dl_next is pointless.

Also, following patch will get rid of big purge timer,
so we want to delete a request sock without holding listener lock.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_connection_sock.h |  1 -
 include/net/inet_connection_sock.h  | 11 ++++-------
 include/net/request_sock.h          | 15 +++++++++++----
 include/net/tcp.h                   |  3 +--
 net/dccp/dccp.h                     |  3 +--
 net/dccp/ipv4.c                     | 14 ++++++--------
 net/dccp/ipv6.c                     | 19 ++++++++-----------
 net/dccp/minisocks.c                |  7 +++----
 net/ipv4/inet_connection_sock.c     | 22 ++++++++++++----------
 net/ipv4/tcp_input.c                |  2 +-
 net/ipv4/tcp_ipv4.c                 | 17 ++++++++---------
 net/ipv4/tcp_minisocks.c            |  5 ++---
 net/ipv6/inet6_connection_sock.c    | 10 ++++------
 net/ipv6/tcp_ipv6.c                 | 12 ++++++------
 14 files changed, 67 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 74af137304be..15bd40878d2a 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -29,7 +29,6 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6,
 				      const struct request_sock *req);
 
 struct request_sock *inet6_csk_search_req(const struct sock *sk,
-					  struct request_sock ***prevp,
 					  const __be16 rport,
 					  const struct in6_addr *raddr,
 					  const struct in6_addr *laddr,
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index b9a6b0a94cc6..423a46106e57 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -257,7 +257,6 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
 
 struct request_sock *inet_csk_search_req(const struct sock *sk,
-					 struct request_sock ***prevp,
 					 const __be16 rport,
 					 const __be32 raddr,
 					 const __be32 laddr);
@@ -310,17 +309,15 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
 }
 
 static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
-					       struct request_sock *req,
-					       struct request_sock **prev)
+					       struct request_sock *req)
 {
-	reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
+	reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req);
 }
 
 static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
-					     struct request_sock *req,
-					     struct request_sock **prev)
+					     struct request_sock *req)
 {
-	inet_csk_reqsk_queue_unlink(sk, req, prev);
+	inet_csk_reqsk_queue_unlink(sk, req);
 	inet_csk_reqsk_queue_removed(sk, req);
 	reqsk_free(req);
 }
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index e7ef86340514..65223905d139 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -50,6 +50,7 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req);
 struct request_sock {
 	struct sock_common		__req_common;
 #define rsk_refcnt			__req_common.skc_refcnt
+#define rsk_hash			__req_common.skc_hash
 
 	struct request_sock		*dl_next;
 	struct sock			*rsk_listener;
@@ -216,11 +217,16 @@ static inline int reqsk_queue_empty(struct request_sock_queue *queue)
 }
 
 static inline void reqsk_queue_unlink(struct request_sock_queue *queue,
-				      struct request_sock *req,
-				      struct request_sock **prev_req)
+				      struct request_sock *req)
 {
+	struct listen_sock *lopt = queue->listen_opt;
+	struct request_sock **prev;
+
 	write_lock(&queue->syn_wait_lock);
-	*prev_req = req->dl_next;
+	prev = &lopt->syn_table[req->rsk_hash];
+	while (*prev != req)
+		prev = &(*prev)->dl_next;
+	*prev = req->dl_next;
 	write_unlock(&queue->syn_wait_lock);
 }
 
@@ -300,7 +306,6 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
 	req->num_retrans = 0;
 	req->num_timeout = 0;
 	req->sk = NULL;
-	req->dl_next = lopt->syn_table[hash];
 
 	/* before letting lookups find us, make sure all req fields
 	 * are committed to memory and refcnt initialized.
@@ -308,7 +313,9 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
 	smp_wmb();
 	atomic_set(&req->rsk_refcnt, 1);
 
+	req->rsk_hash = hash;
 	write_lock(&queue->syn_wait_lock);
+	req->dl_next = lopt->syn_table[hash];
 	lopt->syn_table[hash] = req;
 	write_unlock(&queue->syn_wait_lock);
 }
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5b29835b81d8..082fd79132b7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -406,8 +406,7 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
 					      struct sk_buff *skb,
 					      const struct tcphdr *th);
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
-			   struct request_sock *req, struct request_sock **prev,
-			   bool fastopen);
+			   struct request_sock *req, bool fastopen);
 int tcp_child_process(struct sock *parent, struct sock *child,
 		      struct sk_buff *skb);
 void tcp_enter_loss(struct sock *sk);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3b1d64d6e093..2396f50c5b04 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -280,8 +280,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 				       struct request_sock *req,
 				       struct dst_entry *dst);
 struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
-			    struct request_sock *req,
-			    struct request_sock **prev);
+			    struct request_sock *req);
 
 int dccp_child_process(struct sock *parent, struct sock *child,
 		       struct sk_buff *skb);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e7ad291cd96b..5bffbbaf1fac 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -288,11 +288,11 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	}
 
 	switch (sk->sk_state) {
-		struct request_sock *req , **prev;
+		struct request_sock *req;
 	case DCCP_LISTEN:
 		if (sock_owned_by_user(sk))
 			goto out;
-		req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
+		req = inet_csk_search_req(sk, dh->dccph_dport,
 					  iph->daddr, iph->saddr);
 		if (!req)
 			goto out;
@@ -314,7 +314,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 		 * created socket, and POSIX does not want network
 		 * errors returned from accept().
 		 */
-		inet_csk_reqsk_queue_drop(sk, req, prev);
+		inet_csk_reqsk_queue_drop(sk, req);
 		goto out;
 
 	case DCCP_REQUESTING:
@@ -448,13 +448,11 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	const struct dccp_hdr *dh = dccp_hdr(skb);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct sock *nsk;
-	struct request_sock **prev;
 	/* Find possible connection requests. */
-	struct request_sock *req = inet_csk_search_req(sk, &prev,
-						       dh->dccph_sport,
+	struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport,
 						       iph->saddr, iph->daddr);
-	if (req != NULL)
-		return dccp_check_req(sk, skb, req, prev);
+	if (req)
+		return dccp_check_req(sk, skb, req);
 
 	nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
 				      iph->saddr, dh->dccph_sport,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c655de5f67c9..ae2184039fe3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -149,12 +149,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	/* Might be for an request_sock */
 	switch (sk->sk_state) {
-		struct request_sock *req, **prev;
+		struct request_sock *req;
 	case DCCP_LISTEN:
 		if (sock_owned_by_user(sk))
 			goto out;
 
-		req = inet6_csk_search_req(sk, &prev, dh->dccph_dport,
+		req = inet6_csk_search_req(sk, dh->dccph_dport,
 					   &hdr->daddr, &hdr->saddr,
 					   inet6_iif(skb));
 		if (req == NULL)
@@ -172,7 +172,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			goto out;
 		}
 
-		inet_csk_reqsk_queue_drop(sk, req, prev);
+		inet_csk_reqsk_queue_drop(sk, req);
 		goto out;
 
 	case DCCP_REQUESTING:
@@ -317,16 +317,13 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh = dccp_hdr(skb);
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct request_sock *req;
 	struct sock *nsk;
-	struct request_sock **prev;
-	/* Find possible connection requests. */
-	struct request_sock *req = inet6_csk_search_req(sk, &prev,
-							dh->dccph_sport,
-							&iph->saddr,
-							&iph->daddr,
-							inet6_iif(skb));
+
+	req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr,
+				   &iph->daddr, inet6_iif(skb));
 	if (req != NULL)
-		return dccp_check_req(sk, skb, req, prev);
+		return dccp_check_req(sk, skb, req);
 
 	nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
 					 &iph->saddr, dh->dccph_sport,
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b50dc436db1f..332f7d6d9942 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -152,8 +152,7 @@ EXPORT_SYMBOL_GPL(dccp_create_openreq_child);
  * as an request_sock.
  */
 struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
-			    struct request_sock *req,
-			    struct request_sock **prev)
+			    struct request_sock *req)
 {
 	struct sock *child = NULL;
 	struct dccp_request_sock *dreq = dccp_rsk(req);
@@ -200,7 +199,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (child == NULL)
 		goto listen_overflow;
 
-	inet_csk_reqsk_queue_unlink(sk, req, prev);
+	inet_csk_reqsk_queue_unlink(sk, req);
 	inet_csk_reqsk_queue_removed(sk, req);
 	inet_csk_reqsk_queue_add(sk, req, child);
 out:
@@ -212,7 +211,7 @@ drop:
 	if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
 		req->rsk_ops->send_reset(sk, skb);
 
-	inet_csk_reqsk_queue_drop(sk, req, prev);
+	inet_csk_reqsk_queue_drop(sk, req);
 	goto out;
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f0f91858aecf..4f57a017928c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -480,18 +480,17 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
 #endif
 
 struct request_sock *inet_csk_search_req(const struct sock *sk,
-					 struct request_sock ***prevp,
 					 const __be16 rport, const __be32 raddr,
 					 const __be32 laddr)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	struct request_sock *req, **prev;
+	struct request_sock *req;
 
-	for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
-						    lopt->nr_table_entries)];
-	     (req = *prev) != NULL;
-	     prev = &req->dl_next) {
+	for (req = lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
+						  lopt->nr_table_entries)];
+	     req != NULL;
+	     req = req->dl_next) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
 
 		if (ireq->ir_rmt_port == rport &&
@@ -499,7 +498,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
 		    ireq->ir_loc_addr == laddr &&
 		    AF_INET_FAMILY(req->rsk_ops->family)) {
 			WARN_ON(req->sk);
-			*prevp = prev;
 			break;
 		}
 	}
@@ -610,7 +608,10 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	i = lopt->clock_hand;
 
 	do {
-		reqp=&lopt->syn_table[i];
+		reqp = &lopt->syn_table[i];
+		if (!*reqp)
+			goto next_bucket;
+		write_lock(&queue->syn_wait_lock);
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
 				int expire = 0, resend = 0;
@@ -635,14 +636,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 				}
 
 				/* Drop this request */
-				inet_csk_reqsk_queue_unlink(parent, req, reqp);
+				*reqp = req->dl_next;
 				reqsk_queue_removed(queue, req);
 				reqsk_put(req);
 				continue;
 			}
 			reqp = &req->dl_next;
 		}
-
+		write_unlock(&queue->syn_wait_lock);
+next_bucket:
 		i = (i + 1) & (lopt->nr_table_entries - 1);
 
 	} while (--budget > 0);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1dfbaee3554e..95caea707f54 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5694,7 +5694,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 		    sk->sk_state != TCP_FIN_WAIT1);
 
-		if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
+		if (tcp_check_req(sk, skb, req, true) == NULL)
 			goto discard;
 	}
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ddd0b1f25b96..19c3770f1e97 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -458,12 +458,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	}
 
 	switch (sk->sk_state) {
-		struct request_sock *req, **prev;
+		struct request_sock *req;
 	case TCP_LISTEN:
 		if (sock_owned_by_user(sk))
 			goto out;
 
-		req = inet_csk_search_req(sk, &prev, th->dest,
+		req = inet_csk_search_req(sk, th->dest,
 					  iph->daddr, iph->saddr);
 		if (!req)
 			goto out;
@@ -484,7 +484,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		 * created socket, and POSIX does not want network
 		 * errors returned from accept().
 		 */
-		inet_csk_reqsk_queue_drop(sk, req, prev);
+		inet_csk_reqsk_queue_drop(sk, req);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 		goto out;
 
@@ -1392,15 +1392,14 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
 
 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	const struct iphdr *iph = ip_hdr(skb);
+	struct request_sock *req;
 	struct sock *nsk;
-	struct request_sock **prev;
-	/* Find possible connection requests. */
-	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
-						       iph->saddr, iph->daddr);
+
+	req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
 	if (req)
-		return tcp_check_req(sk, skb, req, prev, false);
+		return tcp_check_req(sk, skb, req, false);
 
 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
 			th->source, iph->daddr, th->dest, inet_iif(skb));
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index dd11ac7798c6..848bcab358e4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -572,7 +572,6 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
 
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock *req,
-			   struct request_sock **prev,
 			   bool fastopen)
 {
 	struct tcp_options_received tmp_opt;
@@ -766,7 +765,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (child == NULL)
 		goto listen_overflow;
 
-	inet_csk_reqsk_queue_unlink(sk, req, prev);
+	inet_csk_reqsk_queue_unlink(sk, req);
 	inet_csk_reqsk_queue_removed(sk, req);
 
 	inet_csk_reqsk_queue_add(sk, req, child);
@@ -791,7 +790,7 @@ embryonic_reset:
 		tcp_reset(sk);
 	}
 	if (!fastopen) {
-		inet_csk_reqsk_queue_drop(sk, req, prev);
+		inet_csk_reqsk_queue_drop(sk, req);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
 	}
 	return NULL;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 29b32206e494..b7acb9ebc4f5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -113,7 +113,6 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
 }
 
 struct request_sock *inet6_csk_search_req(const struct sock *sk,
-					  struct request_sock ***prevp,
 					  const __be16 rport,
 					  const struct in6_addr *raddr,
 					  const struct in6_addr *laddr,
@@ -121,13 +120,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	struct request_sock *req, **prev;
+	struct request_sock *req;
 
-	for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport,
+	for (req = lopt->syn_table[inet6_synq_hash(raddr, rport,
 						     lopt->hash_rnd,
 						     lopt->nr_table_entries)];
-	     (req = *prev) != NULL;
-	     prev = &req->dl_next) {
+	     req != NULL;
+	     req = req->dl_next) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
 
 		if (ireq->ir_rmt_port == rport &&
@@ -136,7 +135,6 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
 		    ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
 		    (!ireq->ir_iif || ireq->ir_iif == iif)) {
 			WARN_ON(req->sk != NULL);
-			*prevp = prev;
 			return req;
 		}
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 720676d073d9..146f123b52c9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -403,13 +403,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	/* Might be for an request_sock */
 	switch (sk->sk_state) {
-		struct request_sock *req, **prev;
+		struct request_sock *req;
 	case TCP_LISTEN:
 		if (sock_owned_by_user(sk))
 			goto out;
 
 		/* Note : We use inet6_iif() here, not tcp_v6_iif() */
-		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
+		req = inet6_csk_search_req(sk, th->dest, &hdr->daddr,
 					   &hdr->saddr, inet6_iif(skb));
 		if (!req)
 			goto out;
@@ -424,7 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			goto out;
 		}
 
-		inet_csk_reqsk_queue_drop(sk, req, prev);
+		inet_csk_reqsk_queue_drop(sk, req);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 		goto out;
 
@@ -980,16 +980,16 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 
 static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
-	struct request_sock *req, **prev;
 	const struct tcphdr *th = tcp_hdr(skb);
+	struct request_sock *req;
 	struct sock *nsk;
 
 	/* Find possible connection requests. */
-	req = inet6_csk_search_req(sk, &prev, th->source,
+	req = inet6_csk_search_req(sk, th->source,
 				   &ipv6_hdr(skb)->saddr,
 				   &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
 	if (req)
-		return tcp_check_req(sk, skb, req, prev, false);
+		return tcp_check_req(sk, skb, req, false);
 
 	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
 					 &ipv6_hdr(skb)->saddr, th->source,
-- 
cgit v1.2.3


From fa76ce7328b289b6edd476e24eb52fd634261720 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 19 Mar 2015 19:04:20 -0700
Subject: inet: get rid of central tcp/dccp listener timer

One of the major issue for TCP is the SYNACK rtx handling,
done by inet_csk_reqsk_queue_prune(), fired by the keepalive
timer of a TCP_LISTEN socket.

This function runs for awful long times, with socket lock held,
meaning that other cpus needing this lock have to spin for hundred of ms.

SYNACK are sent in huge bursts, likely to cause severe drops anyway.

This model was OK 15 years ago when memory was very tight.

We now can afford to have a timer per request sock.

Timer invocations no longer need to lock the listener,
and can be run from all cpus in parallel.

With following patch increasing somaxconn width to 32 bits,
I tested a listener with more than 4 million active request sockets,
and a steady SYNFLOOD of ~200,000 SYN per second.
Host was sending ~830,000 SYNACK per second.

This is ~100 times more what we could achieve before this patch.

Later, we will get rid of the listener hash and use ehash instead.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_connection_sock.h |   2 +-
 include/net/inet_connection_sock.h  |  15 ++--
 include/net/request_sock.h          |  87 +++++++++++-----------
 net/core/request_sock.c             |  13 ++--
 net/core/sock.c                     |   2 +-
 net/dccp/ipv4.c                     |  10 ++-
 net/dccp/ipv6.c                     |  12 ++--
 net/dccp/timer.c                    |  24 +------
 net/ipv4/inet_connection_sock.c     | 139 ++++++++++++++++++------------------
 net/ipv4/inet_diag.c                |   4 +-
 net/ipv4/syncookies.c               |   1 -
 net/ipv4/tcp_fastopen.c             |   2 +-
 net/ipv4/tcp_ipv4.c                 |  11 ++-
 net/ipv4/tcp_minisocks.c            |   5 +-
 net/ipv4/tcp_timer.c                |  12 +---
 net/ipv6/inet6_connection_sock.c    |  19 ++---
 net/ipv6/syncookies.c               |   1 -
 net/ipv6/tcp_ipv6.c                 |  12 ++--
 18 files changed, 173 insertions(+), 198 deletions(-)

(limited to 'include')

diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 15bd40878d2a..6d539e4e5ba7 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -28,7 +28,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6,
 				      const struct request_sock *req);
 
-struct request_sock *inet6_csk_search_req(const struct sock *sk,
+struct request_sock *inet6_csk_search_req(struct sock *sk,
 					  const __be16 rport,
 					  const struct in6_addr *raddr,
 					  const struct in6_addr *laddr,
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 423a46106e57..7b5887cd1172 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -256,7 +256,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
 
 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
 
-struct request_sock *inet_csk_search_req(const struct sock *sk,
+struct request_sock *inet_csk_search_req(struct sock *sk,
 					 const __be16 rport,
 					 const __be32 raddr,
 					 const __be32 laddr);
@@ -282,15 +282,13 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
 						struct request_sock *req)
 {
-	if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0)
-		inet_csk_delete_keepalive_timer(sk);
+	reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
 }
 
 static inline void inet_csk_reqsk_queue_added(struct sock *sk,
 					      const unsigned long timeout)
 {
-	if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0)
-		inet_csk_reset_keepalive_timer(sk, timeout);
+	reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue);
 }
 
 static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
@@ -319,14 +317,9 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
 {
 	inet_csk_reqsk_queue_unlink(sk, req);
 	inet_csk_reqsk_queue_removed(sk, req);
-	reqsk_free(req);
+	reqsk_put(req);
 }
 
-void inet_csk_reqsk_queue_prune(struct sock *parent,
-				const unsigned long interval,
-				const unsigned long timeout,
-				const unsigned long max_rto);
-
 void inet_csk_destroy_sock(struct sock *sk);
 void inet_csk_prepare_forced_close(struct sock *sk);
 
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 65223905d139..6a91261d9b7b 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -62,7 +62,7 @@ struct request_sock {
 	u32				window_clamp; /* window clamp at creation time */
 	u32				rcv_wnd;	  /* rcv_wnd offered first time */
 	u32				ts_recent;
-	unsigned long			expires;
+	struct timer_list		rsk_timer;
 	const struct request_sock_ops	*rsk_ops;
 	struct sock			*sk;
 	u32				secid;
@@ -110,9 +110,6 @@ static inline void reqsk_free(struct request_sock *req)
 
 static inline void reqsk_put(struct request_sock *req)
 {
-	/* temporary debugging, until req sock are put into ehash table */
-	WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1);
-
 	if (atomic_dec_and_test(&req->rsk_refcnt))
 		reqsk_free(req);
 }
@@ -124,12 +121,16 @@ extern int sysctl_max_syn_backlog;
  * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
  */
 struct listen_sock {
-	u8			max_qlen_log;
+	int			qlen_inc; /* protected by listener lock */
+	int			young_inc;/* protected by listener lock */
+
+	/* following fields can be updated by timer */
+	atomic_t		qlen_dec; /* qlen = qlen_inc - qlen_dec */
+	atomic_t		young_dec;
+
+	u8			max_qlen_log ____cacheline_aligned_in_smp;
 	u8			synflood_warned;
 	/* 2 bytes hole, try to use */
-	int			qlen;
-	int			qlen_young;
-	int			clock_hand;
 	u32			hash_rnd;
 	u32			nr_table_entries;
 	struct request_sock	*syn_table[0];
@@ -182,9 +183,7 @@ struct fastopen_queue {
 struct request_sock_queue {
 	struct request_sock	*rskq_accept_head;
 	struct request_sock	*rskq_accept_tail;
-	rwlock_t		syn_wait_lock;
 	u8			rskq_defer_accept;
-	/* 3 bytes hole, try to pack */
 	struct listen_sock	*listen_opt;
 	struct fastopen_queue	*fastopenq; /* This is non-NULL iff TFO has been
 					     * enabled on this listener. Check
@@ -192,6 +191,9 @@ struct request_sock_queue {
 					     * to determine if TFO is enabled
 					     * right at this moment.
 					     */
+
+	/* temporary alignment, our goal is to get rid of this lock */
+	rwlock_t		syn_wait_lock ____cacheline_aligned_in_smp;
 };
 
 int reqsk_queue_alloc(struct request_sock_queue *queue,
@@ -223,11 +225,15 @@ static inline void reqsk_queue_unlink(struct request_sock_queue *queue,
 	struct request_sock **prev;
 
 	write_lock(&queue->syn_wait_lock);
+
 	prev = &lopt->syn_table[req->rsk_hash];
 	while (*prev != req)
 		prev = &(*prev)->dl_next;
 	*prev = req->dl_next;
+
 	write_unlock(&queue->syn_wait_lock);
+	if (del_timer(&req->rsk_timer))
+		reqsk_put(req);
 }
 
 static inline void reqsk_queue_add(struct request_sock_queue *queue,
@@ -260,64 +266,53 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue
 	return req;
 }
 
-static inline int reqsk_queue_removed(struct request_sock_queue *queue,
-				      struct request_sock *req)
+static inline void reqsk_queue_removed(struct request_sock_queue *queue,
+				       const struct request_sock *req)
 {
 	struct listen_sock *lopt = queue->listen_opt;
 
 	if (req->num_timeout == 0)
-		--lopt->qlen_young;
-
-	return --lopt->qlen;
+		atomic_inc(&lopt->young_dec);
+	atomic_inc(&lopt->qlen_dec);
 }
 
-static inline int reqsk_queue_added(struct request_sock_queue *queue)
+static inline void reqsk_queue_added(struct request_sock_queue *queue)
 {
 	struct listen_sock *lopt = queue->listen_opt;
-	const int prev_qlen = lopt->qlen;
 
-	lopt->qlen_young++;
-	lopt->qlen++;
-	return prev_qlen;
+	lopt->young_inc++;
+	lopt->qlen_inc++;
 }
 
-static inline int reqsk_queue_len(const struct request_sock_queue *queue)
+static inline int listen_sock_qlen(const struct listen_sock *lopt)
 {
-	return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0;
+	return lopt->qlen_inc - atomic_read(&lopt->qlen_dec);
 }
 
-static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
+static inline int listen_sock_young(const struct listen_sock *lopt)
 {
-	return queue->listen_opt->qlen_young;
+	return lopt->young_inc - atomic_read(&lopt->young_dec);
 }
 
-static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
+static inline int reqsk_queue_len(const struct request_sock_queue *queue)
 {
-	return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
+	const struct listen_sock *lopt = queue->listen_opt;
+
+	return lopt ? listen_sock_qlen(lopt) : 0;
 }
 
-static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
-					u32 hash, struct request_sock *req,
-					unsigned long timeout)
+static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
 {
-	struct listen_sock *lopt = queue->listen_opt;
-
-	req->expires = jiffies + timeout;
-	req->num_retrans = 0;
-	req->num_timeout = 0;
-	req->sk = NULL;
-
-	/* before letting lookups find us, make sure all req fields
-	 * are committed to memory and refcnt initialized.
-	 */
-	smp_wmb();
-	atomic_set(&req->rsk_refcnt, 1);
+	return listen_sock_young(queue->listen_opt);
+}
 
-	req->rsk_hash = hash;
-	write_lock(&queue->syn_wait_lock);
-	req->dl_next = lopt->syn_table[hash];
-	lopt->syn_table[hash] = req;
-	write_unlock(&queue->syn_wait_lock);
+static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
+{
+	return reqsk_queue_len(queue) >> queue->listen_opt->max_qlen_log;
 }
 
+void reqsk_queue_hash_req(struct request_sock_queue *queue,
+			  u32 hash, struct request_sock *req,
+			  unsigned long timeout);
+
 #endif /* _REQUEST_SOCK_H */
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index cc39a2aa663a..cdc0ddd9ac9f 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -94,21 +94,26 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 	/* make all the listen_opt local to us */
 	struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
 
-	if (lopt->qlen != 0) {
+	if (listen_sock_qlen(lopt) != 0) {
 		unsigned int i;
 
 		for (i = 0; i < lopt->nr_table_entries; i++) {
 			struct request_sock *req;
 
+			write_lock_bh(&queue->syn_wait_lock);
 			while ((req = lopt->syn_table[i]) != NULL) {
 				lopt->syn_table[i] = req->dl_next;
-				lopt->qlen--;
+				atomic_inc(&lopt->qlen_dec);
+				if (del_timer(&req->rsk_timer))
+					reqsk_put(req);
 				reqsk_put(req);
 			}
+			write_unlock_bh(&queue->syn_wait_lock);
 		}
 	}
 
-	WARN_ON(lopt->qlen != 0);
+	if (WARN_ON(listen_sock_qlen(lopt) != 0))
+		pr_err("qlen %u\n", listen_sock_qlen(lopt));
 	kvfree(lopt);
 }
 
@@ -187,7 +192,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 	 *
 	 * For more details see CoNext'11 "TCP Fast Open" paper.
 	 */
-	req->expires = jiffies + 60*HZ;
+	req->rsk_timer.expires = jiffies + 60*HZ;
 	if (fastopenq->rskq_rst_head == NULL)
 		fastopenq->rskq_rst_head = req;
 	else
diff --git a/net/core/sock.c b/net/core/sock.c
index d9f9e4825362..744a04ddb61c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2739,7 +2739,7 @@ static int req_prot_init(const struct proto *prot)
 
 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
 					   rsk_prot->obj_size, 0,
-					   SLAB_HWCACHE_ALIGN, NULL);
+					   0, NULL);
 
 	if (!rsk_prot->slab) {
 		pr_crit("%s: Can't create request sock SLAB cache!\n",
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 5bffbbaf1fac..25a9615b3b88 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -306,6 +306,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 		if (!between48(seq, dccp_rsk(req)->dreq_iss,
 				    dccp_rsk(req)->dreq_gss)) {
 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+			reqsk_put(req);
 			goto out;
 		}
 		/*
@@ -315,6 +316,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 		 * errors returned from accept().
 		 */
 		inet_csk_reqsk_queue_drop(sk, req);
+		reqsk_put(req);
 		goto out;
 
 	case DCCP_REQUESTING:
@@ -451,9 +453,11 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	/* Find possible connection requests. */
 	struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport,
 						       iph->saddr, iph->daddr);
-	if (req)
-		return dccp_check_req(sk, skb, req);
-
+	if (req) {
+		nsk = dccp_check_req(sk, skb, req);
+		reqsk_put(req);
+		return nsk;
+	}
 	nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
 				      iph->saddr, dh->dccph_sport,
 				      iph->daddr, dh->dccph_dport,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index ae2184039fe3..69d8f13895ba 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -157,7 +157,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		req = inet6_csk_search_req(sk, dh->dccph_dport,
 					   &hdr->daddr, &hdr->saddr,
 					   inet6_iif(skb));
-		if (req == NULL)
+		if (!req)
 			goto out;
 
 		/*
@@ -169,10 +169,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (!between48(seq, dccp_rsk(req)->dreq_iss,
 				    dccp_rsk(req)->dreq_gss)) {
 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+			reqsk_put(req);
 			goto out;
 		}
 
 		inet_csk_reqsk_queue_drop(sk, req);
+		reqsk_put(req);
 		goto out;
 
 	case DCCP_REQUESTING:
@@ -322,9 +324,11 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 
 	req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr,
 				   &iph->daddr, inet6_iif(skb));
-	if (req != NULL)
-		return dccp_check_req(sk, skb, req);
-
+	if (req) {
+		nsk = dccp_check_req(sk, skb, req);
+		reqsk_put(req);
+		return nsk;
+	}
 	nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
 					 &iph->saddr, dh->dccph_sport,
 					 &iph->daddr, ntohs(dh->dccph_dport),
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1cd46a345cb0..3ef7acef3ce8 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -161,33 +161,11 @@ out:
 	sock_put(sk);
 }
 
-/*
- *	Timer for listening sockets
- */
-static void dccp_response_timer(struct sock *sk)
-{
-	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT,
-				   DCCP_RTO_MAX);
-}
-
 static void dccp_keepalive_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
 
-	/* Only process if socket is not in use. */
-	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk)) {
-		/* Try again later. */
-		inet_csk_reset_keepalive_timer(sk, HZ / 20);
-		goto out;
-	}
-
-	if (sk->sk_state == DCCP_LISTEN) {
-		dccp_response_timer(sk);
-		goto out;
-	}
-out:
-	bh_unlock_sock(sk);
+	pr_err("dccp should not use a keepalive timer !\n");
 	sock_put(sk);
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4f57a017928c..126a37a156cf 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -23,6 +23,7 @@
 #include <net/route.h>
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
+#include <net/tcp.h>
 
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -476,31 +477,37 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
 #if IS_ENABLED(CONFIG_IPV6)
 #define AF_INET_FAMILY(fam) ((fam) == AF_INET)
 #else
-#define AF_INET_FAMILY(fam) 1
+#define AF_INET_FAMILY(fam) true
 #endif
 
-struct request_sock *inet_csk_search_req(const struct sock *sk,
-					 const __be16 rport, const __be32 raddr,
+/* Note: this is temporary :
+ * req sock will no longer be in listener hash table
+*/
+struct request_sock *inet_csk_search_req(struct sock *sk,
+					 const __be16 rport,
+					 const __be32 raddr,
 					 const __be32 laddr)
 {
-	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
 	struct request_sock *req;
+	u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd,
+				  lopt->nr_table_entries);
 
-	for (req = lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
-						  lopt->nr_table_entries)];
-	     req != NULL;
-	     req = req->dl_next) {
+	write_lock(&icsk->icsk_accept_queue.syn_wait_lock);
+	for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
 
 		if (ireq->ir_rmt_port == rport &&
 		    ireq->ir_rmt_addr == raddr &&
 		    ireq->ir_loc_addr == laddr &&
 		    AF_INET_FAMILY(req->rsk_ops->family)) {
+			atomic_inc(&req->rsk_refcnt);
 			WARN_ON(req->sk);
 			break;
 		}
 	}
+	write_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
 
 	return req;
 }
@@ -556,23 +563,23 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_rtx_syn_ack);
 
-void inet_csk_reqsk_queue_prune(struct sock *parent,
-				const unsigned long interval,
-				const unsigned long timeout,
-				const unsigned long max_rto)
+static void reqsk_timer_handler(unsigned long data)
 {
-	struct inet_connection_sock *icsk = inet_csk(parent);
+	struct request_sock *req = (struct request_sock *)data;
+	struct sock *sk_listener = req->rsk_listener;
+	struct inet_connection_sock *icsk = inet_csk(sk_listener);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct listen_sock *lopt = queue->listen_opt;
-	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
-	int thresh = max_retries;
-	unsigned long now = jiffies;
-	struct request_sock **reqp, *req;
-	int i, budget;
+	int expire = 0, resend = 0;
+	int max_retries, thresh;
 
-	if (lopt == NULL || lopt->qlen == 0)
+	if (sk_listener->sk_state != TCP_LISTEN || !lopt) {
+		reqsk_put(req);
 		return;
+	}
 
+	max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	thresh = max_retries;
 	/* Normally all the openreqs are young and become mature
 	 * (i.e. converted to established socket) for first timeout.
 	 * If synack was not acknowledged for 1 second, it means
@@ -590,71 +597,63 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	 * embrions; and abort old ones without pity, if old
 	 * ones are about to clog our table.
 	 */
-	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
-		int young = (lopt->qlen_young<<1);
+	if (listen_sock_qlen(lopt) >> (lopt->max_qlen_log - 1)) {
+		int young = listen_sock_young(lopt) << 1;
 
 		while (thresh > 2) {
-			if (lopt->qlen < young)
+			if (listen_sock_qlen(lopt) < young)
 				break;
 			thresh--;
 			young <<= 1;
 		}
 	}
-
 	if (queue->rskq_defer_accept)
 		max_retries = queue->rskq_defer_accept;
+	syn_ack_recalc(req, thresh, max_retries, queue->rskq_defer_accept,
+		       &expire, &resend);
+	req->rsk_ops->syn_ack_timeout(sk_listener, req);
+	if (!expire &&
+	    (!resend ||
+	     !inet_rtx_syn_ack(sk_listener, req) ||
+	     inet_rsk(req)->acked)) {
+		unsigned long timeo;
+
+		if (req->num_timeout++ == 0)
+			atomic_inc(&lopt->young_dec);
+		timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
+		mod_timer_pinned(&req->rsk_timer, jiffies + timeo);
+		return;
+	}
+	inet_csk_reqsk_queue_drop(sk_listener, req);
+	reqsk_put(req);
+}
 
-	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
-	i = lopt->clock_hand;
-
-	do {
-		reqp = &lopt->syn_table[i];
-		if (!*reqp)
-			goto next_bucket;
-		write_lock(&queue->syn_wait_lock);
-		while ((req = *reqp) != NULL) {
-			if (time_after_eq(now, req->expires)) {
-				int expire = 0, resend = 0;
-
-				syn_ack_recalc(req, thresh, max_retries,
-					       queue->rskq_defer_accept,
-					       &expire, &resend);
-				req->rsk_ops->syn_ack_timeout(parent, req);
-				if (!expire &&
-				    (!resend ||
-				     !inet_rtx_syn_ack(parent, req) ||
-				     inet_rsk(req)->acked)) {
-					unsigned long timeo;
-
-					if (req->num_timeout++ == 0)
-						lopt->qlen_young--;
-					timeo = min(timeout << req->num_timeout,
-						    max_rto);
-					req->expires = now + timeo;
-					reqp = &req->dl_next;
-					continue;
-				}
+void reqsk_queue_hash_req(struct request_sock_queue *queue,
+			  u32 hash, struct request_sock *req,
+			  unsigned long timeout)
+{
+	struct listen_sock *lopt = queue->listen_opt;
 
-				/* Drop this request */
-				*reqp = req->dl_next;
-				reqsk_queue_removed(queue, req);
-				reqsk_put(req);
-				continue;
-			}
-			reqp = &req->dl_next;
-		}
-		write_unlock(&queue->syn_wait_lock);
-next_bucket:
-		i = (i + 1) & (lopt->nr_table_entries - 1);
+	req->num_retrans = 0;
+	req->num_timeout = 0;
+	req->sk = NULL;
 
-	} while (--budget > 0);
+	/* before letting lookups find us, make sure all req fields
+	 * are committed to memory and refcnt initialized.
+	 */
+	smp_wmb();
+	atomic_set(&req->rsk_refcnt, 2);
+	setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
+	req->rsk_hash = hash;
 
-	lopt->clock_hand = i;
+	write_lock(&queue->syn_wait_lock);
+	req->dl_next = lopt->syn_table[hash];
+	lopt->syn_table[hash] = req;
+	write_unlock(&queue->syn_wait_lock);
 
-	if (lopt->qlen)
-		inet_csk_reset_keepalive_timer(parent, interval);
+	mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
 }
-EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
+EXPORT_SYMBOL(reqsk_queue_hash_req);
 
 /**
  *	inet_csk_clone_lock - clone an inet socket, and lock its clone
@@ -790,8 +789,6 @@ void inet_csk_listen_stop(struct sock *sk)
 	struct request_sock *acc_req;
 	struct request_sock *req;
 
-	inet_csk_delete_keepalive_timer(sk);
-
 	/* make all the listen_opt local to us */
 	acc_req = reqsk_queue_yank_acceptq(queue);
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 74c39c9f3e11..34073bbe2700 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -285,7 +285,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
 	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
 		     offsetof(struct sock, sk_cookie));
 
-	tmo = inet_reqsk(sk)->expires - jiffies;
+	tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
 	r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
 	r->idiag_rqueue	= 0;
 	r->idiag_wqueue	= 0;
@@ -719,7 +719,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 	read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 
 	lopt = icsk->icsk_accept_queue.listen_opt;
-	if (!lopt || !lopt->qlen)
+	if (!lopt || !listen_sock_qlen(lopt))
 		goto out;
 
 	if (bc) {
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index ef01d8570358..805dc444741d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -361,7 +361,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 	}
 
-	req->expires	= 0UL;
 	req->num_retrans = 0;
 
 	/*
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 82e375a0cbcf..2eb887ec0ce3 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -240,7 +240,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 		struct request_sock *req1;
 		spin_lock(&fastopenq->lock);
 		req1 = fastopenq->rskq_rst_head;
-		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+		if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) {
 			spin_unlock(&fastopenq->lock);
 			NET_INC_STATS_BH(sock_net(sk),
 					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 19c3770f1e97..5554b8f33d41 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -475,6 +475,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 		if (seq != tcp_rsk(req)->snt_isn) {
 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+			reqsk_put(req);
 			goto out;
 		}
 
@@ -486,6 +487,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		 */
 		inet_csk_reqsk_queue_drop(sk, req);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+		reqsk_put(req);
 		goto out;
 
 	case TCP_SYN_SENT:
@@ -1398,8 +1400,11 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	struct sock *nsk;
 
 	req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
-	if (req)
-		return tcp_check_req(sk, skb, req, false);
+	if (req) {
+		nsk = tcp_check_req(sk, skb, req, false);
+		reqsk_put(req);
+		return nsk;
+	}
 
 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
 			th->source, iph->daddr, th->dest, inet_iif(skb));
@@ -2208,7 +2213,7 @@ static void get_openreq4(const struct request_sock *req,
 			 struct seq_file *f, int i, kuid_t uid)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
-	long delta = req->expires - jiffies;
+	long delta = req->rsk_timer.expires - jiffies;
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 848bcab358e4..274e96fb369b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -629,8 +629,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 					  &tcp_rsk(req)->last_oow_ack_time) &&
 
 		    !inet_rtx_syn_ack(sk, req))
-			req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout,
-					   TCP_RTO_MAX) + jiffies;
+			mod_timer_pending(&req->rsk_timer, jiffies +
+				min(TCP_TIMEOUT_INIT << req->num_timeout,
+				    TCP_RTO_MAX));
 		return NULL;
 	}
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 15505936511d..3daa6b5d766d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -539,16 +539,6 @@ static void tcp_write_timer(unsigned long data)
 	sock_put(sk);
 }
 
-/*
- *	Timer for listening sockets
- */
-
-static void tcp_synack_timer(struct sock *sk)
-{
-	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
-				   TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-}
-
 void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
 {
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
@@ -583,7 +573,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	}
 
 	if (sk->sk_state == TCP_LISTEN) {
-		tcp_synack_timer(sk);
+		pr_err("Hmm... keepalive on a LISTEN ???\n");
 		goto out;
 	}
 
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index b7acb9ebc4f5..2f3bbe569e8f 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -112,21 +112,20 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
 	return c & (synq_hsize - 1);
 }
 
-struct request_sock *inet6_csk_search_req(const struct sock *sk,
+struct request_sock *inet6_csk_search_req(struct sock *sk,
 					  const __be16 rport,
 					  const struct in6_addr *raddr,
 					  const struct in6_addr *laddr,
 					  const int iif)
 {
-	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
 	struct request_sock *req;
+	u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd,
+				   lopt->nr_table_entries);
 
-	for (req = lopt->syn_table[inet6_synq_hash(raddr, rport,
-						     lopt->hash_rnd,
-						     lopt->nr_table_entries)];
-	     req != NULL;
-	     req = req->dl_next) {
+	write_lock(&icsk->icsk_accept_queue.syn_wait_lock);
+	for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
 
 		if (ireq->ir_rmt_port == rport &&
@@ -134,12 +133,14 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
 		    ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
 		    ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
 		    (!ireq->ir_iif || ireq->ir_iif == iif)) {
+			atomic_inc(&req->rsk_refcnt);
 			WARN_ON(req->sk != NULL);
-			return req;
+			break;
 		}
 	}
+	write_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
 
-	return NULL;
+	return req;
 }
 EXPORT_SYMBOL_GPL(inet6_csk_search_req);
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index da5823e5e5a7..2819137fc87d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -222,7 +222,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	ireq->ir_mark = inet_request_mark(sk, skb);
 
-	req->expires = 0UL;
 	req->num_retrans = 0;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 146f123b52c9..6e3f90db038c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -421,11 +421,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 		if (seq != tcp_rsk(req)->snt_isn) {
 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+			reqsk_put(req);
 			goto out;
 		}
 
 		inet_csk_reqsk_queue_drop(sk, req);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+		reqsk_put(req);
 		goto out;
 
 	case TCP_SYN_SENT:
@@ -988,9 +990,11 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
 	req = inet6_csk_search_req(sk, th->source,
 				   &ipv6_hdr(skb)->saddr,
 				   &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
-	if (req)
-		return tcp_check_req(sk, skb, req, false);
-
+	if (req) {
+		nsk = tcp_check_req(sk, skb, req, false);
+		reqsk_put(req);
+		return nsk;
+	}
 	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
 					 &ipv6_hdr(skb)->saddr, th->source,
 					 &ipv6_hdr(skb)->daddr, ntohs(th->dest),
@@ -1670,7 +1674,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
 static void get_openreq6(struct seq_file *seq,
 			 struct request_sock *req, int i, kuid_t uid)
 {
-	int ttd = req->expires - jiffies;
+	long ttd = req->rsk_timer.expires - jiffies;
 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
 
-- 
cgit v1.2.3


From becb74f0acca19b5abfcb24dc602530f3deea66a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 19 Mar 2015 19:04:21 -0700
Subject: net: increase sk_[max_]ack_backlog

sk_ack_backlog & sk_max_ack_backlog were 16bit fields, meaning
listen() backlog was limited to 65535.

It is time to increase the width to allow much bigger backlog,
if admins change /proc/sys/net/core/somaxconn &
/proc/sys/net/ipv4/tcp_max_syn_backlog default values.

Tested:

echo 5000000 >/proc/sys/net/core/somaxconn
echo 5000000 >/proc/sys/net/ipv4/tcp_max_syn_backlog

Ran a SYNFLOOD test against a listener using listen(fd, 5000000)

myhost~# grep request_sock_TCP /proc/slabinfo
request_sock_TCP  4185642 4411940    304   13    1 : tunables   54   27    8 : slabdata 339380 339380      0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h         | 4 ++--
 net/core/sysctl_net_core.c | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index e0360f5a53e9..3f9b8ce56948 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -405,8 +405,8 @@ struct sock {
 	rwlock_t		sk_callback_lock;
 	int			sk_err,
 				sk_err_soft;
-	unsigned short		sk_ack_backlog;
-	unsigned short		sk_max_ack_backlog;
+	u32			sk_ack_backlog;
+	u32			sk_max_ack_backlog;
 	__u32			sk_priority;
 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 	__u32			sk_cgrp_prioidx;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 433424804284..e1c85db5216f 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,7 +24,6 @@
 
 static int zero = 0;
 static int one = 1;
-static int ushort_max = USHRT_MAX;
 
 static int net_msg_warn;	/* Unused, but still a sysctl */
 
@@ -401,7 +400,6 @@ static struct ctl_table netns_core_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.extra1		= &zero,
-		.extra2		= &ushort_max,
 		.proc_handler	= proc_dointvec_minmax
 	},
 	{ }
-- 
cgit v1.2.3


From a736775db683174269c65c7c5cc8e5ee534e7681 Mon Sep 17 00:00:00 2001
From: Charlie Mooney <charliemooney@chromium.org>
Date: Fri, 20 Mar 2015 09:40:17 -0700
Subject: Input: add MT_TOOL_PALM

Currently there are only two "tools" that can be specified by a multi-touch
driver: MT_TOOL_FINGER and MT_TOOL_PEN. In working with Elan (The touch
vendor) and discussing their next-gen devices it seems that it will be
useful to have more tools so that their devices can give the upper layers
of the stack hints as to what is touching the sensor.

In particular they have new experimental firmware that can better
differentiate between palms vs fingertips and would like to plumb a patch
so that we can use their hints in higher-level gesture soft- ware.  The
firmware on the device can reasonably do a better job of palm detection
because it has access to all of the raw sensor readings as opposed to just
the width/pressure/etc that are exposed by the driver.  As such, the
firmware can characterize what a palm looks like in much finer-grained
detail and this change would allow such a device to share its findings with
the kernel.

Signed-off-by: Charlie Mooney <charliemooney@chromium.org>
Acked-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 Documentation/input/multi-touch-protocol.txt | 9 ++++++---
 include/uapi/linux/input.h                   | 3 ++-
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt
index 7b4f59c09ee2..b85d000faeb4 100644
--- a/Documentation/input/multi-touch-protocol.txt
+++ b/Documentation/input/multi-touch-protocol.txt
@@ -312,9 +312,12 @@ ABS_MT_TOOL_TYPE
 
 The type of approaching tool. A lot of kernel drivers cannot distinguish
 between different tool types, such as a finger or a pen. In such cases, the
-event should be omitted. The protocol currently supports MT_TOOL_FINGER and
-MT_TOOL_PEN [2]. For type B devices, this event is handled by input core;
-drivers should instead use input_mt_report_slot_state().
+event should be omitted. The protocol currently supports MT_TOOL_FINGER,
+MT_TOOL_PEN, and MT_TOOL_PALM [2]. For type B devices, this event is handled
+by input core; drivers should instead use input_mt_report_slot_state().
+A contact's ABS_MT_TOOL_TYPE may change over time while still touching the
+device, because the firmware may not be able to determine which tool is being
+used when it first appears.
 
 ABS_MT_BLOB_ID
 
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index a1d7e931ab72..2320b0ce7579 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -972,7 +972,8 @@ struct input_keymap_entry {
  */
 #define MT_TOOL_FINGER		0
 #define MT_TOOL_PEN		1
-#define MT_TOOL_MAX		1
+#define MT_TOOL_PALM		2
+#define MT_TOOL_MAX		2
 
 /*
  * Values describing the status of a force-feedback effect
-- 
cgit v1.2.3


From 488fb86ee91d3b1182c2e30a9f9b45da14eda46f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 20 Mar 2015 21:56:59 +1100
Subject: rhashtable: Make rhashtable_init params argument const

This patch marks the rhashtable_init params argument const as
there is no reason to modify it since we will always make a copy
of it in the rhashtable.

This patch also fixes a bug where we don't actually round up the
value of min_size unless it is less than HASH_MIN_SIZE.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 3 ++-
 lib/rhashtable.c           | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 99425f2be708..c85363c45fc0 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -181,7 +181,8 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
 }
 #endif /* CONFIG_PROVE_LOCKING */
 
-int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params);
+int rhashtable_init(struct rhashtable *ht,
+		    const struct rhashtable_params *params);
 
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node);
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e75c48d9d82f..e0a9d59f80d6 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -870,7 +870,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 
-static size_t rounded_hashtable_size(struct rhashtable_params *params)
+static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 {
 	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
 		   (unsigned long)params->min_size);
@@ -919,7 +919,8 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params)
  *	.obj_hashfn = my_hash_fn,
  * };
  */
-int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
+int rhashtable_init(struct rhashtable *ht,
+		    const struct rhashtable_params *params)
 {
 	struct bucket_table *tbl;
 	size_t size;
@@ -946,7 +947,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	if (params->max_size)
 		ht->p.max_size = rounddown_pow_of_two(params->max_size);
 
-	ht->p.min_size = max(params->min_size, HASH_MIN_SIZE);
+	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
 
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
-- 
cgit v1.2.3


From 02fd97c3d4a8a14e222b0021c366db7041d28743 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 20 Mar 2015 21:57:00 +1100
Subject: rhashtable: Allow hash/comparison functions to be inlined

This patch deals with the complaint that we make indirect function
calls on the fast paths unnecessarily in rhashtable.  We resolve
it by moving the fast paths into inline functions that take struct
rhashtable_param (which obviously must be the same set of parameters
supplied to rhashtable_init) as an argument.

The only remaining indirect call is to obj_hashfn (or key_hashfn it
obj_hashfn is unset) on the rehash as well as the insert-during-
rehash slow path.

This patch also extends the support of vairable-length keys to
include those where the key is fixed but scattered in the object.
For example, in netlink we want to key off the namespace and the
portid but they're not next to each other.

This patch does this by directly using the object hash function
as the indicator of whether the key is accessible or not.  It
also adds a new function obj_cmpfn to compare a key against an
object.  This means that the caller no longer needs to supply
explicit compare functions.

All this is done in a backwards compatible manner so no existing
users are affected until they convert to the new interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 386 +++++++++++++++++++++++++++++++++++++++++++++
 lib/rhashtable.c           | 163 ++++++-------------
 2 files changed, 436 insertions(+), 113 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index c85363c45fc0..a7188eeb135b 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -22,6 +22,7 @@
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/rcupdate.h>
 
 /*
  * The end of the chain is marked with a special nulls marks which has
@@ -42,6 +43,9 @@
 #define RHT_HASH_BITS		27
 #define RHT_BASE_SHIFT		RHT_HASH_BITS
 
+/* Base bits plus 1 bit for nulls marker */
+#define RHT_HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)
+
 struct rhash_head {
 	struct rhash_head __rcu		*next;
 };
@@ -72,8 +76,20 @@ struct bucket_table {
 	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
 };
 
+/**
+ * struct rhashtable_compare_arg - Key for the function rhashtable_compare
+ * @ht: Hash table
+ * @key: Key to compare against
+ */
+struct rhashtable_compare_arg {
+	struct rhashtable *ht;
+	const void *key;
+};
+
 typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
 typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed);
+typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
+			       const void *obj);
 
 struct rhashtable;
 
@@ -89,6 +105,7 @@ struct rhashtable;
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Function to hash key
  * @obj_hashfn: Function to hash object
+ * @obj_cmpfn: Function to compare key with object
  */
 struct rhashtable_params {
 	size_t			nelem_hint;
@@ -101,6 +118,7 @@ struct rhashtable_params {
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
+	rht_obj_cmpfn_t		obj_cmpfn;
 };
 
 /**
@@ -165,6 +183,83 @@ static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr)
 	return ((unsigned long) ptr) >> 1;
 }
 
+static inline void *rht_obj(const struct rhashtable *ht,
+			    const struct rhash_head *he)
+{
+	return (char *)he - ht->p.head_offset;
+}
+
+static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
+					    unsigned int hash)
+{
+	return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
+}
+
+static inline unsigned int rht_key_hashfn(
+	struct rhashtable *ht, const struct bucket_table *tbl,
+	const void *key, const struct rhashtable_params params)
+{
+	return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?:
+							ht->p.key_len,
+						   tbl->hash_rnd));
+}
+
+static inline unsigned int rht_head_hashfn(
+	struct rhashtable *ht, const struct bucket_table *tbl,
+	const struct rhash_head *he, const struct rhashtable_params params)
+{
+	const char *ptr = rht_obj(ht, he);
+
+	return likely(params.obj_hashfn) ?
+	       rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) :
+	       rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
+}
+
+/**
+ * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
+ * @ht:		hash table
+ * @tbl:	current table
+ */
+static inline bool rht_grow_above_75(const struct rhashtable *ht,
+				     const struct bucket_table *tbl)
+{
+	/* Expand table when exceeding 75% load */
+	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
+	       (!ht->p.max_size || tbl->size < ht->p.max_size);
+}
+
+/**
+ * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
+ * @ht:		hash table
+ * @tbl:	current table
+ */
+static inline bool rht_shrink_below_30(const struct rhashtable *ht,
+				       const struct bucket_table *tbl)
+{
+	/* Shrink table beneath 30% load */
+	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
+	       tbl->size > ht->p.min_size;
+}
+
+/* The bucket lock is selected based on the hash and protects mutations
+ * on a group of hash buckets.
+ *
+ * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
+ * a single lock always covers both buckets which may both contains
+ * entries which link to the same bucket of the old table during resizing.
+ * This allows to simplify the locking as locking the bucket in both
+ * tables during resize always guarantee protection.
+ *
+ * IMPORTANT: When holding the bucket lock of both the old and new table
+ * during expansions and shrinking, the old bucket lock must always be
+ * acquired first.
+ */
+static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl,
+					  unsigned int hash)
+{
+	return &tbl->locks[hash & tbl->locks_mask];
+}
+
 #ifdef CONFIG_PROVE_LOCKING
 int lockdep_rht_mutex_is_held(struct rhashtable *ht);
 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
@@ -184,6 +279,9 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
 int rhashtable_init(struct rhashtable *ht,
 		    const struct rhashtable_params *params);
 
+int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+			   struct rhash_head *obj,
+			   struct bucket_table *old_tbl);
 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node);
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node);
 
@@ -356,4 +454,292 @@ void rhashtable_destroy(struct rhashtable *ht);
 	rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
 					tbl, hash, member)
 
+static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
+				     const void *obj)
+{
+	struct rhashtable *ht = arg->ht;
+	const char *ptr = obj;
+
+	return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
+}
+
+/**
+ * rhashtable_lookup_fast - search hash table, inlined version
+ * @ht:		hash table
+ * @key:	the pointer to the key
+ * @params:	hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for a entry with an identical key. The first matching entry is returned.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+static inline void *rhashtable_lookup_fast(
+	struct rhashtable *ht, const void *key,
+	const struct rhashtable_params params)
+{
+	struct rhashtable_compare_arg arg = {
+		.ht = ht,
+		.key = key,
+	};
+	const struct bucket_table *tbl;
+	struct rhash_head *he;
+	unsigned hash;
+
+	rcu_read_lock();
+
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+restart:
+	hash = rht_key_hashfn(ht, tbl, key, params);
+	rht_for_each_rcu(he, tbl, hash) {
+		if (params.obj_cmpfn ?
+		    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+		    rhashtable_compare(&arg, rht_obj(ht, he)))
+			continue;
+		rcu_read_unlock();
+		return rht_obj(ht, he);
+	}
+
+	/* Ensure we see any new tables. */
+	smp_rmb();
+
+	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (unlikely(tbl))
+		goto restart;
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+static inline int __rhashtable_insert_fast(
+	struct rhashtable *ht, const void *key, struct rhash_head *obj,
+	const struct rhashtable_params params)
+{
+	struct rhashtable_compare_arg arg = {
+		.ht = ht,
+		.key = key,
+	};
+	int err = -EEXIST;
+	struct bucket_table *tbl, *new_tbl;
+	struct rhash_head *head;
+	spinlock_t *lock;
+	unsigned hash;
+
+	rcu_read_lock();
+
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+	hash = rht_head_hashfn(ht, tbl, obj, params);
+	lock = rht_bucket_lock(tbl, hash);
+
+	spin_lock_bh(lock);
+
+	/* Because we have already taken the bucket lock in tbl,
+	 * if we find that future_tbl is not yet visible then
+	 * that guarantees all other insertions of the same entry
+	 * will also grab the bucket lock in tbl because until
+	 * the rehash completes ht->tbl won't be changed.
+	 */
+	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (unlikely(new_tbl)) {
+		err = rhashtable_insert_slow(ht, key, obj, new_tbl);
+		goto out;
+	}
+
+	if (!key)
+		goto skip_lookup;
+
+	rht_for_each(head, tbl, hash) {
+		if (unlikely(!(params.obj_cmpfn ?
+			       params.obj_cmpfn(&arg, rht_obj(ht, head)) :
+			       rhashtable_compare(&arg, rht_obj(ht, head)))))
+			goto out;
+	}
+
+skip_lookup:
+	err = 0;
+
+	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+
+	RCU_INIT_POINTER(obj->next, head);
+
+	rcu_assign_pointer(tbl->buckets[hash], obj);
+
+	atomic_inc(&ht->nelems);
+	if (rht_grow_above_75(ht, tbl))
+		schedule_work(&ht->run_work);
+
+out:
+	spin_unlock_bh(lock);
+	rcu_read_unlock();
+
+	return err;
+}
+
+/**
+ * rhashtable_insert_fast - insert object into hash table
+ * @ht:		hash table
+ * @obj:	pointer to hash head inside object
+ * @params:	hash table parameters
+ *
+ * Will take a per bucket spinlock to protect against mutual mutations
+ * on the same bucket. Multiple insertions may occur in parallel unless
+ * they map to the same bucket lock.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if the size grows
+ * beyond the watermark indicated by grow_decision() which can be passed
+ * to rhashtable_init().
+ */
+static inline int rhashtable_insert_fast(
+	struct rhashtable *ht, struct rhash_head *obj,
+	const struct rhashtable_params params)
+{
+	return __rhashtable_insert_fast(ht, NULL, obj, params);
+}
+
+/**
+ * rhashtable_lookup_insert_fast - lookup and insert object into hash table
+ * @ht:		hash table
+ * @obj:	pointer to hash head inside object
+ * @params:	hash table parameters
+ *
+ * Locks down the bucket chain in both the old and new table if a resize
+ * is in progress to ensure that writers can't remove from the old table
+ * and can't insert to the new table during the atomic operation of search
+ * and insertion. Searches for duplicates in both the old and new table if
+ * a resize is in progress.
+ *
+ * This lookup function may only be used for fixed key hash table (key_len
+ * parameter set). It will BUG() if used inappropriately.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if the size grows
+ * beyond the watermark indicated by grow_decision() which can be passed
+ * to rhashtable_init().
+ */
+static inline int rhashtable_lookup_insert_fast(
+	struct rhashtable *ht, struct rhash_head *obj,
+	const struct rhashtable_params params)
+{
+	const char *key = rht_obj(ht, obj);
+
+	BUG_ON(ht->p.obj_hashfn);
+
+	return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
+					params);
+}
+
+/**
+ * rhashtable_lookup_insert_key - search and insert object to hash table
+ *				  with explicit key
+ * @ht:		hash table
+ * @key:	key
+ * @obj:	pointer to hash head inside object
+ * @params:	hash table parameters
+ *
+ * Locks down the bucket chain in both the old and new table if a resize
+ * is in progress to ensure that writers can't remove from the old table
+ * and can't insert to the new table during the atomic operation of search
+ * and insertion. Searches for duplicates in both the old and new table if
+ * a resize is in progress.
+ *
+ * Lookups may occur in parallel with hashtable mutations and resizing.
+ *
+ * Will trigger an automatic deferred table resizing if the size grows
+ * beyond the watermark indicated by grow_decision() which can be passed
+ * to rhashtable_init().
+ *
+ * Returns zero on success.
+ */
+static inline int rhashtable_lookup_insert_key(
+	struct rhashtable *ht, const void *key, struct rhash_head *obj,
+	const struct rhashtable_params params)
+{
+	BUG_ON(!ht->p.obj_hashfn || !key);
+
+	return __rhashtable_insert_fast(ht, key, obj, params);
+}
+
+static inline int __rhashtable_remove_fast(
+	struct rhashtable *ht, struct bucket_table *tbl,
+	struct rhash_head *obj, const struct rhashtable_params params)
+{
+	struct rhash_head __rcu **pprev;
+	struct rhash_head *he;
+	spinlock_t * lock;
+	unsigned hash;
+	int err = -ENOENT;
+
+	hash = rht_head_hashfn(ht, tbl, obj, params);
+	lock = rht_bucket_lock(tbl, hash);
+
+	spin_lock_bh(lock);
+
+	pprev = &tbl->buckets[hash];
+	rht_for_each(he, tbl, hash) {
+		if (he != obj) {
+			pprev = &he->next;
+			continue;
+		}
+
+		rcu_assign_pointer(*pprev, obj->next);
+		err = 0;
+		break;
+	}
+
+	spin_unlock_bh(lock);
+
+	return err;
+}
+
+/**
+ * rhashtable_remove_fast - remove object from hash table
+ * @ht:		hash table
+ * @obj:	pointer to hash head inside object
+ * @params:	hash table parameters
+ *
+ * Since the hash chain is single linked, the removal operation needs to
+ * walk the bucket chain upon removal. The removal operation is thus
+ * considerable slow if the hash table is not correctly sized.
+ *
+ * Will automatically shrink the table via rhashtable_expand() if the
+ * shrink_decision function specified at rhashtable_init() returns true.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found.
+ */
+static inline int rhashtable_remove_fast(
+	struct rhashtable *ht, struct rhash_head *obj,
+	const struct rhashtable_params params)
+{
+	struct bucket_table *tbl;
+	int err;
+
+	rcu_read_lock();
+
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+
+	/* Because we have already taken (and released) the bucket
+	 * lock in old_tbl, if we find that future_tbl is not yet
+	 * visible then that guarantees the entry to still be in
+	 * the old tbl if it exists.
+	 */
+	while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) &&
+	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+		;
+
+	if (err)
+		goto out;
+
+	atomic_dec(&ht->nelems);
+	if (rht_shrink_below_30(ht, tbl))
+		schedule_work(&ht->run_work);
+
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+
 #endif /* _LINUX_RHASHTABLE_H */
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e0a9d59f80d6..d1d23fb58525 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1,13 +1,13 @@
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
- * Based on the following paper:
- * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
- *
  * Code partially derived from nft_hash
+ * Rewritten with rehash code from br_multicast plus single list
+ * pointer as suggested by Josh Triplett
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -30,53 +30,11 @@
 #define HASH_MIN_SIZE		4U
 #define BUCKET_LOCKS_PER_CPU   128UL
 
-/* Base bits plus 1 bit for nulls marker */
-#define HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)
-
-/* The bucket lock is selected based on the hash and protects mutations
- * on a group of hash buckets.
- *
- * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
- * a single lock always covers both buckets which may both contains
- * entries which link to the same bucket of the old table during resizing.
- * This allows to simplify the locking as locking the bucket in both
- * tables during resize always guarantee protection.
- *
- * IMPORTANT: When holding the bucket lock of both the old and new table
- * during expansions and shrinking, the old bucket lock must always be
- * acquired first.
- */
-static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
-{
-	return &tbl->locks[hash & tbl->locks_mask];
-}
-
-static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
-{
-	return (void *) he - ht->p.head_offset;
-}
-
-static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
-{
-	return (hash >> HASH_RESERVED_SPACE) & (tbl->size - 1);
-}
-
-static u32 key_hashfn(struct rhashtable *ht, const struct bucket_table *tbl,
-		      const void *key)
-{
-	return rht_bucket_index(tbl, ht->p.hashfn(key, ht->p.key_len,
-						  tbl->hash_rnd));
-}
-
 static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
 		       const struct rhash_head *he)
 {
-	const char *ptr = rht_obj(ht, he);
-
-	return likely(ht->p.key_len) ?
-	       key_hashfn(ht, tbl, ptr + ht->p.key_offset) :
-	       rht_bucket_index(tbl, ht->p.obj_hashfn(ptr, tbl->hash_rnd));
+	return rht_head_hashfn(ht, tbl, he, ht->p);
 }
 
 #ifdef CONFIG_PROVE_LOCKING
@@ -90,7 +48,7 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
 
 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
 {
-	spinlock_t *lock = bucket_lock(tbl, hash);
+	spinlock_t *lock = rht_bucket_lock(tbl, hash);
 
 	return (debug_locks) ? lockdep_is_held(lock) : 1;
 }
@@ -178,32 +136,6 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	return tbl;
 }
 
-/**
- * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
- * @ht:		hash table
- * @tbl:	current table
- */
-static bool rht_grow_above_75(const struct rhashtable *ht,
-			      const struct bucket_table *tbl)
-{
-	/* Expand table when exceeding 75% load */
-	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
-	       (!ht->p.max_size || tbl->size < ht->p.max_size);
-}
-
-/**
- * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
- * @ht:		hash table
- * @tbl:	current table
- */
-static bool rht_shrink_below_30(const struct rhashtable *ht,
-				const struct bucket_table *tbl)
-{
-	/* Shrink table beneath 30% load */
-	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
-	       tbl->size > ht->p.min_size;
-}
-
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
@@ -230,7 +162,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 
 	new_hash = head_hashfn(ht, new_tbl, entry);
 
-	new_bucket_lock = bucket_lock(new_tbl, new_hash);
+	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
 
 	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
 	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
@@ -255,7 +187,7 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	spinlock_t *old_bucket_lock;
 
-	old_bucket_lock = bucket_lock(old_tbl, old_hash);
+	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
 
 	spin_lock_bh(old_bucket_lock);
 	while (!rhashtable_rehash_one(ht, old_hash))
@@ -376,6 +308,37 @@ unlock:
 	mutex_unlock(&ht->mutex);
 }
 
+int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+			   struct rhash_head *obj,
+			   struct bucket_table *tbl)
+{
+	struct rhash_head *head;
+	unsigned hash;
+	int err = -EEXIST;
+
+	hash = head_hashfn(ht, tbl, obj);
+	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
+
+	if (key && rhashtable_lookup_fast(ht, key, ht->p))
+		goto exit;
+
+	err = 0;
+
+	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+
+	RCU_INIT_POINTER(obj->next, head);
+
+	rcu_assign_pointer(tbl->buckets[hash], obj);
+
+	atomic_inc(&ht->nelems);
+
+exit:
+	spin_unlock(rht_bucket_lock(tbl, hash));
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
+
 static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 				bool (*compare)(void *, void *), void *arg)
 {
@@ -390,7 +353,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 
 	old_tbl = rht_dereference_rcu(ht->tbl, ht);
 	hash = head_hashfn(ht, old_tbl, obj);
-	old_lock = bucket_lock(old_tbl, hash);
+	old_lock = rht_bucket_lock(old_tbl, hash);
 
 	spin_lock_bh(old_lock);
 
@@ -403,7 +366,8 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
 	if (tbl != old_tbl) {
 		hash = head_hashfn(ht, tbl, obj);
-		spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
+		spin_lock_nested(rht_bucket_lock(tbl, hash),
+				 SINGLE_DEPTH_NESTING);
 	}
 
 	if (compare &&
@@ -430,7 +394,7 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 
 exit:
 	if (tbl != old_tbl)
-		spin_unlock(bucket_lock(tbl, hash));
+		spin_unlock(rht_bucket_lock(tbl, hash));
 
 	spin_unlock_bh(old_lock);
 
@@ -471,7 +435,7 @@ static bool __rhashtable_remove(struct rhashtable *ht,
 	bool ret = false;
 
 	hash = head_hashfn(ht, tbl, obj);
-	lock = bucket_lock(tbl, hash);
+	lock = rht_bucket_lock(tbl, hash);
 
 	spin_lock_bh(lock);
 
@@ -537,19 +501,6 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 }
 EXPORT_SYMBOL_GPL(rhashtable_remove);
 
-struct rhashtable_compare_arg {
-	struct rhashtable *ht;
-	const void *key;
-};
-
-static bool rhashtable_compare(void *ptr, void *arg)
-{
-	struct rhashtable_compare_arg *x = arg;
-	struct rhashtable *ht = x->ht;
-
-	return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len);
-}
-
 /**
  * rhashtable_lookup - lookup key in hash table
  * @ht:		hash table
@@ -565,14 +516,7 @@ static bool rhashtable_compare(void *ptr, void *arg)
  */
 void *rhashtable_lookup(struct rhashtable *ht, const void *key)
 {
-	struct rhashtable_compare_arg arg = {
-		.ht = ht,
-		.key = key,
-	};
-
-	BUG_ON(!ht->p.key_len);
-
-	return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg);
+	return rhashtable_lookup_fast(ht, key, ht->p);
 }
 EXPORT_SYMBOL_GPL(rhashtable_lookup);
 
@@ -591,7 +535,8 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup);
  * Returns the first entry on which the compare function returned true.
  */
 void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
-				bool (*compare)(void *, void *), void *arg)
+				bool (*compare)(void *, void *),
+				void *arg)
 {
 	const struct bucket_table *tbl;
 	struct rhash_head *he;
@@ -601,7 +546,7 @@ void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
 
 	tbl = rht_dereference_rcu(ht->tbl, ht);
 restart:
-	hash = key_hashfn(ht, tbl, key);
+	hash = rht_key_hashfn(ht, tbl, key, ht->p);
 	rht_for_each_rcu(he, tbl, hash) {
 		if (!compare(rht_obj(ht, he), arg))
 			continue;
@@ -643,15 +588,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
  */
 bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct rhashtable_compare_arg arg = {
-		.ht = ht,
-		.key = rht_obj(ht, obj) + ht->p.key_offset,
-	};
-
-	BUG_ON(!ht->p.key_len);
-
-	return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare,
-						&arg);
+	return rhashtable_lookup_insert_fast(ht, obj, ht->p);
 }
 EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
 
@@ -927,8 +864,8 @@ int rhashtable_init(struct rhashtable *ht,
 
 	size = HASH_DEFAULT_SIZE;
 
-	if ((params->key_len && !params->hashfn) ||
-	    (!params->key_len && !params->obj_hashfn))
+	if ((!(params->key_len && params->hashfn) && !params->obj_hashfn) ||
+	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
 
 	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
-- 
cgit v1.2.3


From dc0ee268d85026530720d8c874716287b7ede25b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 20 Mar 2015 21:57:06 +1100
Subject: rhashtable: Rip out obsolete out-of-line interface

Now that all rhashtable users have been converted over to the
inline interface, this patch removes the unused out-of-line
interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  19 +--
 lib/rhashtable.c           | 284 ---------------------------------------------
 2 files changed, 3 insertions(+), 300 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index a7188eeb135b..c3034de2c235 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -1,14 +1,13 @@
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
  * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
- * Based on the following paper by Josh Triplett, Paul E. McKenney
- * and Jonathan Walpole:
- * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
- *
  * Code partially derived from nft_hash
+ * Rewritten with rehash code from br_multicast plus single list
+ * pointer as suggested by Josh Triplett
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -282,22 +281,10 @@ int rhashtable_init(struct rhashtable *ht,
 int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct rhash_head *obj,
 			   struct bucket_table *old_tbl);
-void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node);
-bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node);
 
 int rhashtable_expand(struct rhashtable *ht);
 int rhashtable_shrink(struct rhashtable *ht);
 
-void *rhashtable_lookup(struct rhashtable *ht, const void *key);
-void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
-				bool (*compare)(void *, void *), void *arg);
-
-bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj);
-bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
-				      struct rhash_head *obj,
-				      bool (*compare)(void *, void *),
-				      void *arg);
-
 int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
 int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index d1d23fb58525..83cfedd6612a 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -339,290 +339,6 @@ exit:
 }
 EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
 
-static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
-				bool (*compare)(void *, void *), void *arg)
-{
-	struct bucket_table *tbl, *old_tbl;
-	struct rhash_head *head;
-	bool no_resize_running;
-	unsigned hash;
-	spinlock_t *old_lock;
-	bool success = true;
-
-	rcu_read_lock();
-
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	hash = head_hashfn(ht, old_tbl, obj);
-	old_lock = rht_bucket_lock(old_tbl, hash);
-
-	spin_lock_bh(old_lock);
-
-	/* Because we have already taken the bucket lock in old_tbl,
-	 * if we find that future_tbl is not yet visible then that
-	 * guarantees all other insertions of the same entry will
-	 * also grab the bucket lock in old_tbl because until the
-	 * rehash completes ht->tbl won't be changed.
-	 */
-	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
-	if (tbl != old_tbl) {
-		hash = head_hashfn(ht, tbl, obj);
-		spin_lock_nested(rht_bucket_lock(tbl, hash),
-				 SINGLE_DEPTH_NESTING);
-	}
-
-	if (compare &&
-	    rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
-				      compare, arg)) {
-		success = false;
-		goto exit;
-	}
-
-	no_resize_running = tbl == old_tbl;
-
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
-
-	if (rht_is_a_nulls(head))
-		INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
-	else
-		RCU_INIT_POINTER(obj->next, head);
-
-	rcu_assign_pointer(tbl->buckets[hash], obj);
-
-	atomic_inc(&ht->nelems);
-	if (no_resize_running && rht_grow_above_75(ht, tbl))
-		schedule_work(&ht->run_work);
-
-exit:
-	if (tbl != old_tbl)
-		spin_unlock(rht_bucket_lock(tbl, hash));
-
-	spin_unlock_bh(old_lock);
-
-	rcu_read_unlock();
-
-	return success;
-}
-
-/**
- * rhashtable_insert - insert object into hash table
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- *
- * Will take a per bucket spinlock to protect against mutual mutations
- * on the same bucket. Multiple insertions may occur in parallel unless
- * they map to the same bucket lock.
- *
- * It is safe to call this function from atomic context.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
-{
-	__rhashtable_insert(ht, obj, NULL, NULL);
-}
-EXPORT_SYMBOL_GPL(rhashtable_insert);
-
-static bool __rhashtable_remove(struct rhashtable *ht,
-				struct bucket_table *tbl,
-				struct rhash_head *obj)
-{
-	struct rhash_head __rcu **pprev;
-	struct rhash_head *he;
-	spinlock_t * lock;
-	unsigned hash;
-	bool ret = false;
-
-	hash = head_hashfn(ht, tbl, obj);
-	lock = rht_bucket_lock(tbl, hash);
-
-	spin_lock_bh(lock);
-
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
-		if (he != obj) {
-			pprev = &he->next;
-			continue;
-		}
-
-		rcu_assign_pointer(*pprev, obj->next);
-		ret = true;
-		break;
-	}
-
-	spin_unlock_bh(lock);
-
-	return ret;
-}
-
-/**
- * rhashtable_remove - remove object from hash table
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- *
- * Since the hash chain is single linked, the removal operation needs to
- * walk the bucket chain upon removal. The removal operation is thus
- * considerable slow if the hash table is not correctly sized.
- *
- * Will automatically shrink the table via rhashtable_expand() if the
- * shrink_decision function specified at rhashtable_init() returns true.
- *
- * The caller must ensure that no concurrent table mutations occur. It is
- * however valid to have concurrent lookups if they are RCU protected.
- */
-bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
-{
-	struct bucket_table *tbl;
-	bool ret;
-
-	rcu_read_lock();
-
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-
-	/* Because we have already taken (and released) the bucket
-	 * lock in old_tbl, if we find that future_tbl is not yet
-	 * visible then that guarantees the entry to still be in
-	 * the old tbl if it exists.
-	 */
-	while (!(ret = __rhashtable_remove(ht, tbl, obj)) &&
-	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
-		;
-
-	if (ret) {
-		atomic_dec(&ht->nelems);
-		if (rht_shrink_below_30(ht, tbl))
-			schedule_work(&ht->run_work);
-	}
-
-	rcu_read_unlock();
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(rhashtable_remove);
-
-/**
- * rhashtable_lookup - lookup key in hash table
- * @ht:		hash table
- * @key:	pointer to key
- *
- * Computes the hash value for the key and traverses the bucket chain looking
- * for a entry with an identical key. The first matching entry is returned.
- *
- * This lookup function may only be used for fixed key hash table (key_len
- * parameter set). It will BUG() if used inappropriately.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- */
-void *rhashtable_lookup(struct rhashtable *ht, const void *key)
-{
-	return rhashtable_lookup_fast(ht, key, ht->p);
-}
-EXPORT_SYMBOL_GPL(rhashtable_lookup);
-
-/**
- * rhashtable_lookup_compare - search hash table with compare function
- * @ht:		hash table
- * @key:	the pointer to the key
- * @compare:	compare function, must return true on match
- * @arg:	argument passed on to compare function
- *
- * Traverses the bucket chain behind the provided hash value and calls the
- * specified compare function for each entry.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- *
- * Returns the first entry on which the compare function returned true.
- */
-void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
-				bool (*compare)(void *, void *),
-				void *arg)
-{
-	const struct bucket_table *tbl;
-	struct rhash_head *he;
-	u32 hash;
-
-	rcu_read_lock();
-
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-restart:
-	hash = rht_key_hashfn(ht, tbl, key, ht->p);
-	rht_for_each_rcu(he, tbl, hash) {
-		if (!compare(rht_obj(ht, he), arg))
-			continue;
-		rcu_read_unlock();
-		return rht_obj(ht, he);
-	}
-
-	/* Ensure we see any new tables. */
-	smp_rmb();
-
-	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
-	if (unlikely(tbl))
-		goto restart;
-	rcu_read_unlock();
-
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
-
-/**
- * rhashtable_lookup_insert - lookup and insert object into hash table
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- *
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
- * This lookup function may only be used for fixed key hash table (key_len
- * parameter set). It will BUG() if used inappropriately.
- *
- * It is safe to call this function from atomic context.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
-{
-	return rhashtable_lookup_insert_fast(ht, obj, ht->p);
-}
-EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
-
-/**
- * rhashtable_lookup_compare_insert - search and insert object to hash table
- *                                    with compare function
- * @ht:		hash table
- * @obj:	pointer to hash head inside object
- * @compare:	compare function, must return true on match
- * @arg:	argument passed on to compare function
- *
- * Locks down the bucket chain in both the old and new table if a resize
- * is in progress to ensure that writers can't remove from the old table
- * and can't insert to the new table during the atomic operation of search
- * and insertion. Searches for duplicates in both the old and new table if
- * a resize is in progress.
- *
- * Lookups may occur in parallel with hashtable mutations and resizing.
- *
- * Will trigger an automatic deferred table resizing if the size grows
- * beyond the watermark indicated by grow_decision() which can be passed
- * to rhashtable_init().
- */
-bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
-				      struct rhash_head *obj,
-				      bool (*compare)(void *, void *),
-				      void *arg)
-{
-	BUG_ON(!ht->p.key_len);
-
-	return __rhashtable_insert(ht, obj, compare, arg);
-}
-EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);
-
 /**
  * rhashtable_walk_init - Initialise an iterator
  * @ht:		Table to walk over
-- 
cgit v1.2.3


From 6626af692692b52c8f9e20ad8201a3255e5ab25b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 20 Mar 2015 18:18:45 -0400
Subject: rhashtable: Fix undeclared EEXIST build error on ia64

We need to include linux/errno.h in rhashtable.h since it doesn't
always get included otherwise.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index c3034de2c235..89d102270570 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -18,6 +18,7 @@
 #define _LINUX_RHASHTABLE_H
 
 #include <linux/compiler.h>
+#include <linux/errno.h>
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
-- 
cgit v1.2.3


From 94caee8c312d96522bcdae88791aaa9ebcd5f22c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 20 Mar 2015 15:11:11 +0100
Subject: ebpf: add sched_act_type and map it to sk_filter's verifier ops

In order to prepare eBPF support for tc action, we need to add
sched_act_type, so that the eBPF verifier is aware of what helper
function act_bpf may use, that it can load skb data and read out
currently available skb fields.

This is bascially analogous to 96be4325f443 ("ebpf: add sched_cls_type
and map it to sk_filter's verifier ops").

BPF_PROG_TYPE_SCHED_CLS and BPF_PROG_TYPE_SCHED_ACT need to be
separate since both will have a different set of functionality in
future (classifier vs action), thus we won't run into ABI troubles
when the point in time comes to diverge functionality from the
classifier.

The future plan for act_bpf would be that it will be able to write
into skb->data and alter selected fields mirrored in struct __sk_buff.

For an initial support, it's sufficient to map it to sk_filter_ops.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Reviewed-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 1 +
 kernel/bpf/verifier.c    | 1 +
 net/core/filter.c        | 6 ++++++
 3 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1623047af463..3dd314a45d0d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -119,6 +119,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_UNSPEC,
 	BPF_PROG_TYPE_SOCKET_FILTER,
 	BPF_PROG_TYPE_SCHED_CLS,
+	BPF_PROG_TYPE_SCHED_ACT,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c22ebd36fa4b..0e714f799ec0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1180,6 +1180,7 @@ static bool may_access_skb(enum bpf_prog_type type)
 	switch (type) {
 	case BPF_PROG_TYPE_SOCKET_FILTER:
 	case BPF_PROG_TYPE_SCHED_CLS:
+	case BPF_PROG_TYPE_SCHED_ACT:
 		return true;
 	default:
 		return false;
diff --git a/net/core/filter.c b/net/core/filter.c
index bdaac5895def..084eacc4d1d4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1263,10 +1263,16 @@ static struct bpf_prog_type_list sched_cls_type __read_mostly = {
 	.type = BPF_PROG_TYPE_SCHED_CLS,
 };
 
+static struct bpf_prog_type_list sched_act_type __read_mostly = {
+	.ops = &sk_filter_ops,
+	.type = BPF_PROG_TYPE_SCHED_ACT,
+};
+
 static int __init register_sk_filter_ops(void)
 {
 	bpf_register_prog_type(&sk_filter_type);
 	bpf_register_prog_type(&sched_cls_type);
+	bpf_register_prog_type(&sched_act_type);
 
 	return 0;
 }
-- 
cgit v1.2.3


From a8cb5f556b567974d75ea29c15181c445c541b1f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 20 Mar 2015 15:11:12 +0100
Subject: act_bpf: add initial eBPF support for actions

This work extends the "classic" BPF programmable tc action by extending
its scope also to native eBPF code!

Together with commit e2e9b6541dd4 ("cls_bpf: add initial eBPF support
for programmable classifiers") this adds the facility to implement fully
flexible classifier and actions for tc that can be implemented in a C
subset in user space, "safely" loaded into the kernel, and being run in
native speed when JITed.

Also, since eBPF maps can be shared between eBPF programs, it offers the
possibility that cls_bpf and act_bpf can share data 1) between themselves
and 2) between user space applications. That means that, f.e. customized
runtime statistics can be collected in user space, but also more importantly
classifier and action behaviour could be altered based on map input from
the user space application.

For the remaining details on the workflow and integration, see the cls_bpf
commit e2e9b6541dd4. Preliminary iproute2 part can be found under [1].

  [1] http://git.breakpoint.cc/cgit/dborkman/iproute2.git/log/?h=ebpf-act

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_bpf.h        |   6 +-
 include/uapi/linux/tc_act/tc_bpf.h |   2 +
 net/sched/act_bpf.c                | 295 ++++++++++++++++++++++++++-----------
 3 files changed, 220 insertions(+), 83 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h
index 86a070ffc930..a152e9858b2c 100644
--- a/include/net/tc_act/tc_bpf.h
+++ b/include/net/tc_act/tc_bpf.h
@@ -16,8 +16,12 @@
 struct tcf_bpf {
 	struct tcf_common	common;
 	struct bpf_prog		*filter;
+	union {
+		u32		bpf_fd;
+		u16		bpf_num_ops;
+	};
 	struct sock_filter	*bpf_ops;
-	u16			bpf_num_ops;
+	const char		*bpf_name;
 };
 #define to_bpf(a) \
 	container_of(a->priv, struct tcf_bpf, common)
diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
index 5288bd77e63b..07f17cc70bb3 100644
--- a/include/uapi/linux/tc_act/tc_bpf.h
+++ b/include/uapi/linux/tc_act/tc_bpf.h
@@ -24,6 +24,8 @@ enum {
 	TCA_ACT_BPF_PARMS,
 	TCA_ACT_BPF_OPS_LEN,
 	TCA_ACT_BPF_OPS,
+	TCA_ACT_BPF_FD,
+	TCA_ACT_BPF_NAME,
 	__TCA_ACT_BPF_MAX,
 };
 #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5f6288fa3f12..4d2cede17468 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -13,26 +13,40 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/filter.h>
+#include <linux/bpf.h>
+
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 #include <linux/tc_act/tc_bpf.h>
 #include <net/tc_act/tc_bpf.h>
 
-#define BPF_TAB_MASK     15
+#define BPF_TAB_MASK		15
+#define ACT_BPF_NAME_LEN	256
+
+struct tcf_bpf_cfg {
+	struct bpf_prog *filter;
+	struct sock_filter *bpf_ops;
+	char *bpf_name;
+	u32 bpf_fd;
+	u16 bpf_num_ops;
+};
 
-static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
+static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 		   struct tcf_result *res)
 {
-	struct tcf_bpf *b = a->priv;
+	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
 
-	spin_lock(&b->tcf_lock);
+	spin_lock(&prog->tcf_lock);
 
-	b->tcf_tm.lastuse = jiffies;
-	bstats_update(&b->tcf_bstats, skb);
+	prog->tcf_tm.lastuse = jiffies;
+	bstats_update(&prog->tcf_bstats, skb);
 
-	filter_res = BPF_PROG_RUN(b->filter, skb);
+	/* Needed here for accessing maps. */
+	rcu_read_lock();
+	filter_res = BPF_PROG_RUN(prog->filter, skb);
+	rcu_read_unlock();
 
 	/* A BPF program may overwrite the default action opcode.
 	 * Similarly as in cls_bpf, if filter_res == -1 we use the
@@ -52,52 +66,87 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
 		break;
 	case TC_ACT_SHOT:
 		action = filter_res;
-		b->tcf_qstats.drops++;
+		prog->tcf_qstats.drops++;
 		break;
 	case TC_ACT_UNSPEC:
-		action = b->tcf_action;
+		action = prog->tcf_action;
 		break;
 	default:
 		action = TC_ACT_UNSPEC;
 		break;
 	}
 
-	spin_unlock(&b->tcf_lock);
+	spin_unlock(&prog->tcf_lock);
 	return action;
 }
 
-static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a,
+static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog)
+{
+	return !prog->bpf_ops;
+}
+
+static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
+				 struct sk_buff *skb)
+{
+	struct nlattr *nla;
+
+	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops))
+		return -EMSGSIZE;
+
+	nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops *
+			  sizeof(struct sock_filter));
+	if (nla == NULL)
+		return -EMSGSIZE;
+
+	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+	return 0;
+}
+
+static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
+				  struct sk_buff *skb)
+{
+	if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd))
+		return -EMSGSIZE;
+
+	if (prog->bpf_name &&
+	    nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
 			int bind, int ref)
 {
 	unsigned char *tp = skb_tail_pointer(skb);
-	struct tcf_bpf *b = a->priv;
+	struct tcf_bpf *prog = act->priv;
 	struct tc_act_bpf opt = {
-		.index    = b->tcf_index,
-		.refcnt   = b->tcf_refcnt - ref,
-		.bindcnt  = b->tcf_bindcnt - bind,
-		.action   = b->tcf_action,
+		.index   = prog->tcf_index,
+		.refcnt  = prog->tcf_refcnt - ref,
+		.bindcnt = prog->tcf_bindcnt - bind,
+		.action  = prog->tcf_action,
 	};
-	struct tcf_t t;
-	struct nlattr *nla;
+	struct tcf_t tm;
+	int ret;
 
 	if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
-	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops))
-		goto nla_put_failure;
-
-	nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops *
-			  sizeof(struct sock_filter));
-	if (!nla)
+	if (tcf_bpf_is_ebpf(prog))
+		ret = tcf_bpf_dump_ebpf_info(prog, skb);
+	else
+		ret = tcf_bpf_dump_bpf_info(prog, skb);
+	if (ret)
 		goto nla_put_failure;
 
-	memcpy(nla_data(nla), b->bpf_ops, nla_len(nla));
+	tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
+	tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
+	tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
 
-	t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse);
-	t.expires = jiffies_to_clock_t(b->tcf_tm.expires);
-	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t))
+	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm))
 		goto nla_put_failure;
+
 	return skb->len;
 
 nla_put_failure:
@@ -107,36 +156,21 @@ nla_put_failure:
 
 static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
 	[TCA_ACT_BPF_PARMS]	= { .len = sizeof(struct tc_act_bpf) },
+	[TCA_ACT_BPF_FD]	= { .type = NLA_U32 },
+	[TCA_ACT_BPF_NAME]	= { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN },
 	[TCA_ACT_BPF_OPS_LEN]	= { .type = NLA_U16 },
 	[TCA_ACT_BPF_OPS]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
 };
 
-static int tcf_bpf_init(struct net *net, struct nlattr *nla,
-			struct nlattr *est, struct tc_action *a,
-			int ovr, int bind)
+static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
 {
-	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
-	struct tc_act_bpf *parm;
-	struct tcf_bpf *b;
-	u16 bpf_size, bpf_num_ops;
 	struct sock_filter *bpf_ops;
-	struct sock_fprog_kern tmp;
+	struct sock_fprog_kern fprog_tmp;
 	struct bpf_prog *fp;
+	u16 bpf_size, bpf_num_ops;
 	int ret;
 
-	if (!nla)
-		return -EINVAL;
-
-	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
-	if (ret < 0)
-		return ret;
-
-	if (!tb[TCA_ACT_BPF_PARMS] ||
-	    !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS])
-		return -EINVAL;
-	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
-
 	bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
 	if (bpf_num_ops	> BPF_MAXINSNS || bpf_num_ops == 0)
 		return -EINVAL;
@@ -146,68 +180,165 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
-	if (!bpf_ops)
+	if (bpf_ops == NULL)
 		return -ENOMEM;
 
 	memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
 
-	tmp.len = bpf_num_ops;
-	tmp.filter = bpf_ops;
+	fprog_tmp.len = bpf_num_ops;
+	fprog_tmp.filter = bpf_ops;
 
-	ret = bpf_prog_create(&fp, &tmp);
-	if (ret)
-		goto free_bpf_ops;
+	ret = bpf_prog_create(&fp, &fprog_tmp);
+	if (ret < 0) {
+		kfree(bpf_ops);
+		return ret;
+	}
 
-	if (!tcf_hash_check(parm->index, a, bind)) {
-		ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind);
-		if (ret)
+	cfg->bpf_ops = bpf_ops;
+	cfg->bpf_num_ops = bpf_num_ops;
+	cfg->filter = fp;
+
+	return 0;
+}
+
+static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
+{
+	struct bpf_prog *fp;
+	char *name = NULL;
+	u32 bpf_fd;
+
+	bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
+
+	fp = bpf_prog_get(bpf_fd);
+	if (IS_ERR(fp))
+		return PTR_ERR(fp);
+
+	if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
+		bpf_prog_put(fp);
+		return -EINVAL;
+	}
+
+	if (tb[TCA_ACT_BPF_NAME]) {
+		name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
+			       nla_len(tb[TCA_ACT_BPF_NAME]),
+			       GFP_KERNEL);
+		if (!name) {
+			bpf_prog_put(fp);
+			return -ENOMEM;
+		}
+	}
+
+	cfg->bpf_fd = bpf_fd;
+	cfg->bpf_name = name;
+	cfg->filter = fp;
+
+	return 0;
+}
+
+static int tcf_bpf_init(struct net *net, struct nlattr *nla,
+			struct nlattr *est, struct tc_action *act,
+			int replace, int bind)
+{
+	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+	struct tc_act_bpf *parm;
+	struct tcf_bpf *prog;
+	struct tcf_bpf_cfg cfg;
+	bool is_bpf, is_ebpf;
+	int ret;
+
+	if (!nla)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
+	if (ret < 0)
+		return ret;
+
+	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
+	is_ebpf = tb[TCA_ACT_BPF_FD];
+
+	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
+	    !tb[TCA_ACT_BPF_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
+
+	memset(&cfg, 0, sizeof(cfg));
+
+	ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
+		       tcf_bpf_init_from_efd(tb, &cfg);
+	if (ret < 0)
+		return ret;
+
+	if (!tcf_hash_check(parm->index, act, bind)) {
+		ret = tcf_hash_create(parm->index, est, act,
+				      sizeof(*prog), bind);
+		if (ret < 0)
 			goto destroy_fp;
 
 		ret = ACT_P_CREATED;
 	} else {
+		/* Don't override defaults. */
 		if (bind)
 			goto destroy_fp;
-		tcf_hash_release(a, bind);
-		if (!ovr) {
+
+		tcf_hash_release(act, bind);
+		if (!replace) {
 			ret = -EEXIST;
 			goto destroy_fp;
 		}
 	}
 
-	b = to_bpf(a);
-	spin_lock_bh(&b->tcf_lock);
-	b->tcf_action = parm->action;
-	b->bpf_num_ops = bpf_num_ops;
-	b->bpf_ops = bpf_ops;
-	b->filter = fp;
-	spin_unlock_bh(&b->tcf_lock);
+	prog = to_bpf(act);
+	spin_lock_bh(&prog->tcf_lock);
+
+	prog->bpf_ops = cfg.bpf_ops;
+	prog->bpf_name = cfg.bpf_name;
+
+	if (cfg.bpf_num_ops)
+		prog->bpf_num_ops = cfg.bpf_num_ops;
+	if (cfg.bpf_fd)
+		prog->bpf_fd = cfg.bpf_fd;
+
+	prog->tcf_action = parm->action;
+	prog->filter = cfg.filter;
+
+	spin_unlock_bh(&prog->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(a);
+		tcf_hash_insert(act);
+
 	return ret;
 
 destroy_fp:
-	bpf_prog_destroy(fp);
-free_bpf_ops:
-	kfree(bpf_ops);
+	if (is_ebpf)
+		bpf_prog_put(cfg.filter);
+	else
+		bpf_prog_destroy(cfg.filter);
+
+	kfree(cfg.bpf_ops);
+	kfree(cfg.bpf_name);
+
 	return ret;
 }
 
-static void tcf_bpf_cleanup(struct tc_action *a, int bind)
+static void tcf_bpf_cleanup(struct tc_action *act, int bind)
 {
-	struct tcf_bpf *b = a->priv;
+	const struct tcf_bpf *prog = act->priv;
 
-	bpf_prog_destroy(b->filter);
+	if (tcf_bpf_is_ebpf(prog))
+		bpf_prog_put(prog->filter);
+	else
+		bpf_prog_destroy(prog->filter);
 }
 
-static struct tc_action_ops act_bpf_ops = {
-	.kind =		"bpf",
-	.type =		TCA_ACT_BPF,
-	.owner =	THIS_MODULE,
-	.act =		tcf_bpf,
-	.dump =		tcf_bpf_dump,
-	.cleanup =	tcf_bpf_cleanup,
-	.init =		tcf_bpf_init,
+static struct tc_action_ops act_bpf_ops __read_mostly = {
+	.kind		=	"bpf",
+	.type		=	TCA_ACT_BPF,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_bpf,
+	.dump		=	tcf_bpf_dump,
+	.cleanup	=	tcf_bpf_cleanup,
+	.init		=	tcf_bpf_init,
 };
 
 static int __init bpf_init_module(void)
-- 
cgit v1.2.3


From d3593b5cef76db45c864de23c599b58198879e8c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 20 Mar 2015 17:15:19 -0700
Subject: Revert "selinux: add a skb_owned_by() hook"

This reverts commit ca10b9e9a8ca7342ee07065289cbe74ac128c169.

No longer needed after commit eb8895debe1baba41fcb62c78a16f0c63c21662a
("tcp: tcp_make_synack() should use sock_wmalloc")

When under SYNFLOOD, we build lot of SYNACK and hit false sharing
because of multiple modifications done on sk_listener->sk_wmem_alloc

Since tcp_make_synack() uses sock_wmalloc(), there is no need
to call skb_set_owner_w() again, as this adds two atomic operations.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h | 8 --------
 net/ipv4/tcp_output.c    | 1 -
 security/capability.c    | 6 ------
 security/security.c      | 5 -----
 security/selinux/hooks.c | 7 -------
 5 files changed, 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index a1b7dbd127ff..25a079a7c3b3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1716,7 +1716,6 @@ struct security_operations {
 	int (*tun_dev_attach_queue) (void *security);
 	int (*tun_dev_attach) (struct sock *sk, void *security);
 	int (*tun_dev_open) (void *security);
-	void (*skb_owned_by) (struct sk_buff *skb, struct sock *sk);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -2735,8 +2734,6 @@ int security_tun_dev_attach_queue(void *security);
 int security_tun_dev_attach(struct sock *sk, void *security);
 int security_tun_dev_open(void *security);
 
-void security_skb_owned_by(struct sk_buff *skb, struct sock *sk);
-
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
 					       struct sock *other,
@@ -2928,11 +2925,6 @@ static inline int security_tun_dev_open(void *security)
 {
 	return 0;
 }
-
-static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk)
-{
-}
-
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c2f0f6065cb1..18474088c3d0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2926,7 +2926,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	skb_reserve(skb, MAX_TCP_HEADER);
 
 	skb_dst_set(skb, dst);
-	security_skb_owned_by(skb, sk);
 
 	mss = dst_metric_advmss(dst);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
diff --git a/security/capability.c b/security/capability.c
index 070dd46f62f4..58a1600c149b 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -776,11 +776,6 @@ static int cap_tun_dev_open(void *security)
 {
 	return 0;
 }
-
-static void cap_skb_owned_by(struct sk_buff *skb, struct sock *sk)
-{
-}
-
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -1134,7 +1129,6 @@ void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, tun_dev_open);
 	set_to_cap_if_null(ops, tun_dev_attach_queue);
 	set_to_cap_if_null(ops, tun_dev_attach);
-	set_to_cap_if_null(ops, skb_owned_by);
 #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	set_to_cap_if_null(ops, xfrm_policy_alloc_security);
diff --git a/security/security.c b/security/security.c
index e81d5bbe7363..1f475aa53288 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1359,11 +1359,6 @@ int security_tun_dev_open(void *security)
 }
 EXPORT_SYMBOL(security_tun_dev_open);
 
-void security_skb_owned_by(struct sk_buff *skb, struct sock *sk)
-{
-	security_ops->skb_owned_by(skb, sk);
-}
-
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4d1a54190388..edc66de39f2e 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -51,7 +51,6 @@
 #include <linux/tty.h>
 #include <net/icmp.h>
 #include <net/ip.h>		/* for local_port_range[] */
-#include <net/sock.h>
 #include <net/tcp.h>		/* struct or_callable used in sock_rcv_skb */
 #include <net/inet_connection_sock.h>
 #include <net/net_namespace.h>
@@ -4652,11 +4651,6 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb)
 	selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid);
 }
 
-static void selinux_skb_owned_by(struct sk_buff *skb, struct sock *sk)
-{
-	skb_set_owner_w(skb, sk);
-}
-
 static int selinux_secmark_relabel_packet(u32 sid)
 {
 	const struct task_security_struct *__tsec;
@@ -6041,7 +6035,6 @@ static struct security_operations selinux_ops = {
 	.tun_dev_attach_queue =		selinux_tun_dev_attach_queue,
 	.tun_dev_attach =		selinux_tun_dev_attach,
 	.tun_dev_open =			selinux_tun_dev_open,
-	.skb_owned_by =			selinux_skb_owned_by,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
-- 
cgit v1.2.3


From 13bb8e2eb37eb3badf5bf4b6bb90e485fbb6b1cd Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Fri, 20 Mar 2015 17:42:46 -0700
Subject: switchdev: kernel-doc cleanup on swithdev ops

Suggested-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index e5de53f92482..d2e69ee3019a 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -19,25 +19,16 @@ struct fib_info;
 /**
  * struct switchdev_ops - switchdev operations
  *
- * int (*swdev_parent_id_get)(struct net_device *dev,
- *			      struct netdev_phys_item_id *psid);
- *	Called to get an ID of the switch chip this port is part of.
- *	If driver implements this, it indicates that it represents a port
- *	of a switch chip.
+ * @swdev_parent_id_get: Called to get an ID of the switch chip this port
+ *   is part of.  If driver implements this, it indicates that it
+ *   represents a port of a switch chip.
  *
- * int (*swdev_port_stp_update)(struct net_device *dev, u8 state);
- *	Called to notify switch device port of bridge port STP
- *	state change.
+ * @swdev_port_stp_update: Called to notify switch device port of bridge
+ *   port STP state change.
  *
- * int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst,
- *			     int dst_len, struct fib_info *fi,
- *			     u8 tos, u8 type, u32 nlflags, u32 tb_id);
- *	Called to add/modify IPv4 route to switch device.
+ * @swdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device.
  *
- * int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst,
- *			     int dst_len, struct fib_info *fi,
- *			     u8 tos, u8 type, u32 tb_id);
- *	Called to delete IPv4 route from switch device.
+ * @swdev_fib_ipv4_del: Called to delete IPv4 route from switch device.
  */
 struct swdev_ops {
 	int	(*swdev_parent_id_get)(struct net_device *dev,
-- 
cgit v1.2.3


From 8da86466b83787df0d4b89ec81c310de072d101c Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Date: Thu, 19 Mar 2015 22:41:46 +0900
Subject: net: neighbour: Add mcast_resolicit to configure the number of
 multicast resolicitations in PROBE state.

We send unicast neighbor (ARP or NDP) solicitations ucast_probes
times in PROBE state.  Zhu Yanjun reported that some implementation
does not reply against them and the entry will become FAILED, which
is undesirable.

We had been dealt with such nodes by sending multicast probes mcast_
solicit times after unicast probes in PROBE state.  In 2003, I made
a change not to send them to improve compatibility with IPv6 NDP.

Let's introduce per-protocol per-interface sysctl knob "mcast_
reprobe" to configure the number of multicast (re)solicitation for
reconfirmation in PROBE state.  The default is 0, since we have
been doing so for 10+ years.

Reported-by: Zhu Yanjun <Yanjun.Zhu@windriver.com>
CC: Ulf Samuelsson <ulf.samuelsson@ericsson.com>
Signed-off-by: YOSHIFUJI Hideaki <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h        |  1 +
 include/uapi/linux/neighbour.h |  1 +
 net/core/neighbour.c           | 15 +++++++++++----
 3 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index e7bdf5170802..bd33e66f49aa 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -42,6 +42,7 @@ enum {
 	NEIGH_VAR_MCAST_PROBES,
 	NEIGH_VAR_UCAST_PROBES,
 	NEIGH_VAR_APP_PROBES,
+	NEIGH_VAR_MCAST_REPROBES,
 	NEIGH_VAR_RETRANS_TIME,
 	NEIGH_VAR_BASE_REACHABLE_TIME,
 	NEIGH_VAR_DELAY_PROBE_TIME,
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 3873a35509aa..2e35c61bbdd1 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -126,6 +126,7 @@ enum {
 	NDTPA_PROXY_QLEN,		/* u32 */
 	NDTPA_LOCKTIME,			/* u64, msecs */
 	NDTPA_QUEUE_LENBYTES,		/* u32 */
+	NDTPA_MCAST_REPROBES,		/* u32 */
 	__NDTPA_MAX
 };
 #define NDTPA_MAX (__NDTPA_MAX - 1)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 0e8b32efc031..3de654256028 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -817,10 +817,9 @@ out:
 static __inline__ int neigh_max_probes(struct neighbour *n)
 {
 	struct neigh_parms *p = n->parms;
-	int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES);
-	if (!(n->nud_state & NUD_PROBE))
-		max_probes += NEIGH_VAR(p, MCAST_PROBES);
-	return max_probes;
+	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
+	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
+	        NEIGH_VAR(p, MCAST_PROBES));
 }
 
 static void neigh_invalidate(struct neighbour *neigh)
@@ -1742,6 +1741,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 			NEIGH_VAR(parms, UCAST_PROBES)) ||
 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
 			NEIGH_VAR(parms, MCAST_PROBES)) ||
+	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
+			NEIGH_VAR(parms, MCAST_REPROBES)) ||
 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
@@ -1901,6 +1902,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
@@ -2001,6 +2003,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
 				NEIGH_VAR_SET(p, MCAST_PROBES,
 					      nla_get_u32(tbp[i]));
 				break;
+			case NDTPA_MCAST_REPROBES:
+				NEIGH_VAR_SET(p, MCAST_REPROBES,
+					      nla_get_u32(tbp[i]));
+				break;
 			case NDTPA_BASE_REACHABLE_TIME:
 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
 					      nla_get_msecs(tbp[i]));
@@ -2987,6 +2993,7 @@ static struct neigh_sysctl_table {
 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
+		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
-- 
cgit v1.2.3


From 8d0451638ad3f7ccd5250c1dd90e06ad487b2703 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 18 Mar 2015 20:55:31 +0100
Subject: netfilter: bridge: kill nf_bridge_pad

The br_netfilter frag output function calls skb_cow_head() so in
case it needs a larger headroom to e.g. re-add a previously stripped PPPOE
or VLAN header things will still work (at cost of reallocation).

We can then move nf_bridge_encap_header_len to br_netfilter.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h | 22 ----------------------
 net/bridge/br_netfilter.c        | 12 ++++++++++++
 net/ipv4/ip_output.c             |  5 +----
 3 files changed, 13 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index ed0d3bf953c3..2734977199ca 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -23,18 +23,6 @@ enum nf_br_hook_priorities {
 #define BRNF_8021Q			0x10
 #define BRNF_PPPoE			0x20
 
-static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
-{
-	switch (skb->protocol) {
-	case __cpu_to_be16(ETH_P_8021Q):
-		return VLAN_HLEN;
-	case __cpu_to_be16(ETH_P_PPP_SES):
-		return PPPOE_SES_HLEN;
-	default:
-		return 0;
-	}
-}
-
 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 {
 	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
@@ -44,15 +32,6 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 
 int br_handle_frame_finish(struct sk_buff *skb);
 
-/* This is called by the IP fragmenting code and it ensures there is
- * enough room for the encapsulating header (if there is one). */
-static inline unsigned int nf_bridge_pad(const struct sk_buff *skb)
-{
-	if (skb->nf_bridge)
-		return nf_bridge_encap_header_len(skb);
-	return 0;
-}
-
 static inline void br_drop_fake_rtable(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -62,7 +41,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
 }
 
 #else
-#define nf_bridge_pad(skb)			(0)
 #define br_drop_fake_rtable(skb)	        do { } while (0)
 #endif /* CONFIG_BRIDGE_NETFILTER */
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index bd2d24d1ff21..f3884a1b942f 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -153,6 +153,18 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
 	return nf_bridge;
 }
 
+static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case __cpu_to_be16(ETH_P_8021Q):
+		return VLAN_HLEN;
+	case __cpu_to_be16(ETH_P_PPP_SES):
+		return PPPOE_SES_HLEN;
+	default:
+		return 0;
+	}
+}
+
 static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
 {
 	unsigned int len = nf_bridge_encap_header_len(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a7aea2048a0d..90b49e88e84a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -636,10 +636,7 @@ slow_path:
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;		/* Where to start from */
 
-	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
-	 * we need to make room for the encapsulating header
-	 */
-	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
+	ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
 
 	/*
 	 *	Fragment the datagram.
-- 
cgit v1.2.3


From 2b290bbb60847c0897c047b5214192810de529df Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 10 Mar 2015 04:48:20 +0100
Subject: can: use sock_efree instead of own destructor

It is identical to the can destructor.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/skb.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index cc00d15c6107..b6a52a4b457a 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -44,16 +44,11 @@ static inline void can_skb_reserve(struct sk_buff *skb)
 	skb_reserve(skb, sizeof(struct can_skb_priv));
 }
 
-static inline void can_skb_destructor(struct sk_buff *skb)
-{
-	sock_put(skb->sk);
-}
-
 static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
 {
 	if (sk) {
 		sock_hold(sk);
-		skb->destructor = can_skb_destructor;
+		skb->destructor = sock_efree;
 		skb->sk = sk;
 	}
 }
-- 
cgit v1.2.3


From c54eb70e3bb8cd0d7b8564bedab63e834656c567 Mon Sep 17 00:00:00 2001
From: Yegor Yefremov <yegorslists@googlemail.com>
Date: Mon, 16 Mar 2015 09:38:13 +0100
Subject: can: add combined rx/tx LED trigger support

Add <ifname>-rxtx trigger, that will be activated both for tx
as rx events. This trigger mimics "activity" LED for Ethernet
devices.

Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/led.c   | 22 +++++++++++++++++++---
 include/linux/can/dev.h |  2 ++
 include/linux/can/led.h |  6 ++++--
 3 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/can/led.c b/drivers/net/can/led.c
index ab7f1b01be49..c1b667675fa1 100644
--- a/drivers/net/can/led.c
+++ b/drivers/net/can/led.c
@@ -30,20 +30,28 @@ void can_led_event(struct net_device *netdev, enum can_led_event event)
 	case CAN_LED_EVENT_OPEN:
 		led_trigger_event(priv->tx_led_trig, LED_FULL);
 		led_trigger_event(priv->rx_led_trig, LED_FULL);
+		led_trigger_event(priv->rxtx_led_trig, LED_FULL);
 		break;
 	case CAN_LED_EVENT_STOP:
 		led_trigger_event(priv->tx_led_trig, LED_OFF);
 		led_trigger_event(priv->rx_led_trig, LED_OFF);
+		led_trigger_event(priv->rxtx_led_trig, LED_OFF);
 		break;
 	case CAN_LED_EVENT_TX:
-		if (led_delay)
+		if (led_delay) {
 			led_trigger_blink_oneshot(priv->tx_led_trig,
 						  &led_delay, &led_delay, 1);
+			led_trigger_blink_oneshot(priv->rxtx_led_trig,
+						  &led_delay, &led_delay, 1);
+		}
 		break;
 	case CAN_LED_EVENT_RX:
-		if (led_delay)
+		if (led_delay) {
 			led_trigger_blink_oneshot(priv->rx_led_trig,
 						  &led_delay, &led_delay, 1);
+			led_trigger_blink_oneshot(priv->rxtx_led_trig,
+						  &led_delay, &led_delay, 1);
+		}
 		break;
 	}
 }
@@ -55,6 +63,7 @@ static void can_led_release(struct device *gendev, void *res)
 
 	led_trigger_unregister_simple(priv->tx_led_trig);
 	led_trigger_unregister_simple(priv->rx_led_trig);
+	led_trigger_unregister_simple(priv->rxtx_led_trig);
 }
 
 /* Register CAN LED triggers for a CAN device
@@ -76,11 +85,15 @@ void devm_can_led_init(struct net_device *netdev)
 		 "%s-tx", netdev->name);
 	snprintf(priv->rx_led_trig_name, sizeof(priv->rx_led_trig_name),
 		 "%s-rx", netdev->name);
+	snprintf(priv->rxtx_led_trig_name, sizeof(priv->rxtx_led_trig_name),
+		 "%s-rxtx", netdev->name);
 
 	led_trigger_register_simple(priv->tx_led_trig_name,
 				    &priv->tx_led_trig);
 	led_trigger_register_simple(priv->rx_led_trig_name,
 				    &priv->rx_led_trig);
+	led_trigger_register_simple(priv->rxtx_led_trig_name,
+				    &priv->rxtx_led_trig);
 
 	devres_add(&netdev->dev, res);
 }
@@ -97,7 +110,7 @@ static int can_led_notifier(struct notifier_block *nb, unsigned long msg,
 	if (!priv)
 		return NOTIFY_DONE;
 
-	if (!priv->tx_led_trig || !priv->rx_led_trig)
+	if (!priv->tx_led_trig || !priv->rx_led_trig || !priv->rxtx_led_trig)
 		return NOTIFY_DONE;
 
 	if (msg == NETDEV_CHANGENAME) {
@@ -106,6 +119,9 @@ static int can_led_notifier(struct notifier_block *nb, unsigned long msg,
 
 		snprintf(name, sizeof(name), "%s-rx", netdev->name);
 		led_trigger_rename_static(name, priv->rx_led_trig);
+
+		snprintf(name, sizeof(name), "%s-rxtx", netdev->name);
+		led_trigger_rename_static(name, priv->rxtx_led_trig);
 	}
 
 	return NOTIFY_DONE;
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index c05ff0f9f9a5..c3a9c8fc60fa 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -61,6 +61,8 @@ struct can_priv {
 	char tx_led_trig_name[CAN_LED_NAME_SZ];
 	struct led_trigger *rx_led_trig;
 	char rx_led_trig_name[CAN_LED_NAME_SZ];
+	struct led_trigger *rxtx_led_trig;
+	char rxtx_led_trig_name[CAN_LED_NAME_SZ];
 #endif
 };
 
diff --git a/include/linux/can/led.h b/include/linux/can/led.h
index e0475c5cbb92..146de4506d21 100644
--- a/include/linux/can/led.h
+++ b/include/linux/can/led.h
@@ -21,8 +21,10 @@ enum can_led_event {
 
 #ifdef CONFIG_CAN_LEDS
 
-/* keep space for interface name + "-tx"/"-rx" suffix and null terminator */
-#define CAN_LED_NAME_SZ (IFNAMSIZ + 4)
+/* keep space for interface name + "-tx"/"-rx"/"-rxtx"
+ * suffix and null terminator
+ */
+#define CAN_LED_NAME_SZ (IFNAMSIZ + 6)
 
 void can_led_event(struct net_device *netdev, enum can_led_event event);
 void devm_can_led_init(struct net_device *netdev);
-- 
cgit v1.2.3


From 0345f93138b2224e0d7ce91fcffdb3dd23f364d7 Mon Sep 17 00:00:00 2001
From: "tadeusz.struk@intel.com" <tadeusz.struk@intel.com>
Date: Thu, 19 Mar 2015 12:31:25 -0700
Subject: net: socket: add support for async operations

Add support for async operations.

Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 1 +
 net/compat.c           | 2 ++
 net/socket.c           | 8 ++++++--
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index fab4d0ddf4ed..c9852ef7e317 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -51,6 +51,7 @@ struct msghdr {
 	void		*msg_control;	/* ancillary data */
 	__kernel_size_t	msg_controllen;	/* ancillary data buffer length */
 	unsigned int	msg_flags;	/* flags on received message */
+	struct kiocb	*msg_iocb;	/* ptr to iocb for async requests */
 };
  
 struct user_msghdr {
diff --git a/net/compat.c b/net/compat.c
index 13c0c9a25cd9..c4b6b0f43d5d 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -79,6 +79,8 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg,
 	if (nr_segs > UIO_MAXIOV)
 		return -EMSGSIZE;
 
+	kmsg->msg_iocb = NULL;
+
 	err = compat_rw_copy_check_uvector(save_addr ? READ : WRITE,
 					   compat_ptr(uiov), nr_segs,
 					   UIO_FASTIOV, *iov, iov);
diff --git a/net/socket.c b/net/socket.c
index 3e776776f42c..073809f4125f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -798,7 +798,8 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
 	struct socket *sock = file->private_data;
-	struct msghdr msg = {.msg_iter = *to};
+	struct msghdr msg = {.msg_iter = *to,
+			     .msg_iocb = iocb};
 	ssize_t res;
 
 	if (file->f_flags & O_NONBLOCK)
@@ -819,7 +820,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct socket *sock = file->private_data;
-	struct msghdr msg = {.msg_iter = *from};
+	struct msghdr msg = {.msg_iter = *from,
+			     .msg_iocb = iocb};
 	ssize_t res;
 
 	if (iocb->ki_pos != 0)
@@ -1894,6 +1896,8 @@ static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
 	if (nr_segs > UIO_MAXIOV)
 		return -EMSGSIZE;
 
+	kmsg->msg_iocb = NULL;
+
 	err = rw_copy_check_uvector(save_addr ? READ : WRITE,
 				    uiov, nr_segs,
 				    UIO_FASTIOV, *iov, iov);
-- 
cgit v1.2.3


From 66db37391dad834a4f739984eec81af27a68b09c Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Thu, 19 Mar 2015 12:31:30 -0700
Subject: crypto: af_alg - Allow to link sgl

Allow to link af_alg sgls.

Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/af_alg.c         | 18 +++++++++++++-----
 include/crypto/if_alg.h |  4 +++-
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 7f8b7edcadca..26089d182cb7 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -358,8 +358,8 @@ int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
 	npages = (off + n + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (WARN_ON(npages == 0))
 		return -EINVAL;
-
-	sg_init_table(sgl->sg, npages);
+	/* Add one extra for linking */
+	sg_init_table(sgl->sg, npages + 1);
 
 	for (i = 0, len = n; i < npages; i++) {
 		int plen = min_t(int, len, PAGE_SIZE - off);
@@ -369,18 +369,26 @@ int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
 		off = 0;
 		len -= plen;
 	}
+	sg_mark_end(sgl->sg + npages - 1);
+	sgl->npages = npages;
+
 	return n;
 }
 EXPORT_SYMBOL_GPL(af_alg_make_sg);
 
+void af_alg_link_sg(struct af_alg_sgl *sgl_prev, struct af_alg_sgl *sgl_new)
+{
+	sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
+	sg_chain(sgl_prev->sg, sgl_prev->npages + 1, sgl_new->sg);
+}
+EXPORT_SYMBOL(af_alg_link_sg);
+
 void af_alg_free_sg(struct af_alg_sgl *sgl)
 {
 	int i;
 
-	i = 0;
-	do {
+	for (i = 0; i < sgl->npages; i++)
 		put_page(sgl->pages[i]);
-	} while (!sg_is_last(sgl->sg + (i++)));
 }
 EXPORT_SYMBOL_GPL(af_alg_free_sg);
 
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 178525e5f430..018afb264ac2 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -58,8 +58,9 @@ struct af_alg_type {
 };
 
 struct af_alg_sgl {
-	struct scatterlist sg[ALG_MAX_PAGES];
+	struct scatterlist sg[ALG_MAX_PAGES + 1];
 	struct page *pages[ALG_MAX_PAGES];
+	unsigned int npages;
 };
 
 int af_alg_register_type(const struct af_alg_type *type);
@@ -70,6 +71,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock);
 
 int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
 void af_alg_free_sg(struct af_alg_sgl *sgl);
+void af_alg_link_sg(struct af_alg_sgl *sgl_prev, struct af_alg_sgl *sgl_new);
 
 int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con);
 
-- 
cgit v1.2.3


From 42cb80a2353f42913ae78074ffa1f1b4a49e5436 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 22 Mar 2015 10:22:19 -0700
Subject: inet: remove sk_listener parameter from syn_ack_timeout()

It is not needed, and req->sk_listener points to the listener anyway.
request_sock argument can be const.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h            | 2 +-
 include/net/request_sock.h      | 3 +--
 include/net/tcp.h               | 2 +-
 net/dccp/ipv4.c                 | 2 +-
 net/ipv4/inet_connection_sock.c | 2 +-
 net/ipv4/tcp_timer.c            | 8 +++++---
 6 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 439ff698000a..3dca24d3ac67 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -317,6 +317,6 @@ static inline const char *dccp_role(const struct sock *sk)
 	return NULL;
 }
 
-extern void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
+extern void dccp_syn_ack_timeout(const struct request_sock *req);
 
 #endif /* _LINUX_DCCP_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 6a91261d9b7b..8603c350fad0 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -39,8 +39,7 @@ struct request_sock_ops {
 	void		(*send_reset)(struct sock *sk,
 				      struct sk_buff *skb);
 	void		(*destructor)(struct request_sock *req);
-	void		(*syn_ack_timeout)(struct sock *sk,
-					   struct request_sock *req);
+	void		(*syn_ack_timeout)(const struct request_sock *req);
 };
 
 int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 082fd79132b7..1876262afd59 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -433,7 +433,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
 			  char __user *optval, unsigned int optlen);
 void tcp_set_keepalive(struct sock *sk, int val);
-void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
+void tcp_syn_ack_timeout(const struct request_sock *req);
 int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 		int flags, int *addr_len);
 void tcp_parse_options(const struct sk_buff *skb,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 25a9615b3b88..1f7161e05403 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -576,7 +576,7 @@ static void dccp_v4_reqsk_destructor(struct request_sock *req)
 	kfree(inet_rsk(req)->opt);
 }
 
-void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
+void dccp_syn_ack_timeout(const struct request_sock *req)
 {
 }
 EXPORT_SYMBOL(dccp_syn_ack_timeout);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7d011e825c48..a12b973164d0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -615,7 +615,7 @@ static void reqsk_timer_handler(unsigned long data)
 		max_retries = defer_accept;
 	syn_ack_recalc(req, thresh, max_retries, defer_accept,
 		       &expire, &resend);
-	req->rsk_ops->syn_ack_timeout(sk_listener, req);
+	req->rsk_ops->syn_ack_timeout(req);
 	if (!expire &&
 	    (!resend ||
 	     !inet_rtx_syn_ack(sk_listener, req) ||
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3daa6b5d766d..2568fd282873 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -327,7 +327,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
 	struct request_sock *req;
 
 	req = tcp_sk(sk)->fastopen_rsk;
-	req->rsk_ops->syn_ack_timeout(sk, req);
+	req->rsk_ops->syn_ack_timeout(req);
 
 	if (req->num_timeout >= max_retries) {
 		tcp_write_err(sk);
@@ -539,9 +539,11 @@ static void tcp_write_timer(unsigned long data)
 	sock_put(sk);
 }
 
-void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
+void tcp_syn_ack_timeout(const struct request_sock *req)
 {
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
+	struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
+
+	NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS);
 }
 EXPORT_SYMBOL(tcp_syn_ack_timeout);
 
-- 
cgit v1.2.3


From b282705336e03fc7b9377a278939594870a40f96 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 22 Mar 2015 10:22:21 -0700
Subject: net: convert syn_wait_lock to a spinlock

This is a low hanging fruit, as we'll get rid of syn_wait_lock eventually.

We hold syn_wait_lock for such small sections, that it makes no sense to use
a read/write lock. A spin lock is simply faster.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h       | 11 +++--------
 net/core/request_sock.c          | 14 +++++++-------
 net/ipv4/inet_connection_sock.c  |  8 ++++----
 net/ipv4/inet_diag.c             |  4 ++--
 net/ipv4/tcp_ipv4.c              | 12 ++++++------
 net/ipv6/inet6_connection_sock.c |  4 ++--
 6 files changed, 24 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 8603c350fad0..fe41f3ceb008 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -173,11 +173,6 @@ struct fastopen_queue {
  * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
  * lock sock while browsing the listening hash (otherwise it's deadlock prone).
  *
- * This lock is acquired in read mode only from listening_get_next() seq_file
- * op and it's acquired in write mode _only_ from code that is actively
- * changing rskq_accept_head. All readers that are holding the master sock lock
- * don't need to grab this lock in read mode too as rskq_accept_head. writes
- * are always protected from the main sock lock.
  */
 struct request_sock_queue {
 	struct request_sock	*rskq_accept_head;
@@ -192,7 +187,7 @@ struct request_sock_queue {
 					     */
 
 	/* temporary alignment, our goal is to get rid of this lock */
-	rwlock_t		syn_wait_lock ____cacheline_aligned_in_smp;
+	spinlock_t		syn_wait_lock ____cacheline_aligned_in_smp;
 };
 
 int reqsk_queue_alloc(struct request_sock_queue *queue,
@@ -223,14 +218,14 @@ static inline void reqsk_queue_unlink(struct request_sock_queue *queue,
 	struct listen_sock *lopt = queue->listen_opt;
 	struct request_sock **prev;
 
-	write_lock(&queue->syn_wait_lock);
+	spin_lock(&queue->syn_wait_lock);
 
 	prev = &lopt->syn_table[req->rsk_hash];
 	while (*prev != req)
 		prev = &(*prev)->dl_next;
 	*prev = req->dl_next;
 
-	write_unlock(&queue->syn_wait_lock);
+	spin_unlock(&queue->syn_wait_lock);
 	if (del_timer(&req->rsk_timer))
 		reqsk_put(req);
 }
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index cdc0ddd9ac9f..87b22c0bc08c 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -58,14 +58,14 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 		return -ENOMEM;
 
 	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
-	rwlock_init(&queue->syn_wait_lock);
+	spin_lock_init(&queue->syn_wait_lock);
 	queue->rskq_accept_head = NULL;
 	lopt->nr_table_entries = nr_table_entries;
 	lopt->max_qlen_log = ilog2(nr_table_entries);
 
-	write_lock_bh(&queue->syn_wait_lock);
+	spin_lock_bh(&queue->syn_wait_lock);
 	queue->listen_opt = lopt;
-	write_unlock_bh(&queue->syn_wait_lock);
+	spin_unlock_bh(&queue->syn_wait_lock);
 
 	return 0;
 }
@@ -81,10 +81,10 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(
 {
 	struct listen_sock *lopt;
 
-	write_lock_bh(&queue->syn_wait_lock);
+	spin_lock_bh(&queue->syn_wait_lock);
 	lopt = queue->listen_opt;
 	queue->listen_opt = NULL;
-	write_unlock_bh(&queue->syn_wait_lock);
+	spin_unlock_bh(&queue->syn_wait_lock);
 
 	return lopt;
 }
@@ -100,7 +100,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 		for (i = 0; i < lopt->nr_table_entries; i++) {
 			struct request_sock *req;
 
-			write_lock_bh(&queue->syn_wait_lock);
+			spin_lock_bh(&queue->syn_wait_lock);
 			while ((req = lopt->syn_table[i]) != NULL) {
 				lopt->syn_table[i] = req->dl_next;
 				atomic_inc(&lopt->qlen_dec);
@@ -108,7 +108,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 					reqsk_put(req);
 				reqsk_put(req);
 			}
-			write_unlock_bh(&queue->syn_wait_lock);
+			spin_unlock_bh(&queue->syn_wait_lock);
 		}
 	}
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 711ab143d4cb..79c0c9439fdc 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -495,7 +495,7 @@ struct request_sock *inet_csk_search_req(struct sock *sk,
 	u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd,
 				  lopt->nr_table_entries);
 
-	write_lock(&icsk->icsk_accept_queue.syn_wait_lock);
+	spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
 	for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
 
@@ -508,7 +508,7 @@ struct request_sock *inet_csk_search_req(struct sock *sk,
 			break;
 		}
 	}
-	write_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
+	spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
 
 	return req;
 }
@@ -650,10 +650,10 @@ void reqsk_queue_hash_req(struct request_sock_queue *queue,
 	setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
 	req->rsk_hash = hash;
 
-	write_lock(&queue->syn_wait_lock);
+	spin_lock(&queue->syn_wait_lock);
 	req->dl_next = lopt->syn_table[hash];
 	lopt->syn_table[hash] = req;
-	write_unlock(&queue->syn_wait_lock);
+	spin_unlock(&queue->syn_wait_lock);
 
 	mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
 }
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f984b2001d0a..76322c9867d5 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -728,7 +728,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 	entry.family = sk->sk_family;
 
-	read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+	spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 
 	lopt = icsk->icsk_accept_queue.listen_opt;
 	if (!lopt || !listen_sock_qlen(lopt))
@@ -776,7 +776,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 	}
 
 out:
-	read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+	spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 
 	return err;
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5554b8f33d41..8028ad5920a4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1909,13 +1909,13 @@ get_req:
 		}
 		sk	  = sk_nulls_next(st->syn_wait_sk);
 		st->state = TCP_SEQ_STATE_LISTENING;
-		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+		spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 	} else {
 		icsk = inet_csk(sk);
-		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+		spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
 			goto start_req;
-		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+		spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 		sk = sk_nulls_next(sk);
 	}
 get_sk:
@@ -1927,7 +1927,7 @@ get_sk:
 			goto out;
 		}
 		icsk = inet_csk(sk);
-		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+		spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
 start_req:
 			st->uid		= sock_i_uid(sk);
@@ -1936,7 +1936,7 @@ start_req:
 			st->sbucket	= 0;
 			goto get_req;
 		}
-		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+		spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 	}
 	spin_unlock_bh(&ilb->lock);
 	st->offset = 0;
@@ -2155,7 +2155,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 	case TCP_SEQ_STATE_OPENREQ:
 		if (v) {
 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
-			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+			spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 		}
 	case TCP_SEQ_STATE_LISTENING:
 		if (v != SEQ_START_TOKEN)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 2f3bbe569e8f..6927f3fb5597 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -124,7 +124,7 @@ struct request_sock *inet6_csk_search_req(struct sock *sk,
 	u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd,
 				   lopt->nr_table_entries);
 
-	write_lock(&icsk->icsk_accept_queue.syn_wait_lock);
+	spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
 	for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
 		const struct inet_request_sock *ireq = inet_rsk(req);
 
@@ -138,7 +138,7 @@ struct request_sock *inet6_csk_search_req(struct sock *sk,
 			break;
 		}
 	}
-	write_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
+	spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
 
 	return req;
 }
-- 
cgit v1.2.3


From 26e3736090e1037ac929787df21c05497479b77f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 22 Mar 2015 10:22:22 -0700
Subject: ipv4: tcp: handle ICMP messages on TCP_NEW_SYN_RECV request sockets

tcp_v4_err() can restrict lookups to ehash table, and not to listeners.

Note this patch creates the infrastructure, but this means that ICMP
messages for request sockets are ignored until complete conversion.

New tcp_req_err() helper is exported so that we can use it in IPv6
in following patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   |  1 +
 net/ipv4/tcp_ipv4.c | 69 ++++++++++++++++++++++++++---------------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1876262afd59..fe60e00e1919 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -447,6 +447,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 void tcp_v4_mtu_reduced(struct sock *sk);
+void tcp_req_err(struct sock *sk, u32 seq);
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_create_openreq_child(struct sock *sk,
 				      struct request_sock *req,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8028ad5920a4..a57615062b66 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -310,6 +310,34 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
 		dst->ops->redirect(dst, sk, skb);
 }
 
+
+/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
+void tcp_req_err(struct sock *sk, u32 seq)
+{
+	struct request_sock *req = inet_reqsk(sk);
+	struct net *net = sock_net(sk);
+
+	/* ICMPs are not backlogged, hence we cannot get
+	 * an established socket here.
+	 */
+	WARN_ON(req->sk);
+
+	if (seq != tcp_rsk(req)->snt_isn) {
+		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+	} else {
+		/*
+		 * Still in SYN_RECV, just remove it silently.
+		 * There is no good way to pass the error to the newly
+		 * created socket, and POSIX does not want network
+		 * errors returned from accept().
+		 */
+		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+		NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
+	}
+	reqsk_put(req);
+}
+EXPORT_SYMBOL(tcp_req_err);
+
 /*
  * This routine is called by the ICMP module when it gets some
  * sort of error condition.  If err < 0 then the socket should
@@ -343,8 +371,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	int err;
 	struct net *net = dev_net(icmp_skb->dev);
 
-	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
-			iph->saddr, th->source, inet_iif(icmp_skb));
+	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
+				       th->dest, iph->saddr, ntohs(th->source),
+				       inet_iif(icmp_skb));
 	if (!sk) {
 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
@@ -353,6 +382,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		inet_twsk_put(inet_twsk(sk));
 		return;
 	}
+	seq = ntohl(th->seq);
+	if (sk->sk_state == TCP_NEW_SYN_RECV)
+		return tcp_req_err(sk, seq);
 
 	bh_lock_sock(sk);
 	/* If too many ICMPs get dropped on busy
@@ -374,7 +406,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
-	seq = ntohl(th->seq);
 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
 	fastopen = tp->fastopen_rsk;
 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
@@ -458,38 +489,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	}
 
 	switch (sk->sk_state) {
-		struct request_sock *req;
-	case TCP_LISTEN:
-		if (sock_owned_by_user(sk))
-			goto out;
-
-		req = inet_csk_search_req(sk, th->dest,
-					  iph->daddr, iph->saddr);
-		if (!req)
-			goto out;
-
-		/* ICMPs are not backlogged, hence we cannot get
-		   an established socket here.
-		 */
-		WARN_ON(req->sk);
-
-		if (seq != tcp_rsk(req)->snt_isn) {
-			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-			reqsk_put(req);
-			goto out;
-		}
-
-		/*
-		 * Still in SYN_RECV, just remove it silently.
-		 * There is no good way to pass the error to the newly
-		 * created socket, and POSIX does not want network
-		 * errors returned from accept().
-		 */
-		inet_csk_reqsk_queue_drop(sk, req);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
-		reqsk_put(req);
-		goto out;
-
 	case TCP_SYN_SENT:
 	case TCP_SYN_RECV:
 		/* Only in fast or simultaneous open. If a fast open socket is
-- 
cgit v1.2.3


From 85645bab57bfc6b0b43bb96a301c4ef83925c07d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 22 Mar 2015 10:22:24 -0700
Subject: ipv4: dccp: handle ICMP messages on DCCP_NEW_SYN_RECV request sockets

dccp_v4_err() can restrict lookups to ehash table, and not to listeners.

Note this patch creates the infrastructure, but this means that ICMP
messages for request sockets are ignored until complete conversion.

New dccp_req_err() helper is exported so that we can use it in IPv6
in following patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h |  2 ++
 net/dccp/dccp.h      |  1 +
 net/dccp/ipv4.c      | 70 +++++++++++++++++++++++++---------------------------
 3 files changed, 37 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 3dca24d3ac67..221025423e6c 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -43,6 +43,7 @@ enum dccp_state {
 	DCCP_CLOSING	     = TCP_CLOSING,
 	DCCP_TIME_WAIT	     = TCP_TIME_WAIT,
 	DCCP_CLOSED	     = TCP_CLOSE,
+	DCCP_NEW_SYN_RECV    = TCP_NEW_SYN_RECV,
 	DCCP_PARTOPEN	     = TCP_MAX_STATES,
 	DCCP_PASSIVE_CLOSEREQ,			/* clients receiving CloseReq */
 	DCCP_MAX_STATES
@@ -57,6 +58,7 @@ enum {
 	DCCPF_CLOSING	      = TCPF_CLOSING,
 	DCCPF_TIME_WAIT	      = TCPF_TIME_WAIT,
 	DCCPF_CLOSED	      = TCPF_CLOSE,
+	DCCPF_NEW_SYN_RECV    = TCPF_NEW_SYN_RECV,
 	DCCPF_PARTOPEN	      = (1 << DCCP_PARTOPEN),
 };
 
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 2396f50c5b04..bebc735f5afc 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -317,6 +317,7 @@ int inet_dccp_listen(struct socket *sock, int backlog);
 unsigned int dccp_poll(struct file *file, struct socket *sock,
 		       poll_table *wait);
 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+void dccp_req_err(struct sock *sk, u64 seq);
 
 struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb);
 int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 1f7161e05403..6310b8b19598 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -195,6 +195,32 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk)
 		dst->ops->redirect(dst, sk, skb);
 }
 
+void dccp_req_err(struct sock *sk, u64 seq)
+	{
+	struct request_sock *req = inet_reqsk(sk);
+	struct net *net = sock_net(sk);
+
+	/*
+	 * ICMPs are not backlogged, hence we cannot get an established
+	 * socket here.
+	 */
+	WARN_ON(req->sk);
+
+	if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
+		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+	} else {
+		/*
+		 * Still in RESPOND, just remove it silently.
+		 * There is no good way to pass the error to the newly
+		 * created socket, and POSIX does not want network
+		 * errors returned from accept().
+		 */
+		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+	}
+	reqsk_put(req);
+}
+EXPORT_SYMBOL(dccp_req_err);
+
 /*
  * This routine is called by the ICMP module when it gets some sort of error
  * condition. If err < 0 then the socket should be closed and the error
@@ -227,10 +253,11 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 		return;
 	}
 
-	sk = inet_lookup(net, &dccp_hashinfo,
-			iph->daddr, dh->dccph_dport,
-			iph->saddr, dh->dccph_sport, inet_iif(skb));
-	if (sk == NULL) {
+	sk = __inet_lookup_established(net, &dccp_hashinfo,
+				       iph->daddr, dh->dccph_dport,
+				       iph->saddr, ntohs(dh->dccph_sport),
+				       inet_iif(skb));
+	if (!sk) {
 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
 	}
@@ -239,6 +266,9 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 		inet_twsk_put(inet_twsk(sk));
 		return;
 	}
+	seq = dccp_hdr_seq(dh);
+	if (sk->sk_state == DCCP_NEW_SYN_RECV)
+		return dccp_req_err(sk, seq);
 
 	bh_lock_sock(sk);
 	/* If too many ICMPs get dropped on busy
@@ -251,7 +281,6 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 		goto out;
 
 	dp = dccp_sk(sk);
-	seq = dccp_hdr_seq(dh);
 	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
 	    !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
@@ -288,37 +317,6 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	}
 
 	switch (sk->sk_state) {
-		struct request_sock *req;
-	case DCCP_LISTEN:
-		if (sock_owned_by_user(sk))
-			goto out;
-		req = inet_csk_search_req(sk, dh->dccph_dport,
-					  iph->daddr, iph->saddr);
-		if (!req)
-			goto out;
-
-		/*
-		 * ICMPs are not backlogged, hence we cannot get an established
-		 * socket here.
-		 */
-		WARN_ON(req->sk);
-
-		if (!between48(seq, dccp_rsk(req)->dreq_iss,
-				    dccp_rsk(req)->dreq_gss)) {
-			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-			reqsk_put(req);
-			goto out;
-		}
-		/*
-		 * Still in RESPOND, just remove it silently.
-		 * There is no good way to pass the error to the newly
-		 * created socket, and POSIX does not want network
-		 * errors returned from accept().
-		 */
-		inet_csk_reqsk_queue_drop(sk, req);
-		reqsk_put(req);
-		goto out;
-
 	case DCCP_REQUESTING:
 	case DCCP_RESPOND:
 		if (!sock_owned_by_user(sk)) {
-- 
cgit v1.2.3


From 841a6664f213f76a9bc1bfd07a466d3dbe281a88 Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@chromium.org>
Date: Mon, 23 Mar 2015 15:57:09 -0700
Subject: Bluetooth: Add definitions for Add/Remove Advertising API

This patch adds definitions for the Add Advertising and Remove
Advertising MGMT commands and events.

Signed-off-by: Arman Uguray <armansito@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index a1a68671bf88..68abd4b0c25d 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -539,6 +539,30 @@ struct mgmt_rp_read_adv_features {
 	__u8   instance[0];
 } __packed;
 
+#define MGMT_OP_ADD_ADVERTISING		0x003E
+struct mgmt_cp_add_advertising {
+	__u8	instance;
+	__le32	flags;
+	__le16	duration;
+	__le16	timeout;
+	__u8	adv_data_len;
+	__u8	scan_rsp_len;
+	__u8	data[0];
+} __packed;
+#define MGMT_ADD_ADVERTISING_SIZE	11
+struct mgmt_rp_add_advertising {
+	__u8	instance;
+} __packed;
+
+#define MGMT_OP_REMOVE_ADVERTISING	0x003F
+struct mgmt_cp_remove_advertising {
+	__u8	instance;
+} __packed;
+#define MGMT_REMOVE_ADVERTISING_SIZE	1
+struct mgmt_rp_remove_advertising {
+	__u8	instance;
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
@@ -742,3 +766,13 @@ struct mgmt_ev_local_oob_data_updated {
 	__le16	eir_len;
 	__u8	eir[0];
 } __packed;
+
+#define MGMT_EV_ADVERTISING_ADDED	0x0023
+struct mgmt_ev_advertising_added {
+	__u8    instance;
+} __packed;
+
+#define MGMT_EV_ADVERTISING_REMOVED	0x0024
+struct mgmt_ev_advertising_removed {
+	__u8    instance;
+} __packed;
-- 
cgit v1.2.3


From 4453b006538d02ada8294a195bb2dc2ada498436 Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@chromium.org>
Date: Mon, 23 Mar 2015 15:57:10 -0700
Subject: Bluetooth: Introduce HCI_ADVERTISING_INSTANCE setting and add AD
 flags

This patch introduces the HCI_ADVERTISING_INSTANCE setting, which is set
when an at least one advertising instance has been added using the
"Add Advertising" mgmt command. This patch also adds a macro definition
for the EIR_APPEARANCE field type.

Signed-off-by: Arman Uguray <armansito@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 06e7eee31ce4..3acecf35420b 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -227,6 +227,7 @@ enum {
 	HCI_LE_ENABLED,
 	HCI_ADVERTISING,
 	HCI_ADVERTISING_CONNECTABLE,
+	HCI_ADVERTISING_INSTANCE,
 	HCI_CONNECTABLE,
 	HCI_DISCOVERABLE,
 	HCI_LIMITED_DISCOVERABLE,
@@ -465,6 +466,7 @@ enum {
 #define EIR_SSP_HASH_C		0x0E /* Simple Pairing Hash C */
 #define EIR_SSP_RAND_R		0x0F /* Simple Pairing Randomizer R */
 #define EIR_DEVICE_ID		0x10 /* device ID */
+#define EIR_APPEARANCE		0x19 /* Device appearance */
 #define EIR_LE_BDADDR		0x1B /* LE Bluetooth device address */
 #define EIR_LE_ROLE		0x1C /* LE role */
 #define EIR_LE_SC_CONFIRM	0x22 /* LE SC Confirmation Value */
-- 
cgit v1.2.3


From 203fea0178d7e165dbe834d1bdd9d243018fd5bf Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@chromium.org>
Date: Mon, 23 Mar 2015 15:57:11 -0700
Subject: Bluetooth: Add data structure for advertising instance

This patch introduces a new data structure to represent advertising
instances that were added using the "Add Advertising" mgmt command.
Initially an hci_dev structure will support only one of these instances
at a time, so the current instance is simply stored as a direct member
of hci_dev.

Signed-off-by: Arman Uguray <armansito@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 16 ++++++++++++++++
 net/bluetooth/hci_core.c         |  1 +
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b65c53de6a69..3a6d4e3d68fe 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -155,6 +155,15 @@ struct oob_data {
 	u8 rand256[16];
 };
 
+struct adv_info {
+	__u8	instance;
+	__u32	flags;
+	__u16	adv_data_len;
+	__u8	adv_data[HCI_MAX_AD_LENGTH];
+	__u16	scan_rsp_len;
+	__u8	scan_rsp_data[HCI_MAX_AD_LENGTH];
+};
+
 #define HCI_MAX_SHORT_NAME_LENGTH	10
 
 /* Default LE RPA expiry time, 15 minutes */
@@ -364,6 +373,8 @@ struct hci_dev {
 	__u8			scan_rsp_data[HCI_MAX_AD_LENGTH];
 	__u8			scan_rsp_data_len;
 
+	struct adv_info		adv_instance;
+
 	__u8			irk[16];
 	__u32			rpa_timeout;
 	struct delayed_work	rpa_expired;
@@ -550,6 +561,11 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev)
 	hdev->discovery.scan_duration = 0;
 }
 
+static inline void adv_info_init(struct hci_dev *hdev)
+{
+	memset(&hdev->adv_instance, 0, sizeof(struct adv_info));
+}
+
 bool hci_discovery_active(struct hci_dev *hdev);
 
 void hci_discovery_set_state(struct hci_dev *hdev, int state);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 17f52a195ba8..e6bfeb7b4415 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3126,6 +3126,7 @@ struct hci_dev *hci_alloc_dev(void)
 
 	hci_init_sysfs(hdev);
 	discovery_init(hdev);
+	adv_info_init(hdev);
 
 	return hdev;
 }
-- 
cgit v1.2.3


From 912098a6308e37208b8dcc46c57c66d0778a854b Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@chromium.org>
Date: Mon, 23 Mar 2015 15:57:15 -0700
Subject: Bluetooth: Add support for adv instance timeout

This patch implements support for the timeout parameter of the
Add Advertising command.

Signed-off-by: Arman Uguray <armansito@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |   2 +
 net/bluetooth/mgmt.c             | 101 +++++++++++++++++++++++++++++++--------
 2 files changed, 83 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 3a6d4e3d68fe..540c07feece7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -156,8 +156,10 @@ struct oob_data {
 };
 
 struct adv_info {
+	struct delayed_work timeout_exp;
 	__u8	instance;
 	__u32	flags;
+	__u16	timeout;
 	__u16	adv_data_len;
 	__u8	adv_data[HCI_MAX_AD_LENGTH];
 	__u16	scan_rsp_len;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 762ca9be9806..eda52397a648 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1336,6 +1336,49 @@ static bool hci_stop_discovery(struct hci_request *req)
 	return false;
 }
 
+static void advertising_added(struct sock *sk, struct hci_dev *hdev,
+			      u8 instance)
+{
+	struct mgmt_ev_advertising_added ev;
+
+	ev.instance = instance;
+
+	mgmt_event(MGMT_EV_ADVERTISING_ADDED, hdev, &ev, sizeof(ev), sk);
+}
+
+static void advertising_removed(struct sock *sk, struct hci_dev *hdev,
+				u8 instance)
+{
+	struct mgmt_ev_advertising_removed ev;
+
+	ev.instance = instance;
+
+	mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk);
+}
+
+static void clear_adv_instance(struct hci_dev *hdev)
+{
+	struct hci_request req;
+
+	if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))
+		return;
+
+	if (hdev->adv_instance.timeout)
+		cancel_delayed_work(&hdev->adv_instance.timeout_exp);
+
+	memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance));
+	advertising_removed(NULL, hdev, 1);
+	hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE);
+
+	if (!hdev_is_powered(hdev) ||
+	    hci_dev_test_flag(hdev, HCI_ADVERTISING))
+		return;
+
+	hci_req_init(&req, hdev);
+	disable_advertising(&req);
+	hci_req_run(&req, NULL);
+}
+
 static int clean_up_hci_state(struct hci_dev *hdev)
 {
 	struct hci_request req;
@@ -1351,6 +1394,9 @@ static int clean_up_hci_state(struct hci_dev *hdev)
 		hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
 	}
 
+	if (hdev->adv_instance.timeout)
+		clear_adv_instance(hdev);
+
 	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 		disable_advertising(&req);
 
@@ -6468,26 +6514,6 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
 	return true;
 }
 
-static void advertising_added(struct sock *sk, struct hci_dev *hdev,
-			      u8 instance)
-{
-	struct mgmt_ev_advertising_added ev;
-
-	ev.instance = instance;
-
-	mgmt_event(MGMT_EV_ADVERTISING_ADDED, hdev, &ev, sizeof(ev), sk);
-}
-
-static void advertising_removed(struct sock *sk, struct hci_dev *hdev,
-				u8 instance)
-{
-	struct mgmt_ev_advertising_removed ev;
-
-	ev.instance = instance;
-
-	mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk);
-}
-
 static void add_advertising_complete(struct hci_dev *hdev, u8 status,
 				     u16 opcode)
 {
@@ -6524,6 +6550,18 @@ unlock:
 	hci_dev_unlock(hdev);
 }
 
+static void adv_timeout_expired(struct work_struct *work)
+{
+	struct hci_dev *hdev = container_of(work, struct hci_dev,
+					    adv_instance.timeout_exp.work);
+
+	hdev->adv_instance.timeout = 0;
+
+	hci_dev_lock(hdev);
+	clear_adv_instance(hdev);
+	hci_dev_unlock(hdev);
+}
+
 static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 			   void *data, u16 data_len)
 {
@@ -6531,6 +6569,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 	struct mgmt_rp_add_advertising rp;
 	u32 flags;
 	u8 status;
+	u16 timeout;
 	int err;
 	struct mgmt_pending_cmd *cmd;
 	struct hci_request req;
@@ -6543,6 +6582,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 				       status);
 
 	flags = __le32_to_cpu(cp->flags);
+	timeout = __le16_to_cpu(cp->timeout);
 
 	/* The current implementation only supports adding one instance and
 	 * doesn't support flags.
@@ -6553,6 +6593,12 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 
 	hci_dev_lock(hdev);
 
+	if (timeout && !hdev_is_powered(hdev)) {
+		err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
+				      MGMT_STATUS_REJECTED);
+		goto unlock;
+	}
+
 	if (pending_find(MGMT_OP_ADD_ADVERTISING, hdev) ||
 	    pending_find(MGMT_OP_REMOVE_ADVERTISING, hdev) ||
 	    pending_find(MGMT_OP_SET_LE, hdev)) {
@@ -6569,6 +6615,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 		goto unlock;
 	}
 
+	INIT_DELAYED_WORK(&hdev->adv_instance.timeout_exp, adv_timeout_expired);
+
 	hdev->adv_instance.flags = flags;
 	hdev->adv_instance.adv_data_len = cp->adv_data_len;
 	hdev->adv_instance.scan_rsp_len = cp->scan_rsp_len;
@@ -6580,6 +6628,16 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 		memcpy(hdev->adv_instance.scan_rsp_data,
 		       cp->data + cp->adv_data_len, cp->scan_rsp_len);
 
+	if (hdev->adv_instance.timeout)
+		cancel_delayed_work(&hdev->adv_instance.timeout_exp);
+
+	hdev->adv_instance.timeout = timeout;
+
+	if (timeout)
+		queue_delayed_work(hdev->workqueue,
+				   &hdev->adv_instance.timeout_exp,
+				   msecs_to_jiffies(timeout * 1000));
+
 	if (!hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING_INSTANCE))
 		advertising_added(sk, hdev, 1);
 
@@ -6682,6 +6740,9 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
 		goto unlock;
 	}
 
+	if (hdev->adv_instance.timeout)
+		cancel_delayed_work(&hdev->adv_instance.timeout_exp);
+
 	memset(&hdev->adv_instance, 0, sizeof(hdev->adv_instance));
 
 	advertising_removed(sk, hdev, 1);
-- 
cgit v1.2.3


From 682f048bd49449f4ab978664a7f69a44a74e3caa Mon Sep 17 00:00:00 2001
From: Alexander Drozdov <al.drozdov@gmail.com>
Date: Mon, 23 Mar 2015 09:11:13 +0300
Subject: af_packet: pass checksum validation status to the user

Introduce TP_STATUS_CSUM_VALID tp_status flag to tell the
af_packet user that at least the transport header checksum
has been already validated.

For now, the flag may be set for incoming packets only.

Signed-off-by: Alexander Drozdov <al.drozdov@gmail.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/packet_mmap.txt | 13 ++++++++++---
 include/uapi/linux/if_packet.h           |  1 +
 net/packet/af_packet.c                   |  9 +++++++++
 3 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index a6d7cb91069e..daa015af16a0 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -440,9 +440,10 @@ and the following flags apply:
 +++ Capture process:
      from include/linux/if_packet.h
 
-     #define TP_STATUS_COPY          2 
-     #define TP_STATUS_LOSING        4 
-     #define TP_STATUS_CSUMNOTREADY  8 
+     #define TP_STATUS_COPY          (1 << 1)
+     #define TP_STATUS_LOSING        (1 << 2)
+     #define TP_STATUS_CSUMNOTREADY  (1 << 3)
+     #define TP_STATUS_CSUM_VALID    (1 << 7)
 
 TP_STATUS_COPY        : This flag indicates that the frame (and associated
                         meta information) has been truncated because it's 
@@ -466,6 +467,12 @@ TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets which
                         reading the packet we should not try to check the 
                         checksum. 
 
+TP_STATUS_CSUM_VALID  : This flag indicates that at least the transport
+                        header checksum of the packet has been already
+                        validated on the kernel side. If the flag is not set
+                        then we are free to check the checksum by ourselves
+                        provided that TP_STATUS_CSUMNOTREADY is also not set.
+
 for convenience there are also the following defines:
 
      #define TP_STATUS_KERNEL        0
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index da2d668b8cf1..053bd102fbe0 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -99,6 +99,7 @@ struct tpacket_auxdata {
 #define TP_STATUS_VLAN_VALID		(1 << 4) /* auxdata has valid tp_vlan_tci */
 #define TP_STATUS_BLK_TMO		(1 << 5)
 #define TP_STATUS_VLAN_TPID_VALID	(1 << 6) /* auxdata has valid tp_vlan_tpid */
+#define TP_STATUS_CSUM_VALID		(1 << 7)
 
 /* Tx ring - header status */
 #define TP_STATUS_AVAILABLE	      0
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9d854c5ce0b5..5102c3cc4eec 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1924,6 +1924,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		status |= TP_STATUS_CSUMNOTREADY;
+	else if (skb->pkt_type != PACKET_OUTGOING &&
+		 (skb->ip_summed == CHECKSUM_COMPLETE ||
+		  skb_csum_unnecessary(skb)))
+		status |= TP_STATUS_CSUM_VALID;
 
 	if (snaplen > res)
 		snaplen = res;
@@ -3031,6 +3035,11 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		aux.tp_status = TP_STATUS_USER;
 		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
+		else if (skb->pkt_type != PACKET_OUTGOING &&
+			 (skb->ip_summed == CHECKSUM_COMPLETE ||
+			  skb_csum_unnecessary(skb)))
+			aux.tp_status |= TP_STATUS_CSUM_VALID;
+
 		aux.tp_len = origlen;
 		aux.tp_snaplen = skb->len;
 		aux.tp_mac = 0;
-- 
cgit v1.2.3


From de91b25c8011089f5dd99b9d24743db1f550ca4b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 24 Mar 2015 00:50:20 +1100
Subject: rhashtable: Eliminate unnecessary branch in rht_key_hashfn

When rht_key_hashfn is called from rhashtable itself and params
is equal to ht->p, there is no point in checking params.key_len
and falling back to ht->p.key_len.

For some reason gcc couldn't figure out that params is the same
as ht->p.  So let's help it by only checking params.key_len when
it's a constant.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 89d102270570..3851952781d7 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -199,8 +199,12 @@ static inline unsigned int rht_key_hashfn(
 	struct rhashtable *ht, const struct bucket_table *tbl,
 	const void *key, const struct rhashtable_params params)
 {
-	return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?:
-							ht->p.key_len,
+	/* params must be equal to ht->p if it isn't constant. */
+	unsigned key_len = __builtin_constant_p(params.key_len) ?
+			   (params.key_len ?: ht->p.key_len) :
+			   params.key_len;
+
+	return rht_bucket_index(tbl, params.hashfn(key, key_len,
 						   tbl->hash_rnd));
 }
 
-- 
cgit v1.2.3


From 31ccde2dacea8375c3a7d6fffbf0060ee0d40214 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 24 Mar 2015 00:50:21 +1100
Subject: rhashtable: Allow hashfn to be unset

Since every current rhashtable user uses jhash as their hash
function, the fact that jhash is an inline function causes each
user to generate a copy of its code.

This function provides a solution to this problem by allowing
hashfn to be unset.  In which case rhashtable will automatically
set it to jhash.  Furthermore, if the key length is a multiple
of 4, we will switch over to jhash2.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 33 +++++++++++++++++++++++++++------
 lib/rhashtable.c           | 17 ++++++++++++++++-
 2 files changed, 43 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 3851952781d7..bc2488b98321 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -19,6 +19,7 @@
 
 #include <linux/compiler.h>
 #include <linux/errno.h>
+#include <linux/jhash.h>
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
@@ -103,7 +104,7 @@ struct rhashtable;
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
- * @hashfn: Function to hash key
+ * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
  * @obj_hashfn: Function to hash object
  * @obj_cmpfn: Function to compare key with object
  */
@@ -125,6 +126,7 @@ struct rhashtable_params {
  * struct rhashtable - Hash table handle
  * @tbl: Bucket table
  * @nelems: Number of elements in table
+ * @key_len: Key length for hashfn
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
@@ -134,6 +136,7 @@ struct rhashtable {
 	struct bucket_table __rcu	*tbl;
 	atomic_t			nelems;
 	bool                            being_destroyed;
+	unsigned int			key_len;
 	struct rhashtable_params	p;
 	struct work_struct		run_work;
 	struct mutex                    mutex;
@@ -199,13 +202,31 @@ static inline unsigned int rht_key_hashfn(
 	struct rhashtable *ht, const struct bucket_table *tbl,
 	const void *key, const struct rhashtable_params params)
 {
+	unsigned hash;
+
 	/* params must be equal to ht->p if it isn't constant. */
-	unsigned key_len = __builtin_constant_p(params.key_len) ?
-			   (params.key_len ?: ht->p.key_len) :
-			   params.key_len;
+	if (!__builtin_constant_p(params.key_len))
+		hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
+	else if (params.key_len) {
+		unsigned key_len = params.key_len;
+
+		if (params.hashfn)
+			hash = params.hashfn(key, key_len, tbl->hash_rnd);
+		else if (key_len & (sizeof(u32) - 1))
+			hash = jhash(key, key_len, tbl->hash_rnd);
+		else
+			hash = jhash2(key, key_len / sizeof(u32),
+				      tbl->hash_rnd);
+	} else {
+		unsigned key_len = ht->p.key_len;
+
+		if (params.hashfn)
+			hash = params.hashfn(key, key_len, tbl->hash_rnd);
+		else
+			hash = jhash(key, key_len, tbl->hash_rnd);
+	}
 
-	return rht_bucket_index(tbl, params.hashfn(key, key_len,
-						   tbl->hash_rnd));
+	return rht_bucket_index(tbl, hash);
 }
 
 static inline unsigned int rht_head_hashfn(
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 618a3f00d712..798f01d64ab0 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -532,6 +532,11 @@ static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 		   (unsigned long)params->min_size);
 }
 
+static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
+{
+	return jhash2(key, length, seed);
+}
+
 /**
  * rhashtable_init - initialize a new hash table
  * @ht:		hash table to be initialized
@@ -583,7 +588,7 @@ int rhashtable_init(struct rhashtable *ht,
 
 	size = HASH_DEFAULT_SIZE;
 
-	if ((!(params->key_len && params->hashfn) && !params->obj_hashfn) ||
+	if ((!params->key_len && !params->obj_hashfn) ||
 	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
 
@@ -610,6 +615,16 @@ int rhashtable_init(struct rhashtable *ht,
 	else
 		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
 
+	ht->key_len = ht->p.key_len;
+	if (!params->hashfn) {
+		ht->p.hashfn = jhash;
+
+		if (!(ht->key_len & (sizeof(u32) - 1))) {
+			ht->key_len /= sizeof(u32);
+			ht->p.hashfn = rhashtable_jhash2;
+		}
+	}
+
 	tbl = bucket_table_alloc(ht, size);
 	if (tbl == NULL)
 		return -ENOMEM;
-- 
cgit v1.2.3


From b824478b2145be78ac19e1cf44e2b9036c7a9608 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 24 Mar 2015 00:50:26 +1100
Subject: rhashtable: Add multiple rehash support

This patch adds the missing bits to allow multiple rehashes.  The
read-side as well as remove already handle this correctly.  So it's
only the rehasher and insertion that need modification to handle
this.

Note that this patch doesn't actually enable it so for now rehashing
is still only performed by the worker thread.

This patch also disables the explicit expand/shrink interface because
the table is meant to expand and shrink automatically, and continuing
to export these interfaces unnecessarily complicates the life of the
rehasher since the rehash process is now composed of two parts.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 26 +++++++-------
 lib/rhashtable.c           | 87 ++++++++++++++++++++++++++++++++++++++--------
 lib/test_rhashtable.c      | 24 -------------
 3 files changed, 86 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index bc2488b98321..e8ffcdb5e239 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -308,9 +308,6 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct rhash_head *obj,
 			   struct bucket_table *old_tbl);
 
-int rhashtable_expand(struct rhashtable *ht);
-int rhashtable_shrink(struct rhashtable *ht);
-
 int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
 int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
@@ -541,17 +538,22 @@ static inline int __rhashtable_insert_fast(
 	rcu_read_lock();
 
 	tbl = rht_dereference_rcu(ht->tbl, ht);
-	hash = rht_head_hashfn(ht, tbl, obj, params);
-	lock = rht_bucket_lock(tbl, hash);
-
-	spin_lock_bh(lock);
 
-	/* Because we have already taken the bucket lock in tbl,
-	 * if we find that future_tbl is not yet visible then
-	 * that guarantees all other insertions of the same entry
-	 * will also grab the bucket lock in tbl because until
-	 * the rehash completes ht->tbl won't be changed.
+	/* All insertions must grab the oldest table containing
+	 * the hashed bucket that is yet to be rehashed.
 	 */
+	for (;;) {
+		hash = rht_head_hashfn(ht, tbl, obj, params);
+		lock = rht_bucket_lock(tbl, hash);
+		spin_lock_bh(lock);
+
+		if (tbl->rehash <= hash)
+			break;
+
+		spin_unlock_bh(lock);
+		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	}
+
 	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	if (unlikely(new_tbl)) {
 		err = rhashtable_insert_slow(ht, key, obj, new_tbl);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 9623be345d9c..5e04403e25f5 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -136,11 +136,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	return tbl;
 }
 
+static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
+						  struct bucket_table *tbl)
+{
+	struct bucket_table *new_tbl;
+
+	do {
+		new_tbl = tbl;
+		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	} while (tbl);
+
+	return new_tbl;
+}
+
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-	struct bucket_table *new_tbl =
-		rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl;
+	struct bucket_table *new_tbl = rhashtable_last_table(ht,
+		rht_dereference_rcu(old_tbl->future_tbl, ht));
 	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
@@ -196,12 +209,18 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
 	spin_unlock_bh(old_bucket_lock);
 }
 
-static void rhashtable_rehash(struct rhashtable *ht,
-			      struct bucket_table *new_tbl)
+static int rhashtable_rehash_attach(struct rhashtable *ht,
+				    struct bucket_table *old_tbl,
+				    struct bucket_table *new_tbl)
 {
-	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-	struct rhashtable_walker *walker;
-	unsigned old_hash;
+	/* Protect future_tbl using the first bucket lock. */
+	spin_lock_bh(old_tbl->locks);
+
+	/* Did somebody beat us to it? */
+	if (rcu_access_pointer(old_tbl->future_tbl)) {
+		spin_unlock_bh(old_tbl->locks);
+		return -EEXIST;
+	}
 
 	/* Make insertions go into the new, empty table right away. Deletions
 	 * and lookups will be attempted in both tables until we synchronize.
@@ -211,6 +230,22 @@ static void rhashtable_rehash(struct rhashtable *ht,
 	/* Ensure the new table is visible to readers. */
 	smp_wmb();
 
+	spin_unlock_bh(old_tbl->locks);
+
+	return 0;
+}
+
+static int rhashtable_rehash_table(struct rhashtable *ht)
+{
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl;
+	struct rhashtable_walker *walker;
+	unsigned old_hash;
+
+	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
+	if (!new_tbl)
+		return 0;
+
 	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
 		rhashtable_rehash_chain(ht, old_hash);
 
@@ -225,6 +260,8 @@ static void rhashtable_rehash(struct rhashtable *ht,
 	 * remain.
 	 */
 	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+
+	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
 
 /**
@@ -242,20 +279,25 @@ static void rhashtable_rehash(struct rhashtable *ht,
  * It is valid to have concurrent insertions and deletions protected by per
  * bucket locks or concurrent RCU protected lookups and traversals.
  */
-int rhashtable_expand(struct rhashtable *ht)
+static int rhashtable_expand(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
+	old_tbl = rhashtable_last_table(ht, old_tbl);
+
 	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
-	rhashtable_rehash(ht, new_tbl);
-	return 0;
+	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+	if (err)
+		bucket_table_free(new_tbl);
+
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_expand);
 
 /**
  * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
@@ -273,10 +315,11 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
  * It is valid to have concurrent insertions and deletions protected by per
  * bucket locks or concurrent RCU protected lookups and traversals.
  */
-int rhashtable_shrink(struct rhashtable *ht)
+static int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 	unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
@@ -286,19 +329,25 @@ int rhashtable_shrink(struct rhashtable *ht)
 	if (old_tbl->size <= size)
 		return 0;
 
+	if (rht_dereference(old_tbl->future_tbl, ht))
+		return -EEXIST;
+
 	new_tbl = bucket_table_alloc(ht, size);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
-	rhashtable_rehash(ht, new_tbl);
-	return 0;
+	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+	if (err)
+		bucket_table_free(new_tbl);
+
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_shrink);
 
 static void rht_deferred_worker(struct work_struct *work)
 {
 	struct rhashtable *ht;
 	struct bucket_table *tbl;
+	int err = 0;
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
@@ -306,13 +355,20 @@ static void rht_deferred_worker(struct work_struct *work)
 		goto unlock;
 
 	tbl = rht_dereference(ht->tbl, ht);
+	tbl = rhashtable_last_table(ht, tbl);
 
 	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
 	else if (rht_shrink_below_30(ht, tbl))
 		rhashtable_shrink(ht);
+
+	err = rhashtable_rehash_table(ht);
+
 unlock:
 	mutex_unlock(&ht->mutex);
+
+	if (err)
+		schedule_work(&ht->run_work);
 }
 
 int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
@@ -323,6 +379,7 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 	unsigned hash;
 	int err = -EEXIST;
 
+	tbl = rhashtable_last_table(ht, tbl);
 	hash = head_hashfn(ht, tbl, obj);
 	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
 
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index a2ba6adb60a2..a42a0d44e818 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -155,30 +155,6 @@ static int __init test_rhashtable(struct rhashtable *ht)
 	test_rht_lookup(ht);
 	rcu_read_unlock();
 
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info("  Table expansion iteration %u...\n", i);
-		mutex_lock(&ht->mutex);
-		rhashtable_expand(ht);
-		mutex_unlock(&ht->mutex);
-
-		rcu_read_lock();
-		pr_info("  Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info("  Table shrinkage iteration %u...\n", i);
-		mutex_lock(&ht->mutex);
-		rhashtable_shrink(ht);
-		mutex_unlock(&ht->mutex);
-
-		rcu_read_lock();
-		pr_info("  Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
 	rcu_read_lock();
 	test_bucket_stats(ht, true);
 	rcu_read_unlock();
-- 
cgit v1.2.3


From ccd57b1bd32460d27bbb9c599e795628a3c66983 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 24 Mar 2015 00:50:28 +1100
Subject: rhashtable: Add immediate rehash during insertion

This patch reintroduces immediate rehash during insertion.  If
we find during insertion that the table is full or the chain
length exceeds a set limit (currently 16 but may be disabled
with insecure_elasticity) then we will force an immediate rehash.
The rehash will contain an expansion if the table utilisation
exceeds 75%.

If this rehash fails then the insertion will fail.  Otherwise the
insertion will be reattempted in the new hash table.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 42 ++++++++++++++++++++++++++++----
 lib/rhashtable.c           | 60 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 96 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index e8ffcdb5e239..f9ecf32bce55 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -103,6 +103,7 @@ struct rhashtable;
  * @max_size: Maximum size while expanding
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
+ * @insecure_elasticity: Set to true to disable chain length checks
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
  * @obj_hashfn: Function to hash object
@@ -116,6 +117,7 @@ struct rhashtable_params {
 	unsigned int		max_size;
 	unsigned int		min_size;
 	u32			nulls_base;
+	bool			insecure_elasticity;
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
@@ -127,6 +129,7 @@ struct rhashtable_params {
  * @tbl: Bucket table
  * @nelems: Number of elements in table
  * @key_len: Key length for hashfn
+ * @elasticity: Maximum chain length before rehash
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
@@ -137,6 +140,7 @@ struct rhashtable {
 	atomic_t			nelems;
 	bool                            being_destroyed;
 	unsigned int			key_len;
+	unsigned int			elasticity;
 	struct rhashtable_params	p;
 	struct work_struct		run_work;
 	struct mutex                    mutex;
@@ -266,6 +270,17 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht,
 	       tbl->size > ht->p.min_size;
 }
 
+/**
+ * rht_grow_above_100 - returns true if nelems > table-size
+ * @ht:		hash table
+ * @tbl:	current table
+ */
+static inline bool rht_grow_above_100(const struct rhashtable *ht,
+				      const struct bucket_table *tbl)
+{
+	return atomic_read(&ht->nelems) > tbl->size;
+}
+
 /* The bucket lock is selected based on the hash and protects mutations
  * on a group of hash buckets.
  *
@@ -307,6 +322,7 @@ int rhashtable_init(struct rhashtable *ht,
 int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct rhash_head *obj,
 			   struct bucket_table *old_tbl);
+int rhashtable_insert_rehash(struct rhashtable *ht);
 
 int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
@@ -529,12 +545,14 @@ static inline int __rhashtable_insert_fast(
 		.ht = ht,
 		.key = key,
 	};
-	int err = -EEXIST;
 	struct bucket_table *tbl, *new_tbl;
 	struct rhash_head *head;
 	spinlock_t *lock;
+	unsigned elasticity;
 	unsigned hash;
+	int err;
 
+restart:
 	rcu_read_lock();
 
 	tbl = rht_dereference_rcu(ht->tbl, ht);
@@ -557,20 +575,34 @@ static inline int __rhashtable_insert_fast(
 	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	if (unlikely(new_tbl)) {
 		err = rhashtable_insert_slow(ht, key, obj, new_tbl);
+		if (err == -EAGAIN)
+			goto slow_path;
 		goto out;
 	}
 
-	if (!key)
-		goto skip_lookup;
+	if (unlikely(rht_grow_above_100(ht, tbl))) {
+slow_path:
+		spin_unlock_bh(lock);
+		rcu_read_unlock();
+		err = rhashtable_insert_rehash(ht);
+		if (err)
+			return err;
+
+		goto restart;
+	}
 
+	err = -EEXIST;
+	elasticity = ht->elasticity;
 	rht_for_each(head, tbl, hash) {
-		if (unlikely(!(params.obj_cmpfn ?
+		if (key &&
+		    unlikely(!(params.obj_cmpfn ?
 			       params.obj_cmpfn(&arg, rht_obj(ht, head)) :
 			       rhashtable_compare(&arg, rht_obj(ht, head)))))
 			goto out;
+		if (!--elasticity)
+			goto slow_path;
 	}
 
-skip_lookup:
 	err = 0;
 
 	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 220a11a13d40..7686c1e9934a 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -375,21 +375,76 @@ unlock:
 		schedule_work(&ht->run_work);
 }
 
+static bool rhashtable_check_elasticity(struct rhashtable *ht,
+					struct bucket_table *tbl,
+					unsigned hash)
+{
+	unsigned elasticity = ht->elasticity;
+	struct rhash_head *head;
+
+	rht_for_each(head, tbl, hash)
+		if (!--elasticity)
+			return true;
+
+	return false;
+}
+
+int rhashtable_insert_rehash(struct rhashtable *ht)
+{
+	struct bucket_table *old_tbl;
+	struct bucket_table *new_tbl;
+	struct bucket_table *tbl;
+	unsigned int size;
+	int err;
+
+	old_tbl = rht_dereference_rcu(ht->tbl, ht);
+	tbl = rhashtable_last_table(ht, old_tbl);
+
+	size = tbl->size;
+
+	if (rht_grow_above_75(ht, tbl))
+		size *= 2;
+	/* More than two rehashes (not resizes) detected. */
+	else if (WARN_ON(old_tbl != tbl && old_tbl->size == size))
+		return -EBUSY;
+
+	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
+	if (new_tbl == NULL)
+		return -ENOMEM;
+
+	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
+	if (err) {
+		bucket_table_free(new_tbl);
+		if (err == -EEXIST)
+			err = 0;
+	} else
+		schedule_work(&ht->run_work);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
+
 int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct rhash_head *obj,
 			   struct bucket_table *tbl)
 {
 	struct rhash_head *head;
 	unsigned hash;
-	int err = -EEXIST;
+	int err;
 
 	tbl = rhashtable_last_table(ht, tbl);
 	hash = head_hashfn(ht, tbl, obj);
 	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
 
+	err = -EEXIST;
 	if (key && rhashtable_lookup_fast(ht, key, ht->p))
 		goto exit;
 
+	err = -EAGAIN;
+	if (rhashtable_check_elasticity(ht, tbl, hash) ||
+	    rht_grow_above_100(ht, tbl))
+		goto exit;
+
 	err = 0;
 
 	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
@@ -678,6 +733,9 @@ int rhashtable_init(struct rhashtable *ht,
 
 	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
 
+	if (!params->insecure_elasticity)
+		ht->elasticity = 16;
+
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
 	else
-- 
cgit v1.2.3


From 3d1bec99320d4e96897805440f8cf4f68eff226b Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 23:36:00 +0100
Subject: ipv6: introduce secret_stable to ipv6_devconf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch implements the procfs logic for the stable_address knob:
The secret is formatted as an ipv6 address and will be stored per
interface and per namespace. We track initialized flag and return EIO
errors until the secret is set.

We don't inherit the secret to newly created namespaces.

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h      |  4 +++
 include/uapi/linux/ipv6.h |  1 +
 net/ipv6/addrconf.c       | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 4d5169f5d7d1..82806c60aa42 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -53,6 +53,10 @@ struct ipv6_devconf {
 	__s32           ndisc_notify;
 	__s32		suppress_frag_ndisc;
 	__s32		accept_ra_mtu;
+	struct ipv6_stable_secret {
+		bool initialized;
+		struct in6_addr secret;
+	} stable_secret;
 	void		*sysctl;
 };
 
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 437a6a4b125a..5efa54ae567c 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -170,6 +170,7 @@ enum {
 	DEVCONF_ACCEPT_RA_FROM_LOCAL,
 	DEVCONF_USE_OPTIMISTIC,
 	DEVCONF_ACCEPT_RA_MTU,
+	DEVCONF_STABLE_SECRET,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 158378e73f0a..5b967c8a617a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -46,6 +46,7 @@
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/net.h>
+#include <linux/inet.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
 #include <linux/if_addr.h>
@@ -102,6 +103,9 @@
 
 #define	INFINITY_LIFE_TIME	0xFFFFFFFF
 
+#define IPV6_MAX_STRLEN \
+	sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")
+
 static inline u32 cstamp_delta(unsigned long cstamp)
 {
 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
@@ -202,6 +206,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.accept_dad		= 1,
 	.suppress_frag_ndisc	= 1,
 	.accept_ra_mtu		= 1,
+	.stable_secret		= {
+		.initialized = false,
+	}
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -240,6 +247,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.accept_dad		= 1,
 	.suppress_frag_ndisc	= 1,
 	.accept_ra_mtu		= 1,
+	.stable_secret		= {
+		.initialized = false,
+	},
 };
 
 /* Check if a valid qdisc is available */
@@ -4430,6 +4440,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
 	array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
 	array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
+	/* we omit DEVCONF_STABLE_SECRET for now */
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -5074,6 +5085,53 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
 	return ret;
 }
 
+static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
+					 void __user *buffer, size_t *lenp,
+					 loff_t *ppos)
+{
+	int err;
+	struct in6_addr addr;
+	char str[IPV6_MAX_STRLEN];
+	struct ctl_table lctl = *ctl;
+	struct ipv6_stable_secret *secret = ctl->data;
+
+	lctl.maxlen = IPV6_MAX_STRLEN;
+	lctl.data = str;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (!write && !secret->initialized) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (!write) {
+		err = snprintf(str, sizeof(str), "%pI6",
+			       &secret->secret);
+		if (err >= sizeof(str)) {
+			err = -EIO;
+			goto out;
+		}
+	}
+
+	err = proc_dostring(&lctl, write, buffer, lenp, ppos);
+	if (err || !write)
+		goto out;
+
+	if (in6_pton(str, -1, addr.in6_u.u6_addr8, -1, NULL) != 1) {
+		err = -EIO;
+		goto out;
+	}
+
+	secret->initialized = true;
+	secret->secret = addr;
+
+out:
+	rtnl_unlock();
+
+	return err;
+}
 
 static struct addrconf_sysctl_table
 {
@@ -5346,6 +5404,13 @@ static struct addrconf_sysctl_table
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
+		{
+			.procname	= "stable_secret",
+			.data		= &ipv6_devconf.stable_secret,
+			.maxlen		= IPV6_MAX_STRLEN,
+			.mode		= 0600,
+			.proc_handler	= addrconf_sysctl_stable_secret,
+		},
 		{
 			/* sentinel */
 		}
@@ -5442,6 +5507,9 @@ static int __net_init addrconf_init_net(struct net *net)
 	dflt->autoconf = ipv6_defaults.autoconf;
 	dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
 
+	dflt->stable_secret.initialized = false;
+	all->stable_secret.initialized = false;
+
 	net->ipv6.devconf_all = all;
 	net->ipv6.devconf_dflt = dflt;
 
-- 
cgit v1.2.3


From 622c81d57b392cc9be836670eb464a4dfaa9adfe Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 23:36:01 +0100
Subject: ipv6: generation of stable privacy addresses for link-local and
 autoconf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch implements the stable privacy address generation for
link-local and autoconf addresses as specified in RFC7217.

  RID = F(Prefix, Net_Iface, Network_ID, DAD_Counter, secret_key)

is the RID (random identifier). As the hash function F we chose one
round of sha1. Prefix will be either the link-local prefix or the
router advertised one. As Net_Iface we use the MAC address of the
device. DAD_Counter and secret_key are implemented as specified.

We don't use Network_ID, as it couples the code too closely to other
subsystems. It is specified as optional in the RFC.

As Net_Iface we only use the MAC address: we simply have no stable
identifier in the kernel we could possibly use: because this code might
run very early, we cannot depend on names, as they might be changed by
user space early on during the boot process.

A new address generation mode is introduced,
IN6_ADDR_GEN_MODE_STABLE_PRIVACY. With iproute2 one can switch back to
none or eui64 address configuration mode although the stable_secret is
already set.

We refuse writes to ipv6/conf/all/stable_secret but only allow
ipv6/conf/default/stable_secret and the interface specific file to be
written to. The default stable_secret is used as the parameter for the
namespace, the interface specific can overwrite the secret, e.g. when
switching a network configuration from one system to another while
inheriting the secret.

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h |   1 +
 net/ipv6/addrconf.c          | 130 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 127 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index f5f5edd5ae5f..7ffb18df01ca 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -216,6 +216,7 @@ enum {
 enum in6_addr_gen_mode {
 	IN6_ADDR_GEN_MODE_EUI64,
 	IN6_ADDR_GEN_MODE_NONE,
+	IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
 };
 
 /* Bridge section */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5b967c8a617a..6813268ce8b8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -131,6 +131,9 @@ static void ipv6_regen_rndid(unsigned long data);
 
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static int ipv6_count_addresses(struct inet6_dev *idev);
+static int ipv6_generate_stable_address(struct in6_addr *addr,
+					u8 dad_count,
+					const struct inet6_dev *idev);
 
 /*
  *	Configured unicast address hash table
@@ -2302,6 +2305,11 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 				       in6_dev->token.s6_addr + 8, 8);
 				read_unlock_bh(&in6_dev->lock);
 				tokenized = true;
+			} else if (in6_dev->addr_gen_mode ==
+				   IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
+				   !ipv6_generate_stable_address(&addr, 0,
+								 in6_dev)) {
+				goto ok;
 			} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
 				   ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
 				in6_dev_put(in6_dev);
@@ -2820,12 +2828,98 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
 	}
 }
 
+static bool ipv6_reserved_interfaceid(struct in6_addr address)
+{
+	if ((address.s6_addr32[2] | address.s6_addr32[3]) == 0)
+		return true;
+
+	if (address.s6_addr32[2] == htonl(0x02005eff) &&
+	    ((address.s6_addr32[3] & htonl(0xfe000000)) == htonl(0xfe000000)))
+		return true;
+
+	if (address.s6_addr32[2] == htonl(0xfdffffff) &&
+	    ((address.s6_addr32[3] & htonl(0xffffff80)) == htonl(0xffffff80)))
+		return true;
+
+	return false;
+}
+
+static int ipv6_generate_stable_address(struct in6_addr *address,
+					u8 dad_count,
+					const struct inet6_dev *idev)
+{
+	static const int idgen_retries = 3;
+
+	static DEFINE_SPINLOCK(lock);
+	static __u32 digest[SHA_DIGEST_WORDS];
+	static __u32 workspace[SHA_WORKSPACE_WORDS];
+
+	static union {
+		char __data[SHA_MESSAGE_BYTES];
+		struct {
+			struct in6_addr secret;
+			__be64 prefix;
+			unsigned char hwaddr[MAX_ADDR_LEN];
+			u8 dad_count;
+		} __packed;
+	} data;
+
+	struct in6_addr secret;
+	struct in6_addr temp;
+	struct net *net = dev_net(idev->dev);
+
+	BUILD_BUG_ON(sizeof(data.__data) != sizeof(data));
+
+	if (idev->cnf.stable_secret.initialized)
+		secret = idev->cnf.stable_secret.secret;
+	else if (net->ipv6.devconf_dflt->stable_secret.initialized)
+		secret = net->ipv6.devconf_dflt->stable_secret.secret;
+	else
+		return -1;
+
+retry:
+	spin_lock_bh(&lock);
+
+	sha_init(digest);
+	memset(&data, 0, sizeof(data));
+	memset(workspace, 0, sizeof(workspace));
+	memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len);
+	data.prefix = ((__be64)address->s6_addr32[0] << 32) |
+		       (__be64)address->s6_addr32[1];
+	data.secret = secret;
+	data.dad_count = dad_count;
+
+	sha_transform(digest, data.__data, workspace);
+
+	temp = *address;
+	temp.s6_addr32[2] = digest[0];
+	temp.s6_addr32[3] = digest[1];
+
+	spin_unlock_bh(&lock);
+
+	if (ipv6_reserved_interfaceid(temp)) {
+		dad_count++;
+		if (dad_count > idgen_retries)
+			return -1;
+		goto retry;
+	}
+
+	*address = temp;
+	return 0;
+}
+
 static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 {
-	if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
-		struct in6_addr addr;
+	struct in6_addr addr;
+
+	ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
 
-		ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
+	if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
+		if (!ipv6_generate_stable_address(&addr, 0, idev))
+			addrconf_add_linklocal(idev, &addr);
+		else if (prefix_route)
+			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+	} else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
 		/* addrconf_add_linklocal also adds a prefix_route and we
 		 * only need to care about prefix routes if ipv6_generate_eui64
 		 * couldn't generate one.
@@ -4675,8 +4769,15 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
 		u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
 
 		if (mode != IN6_ADDR_GEN_MODE_EUI64 &&
-		    mode != IN6_ADDR_GEN_MODE_NONE)
+		    mode != IN6_ADDR_GEN_MODE_NONE &&
+		    mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY)
+			return -EINVAL;
+
+		if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
+		    !idev->cnf.stable_secret.initialized &&
+		    !dev_net(dev)->ipv6.devconf_dflt->stable_secret.initialized)
 			return -EINVAL;
+
 		idev->addr_gen_mode = mode;
 		err = 0;
 	}
@@ -5093,8 +5194,12 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
 	struct in6_addr addr;
 	char str[IPV6_MAX_STRLEN];
 	struct ctl_table lctl = *ctl;
+	struct net *net = ctl->extra2;
 	struct ipv6_stable_secret *secret = ctl->data;
 
+	if (&net->ipv6.devconf_all->stable_secret == ctl->data)
+		return -EIO;
+
 	lctl.maxlen = IPV6_MAX_STRLEN;
 	lctl.data = str;
 
@@ -5127,6 +5232,23 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
 	secret->initialized = true;
 	secret->secret = addr;
 
+	if (&net->ipv6.devconf_dflt->stable_secret == ctl->data) {
+		struct net_device *dev;
+
+		for_each_netdev(net, dev) {
+			struct inet6_dev *idev = __in6_dev_get(dev);
+
+			if (idev) {
+				idev->addr_gen_mode =
+					IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+			}
+		}
+	} else {
+		struct inet6_dev *idev = ctl->extra1;
+
+		idev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+	}
+
 out:
 	rtnl_unlock();
 
-- 
cgit v1.2.3


From 64236f3f3d742469e4027b83a9515e84e9ab21b4 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 23:36:02 +0100
Subject: ipv6: introduce IFA_F_STABLE_PRIVACY flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to mark appropriate addresses so we can do retries in case their
DAD failed.

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_addr.h |  1 +
 net/ipv6/addrconf.c          | 14 ++++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h
index 40fdfea39714..4318ab1635ce 100644
--- a/include/uapi/linux/if_addr.h
+++ b/include/uapi/linux/if_addr.h
@@ -51,6 +51,7 @@ enum {
 #define IFA_F_MANAGETEMPADDR	0x100
 #define IFA_F_NOPREFIXROUTE	0x200
 #define IFA_F_MCAUTOJOIN	0x400
+#define IFA_F_STABLE_PRIVACY	0x800
 
 struct ifa_cacheinfo {
 	__u32	ifa_prefered;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6813268ce8b8..c2357b6f62dd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2199,6 +2199,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 	__u32 valid_lft;
 	__u32 prefered_lft;
 	int addr_type;
+	u32 addr_flags = 0;
 	struct inet6_dev *in6_dev;
 	struct net *net = dev_net(dev);
 
@@ -2309,6 +2310,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 				   IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
 				   !ipv6_generate_stable_address(&addr, 0,
 								 in6_dev)) {
+				addr_flags |= IFA_F_STABLE_PRIVACY;
 				goto ok;
 			} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
 				   ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
@@ -2328,7 +2330,6 @@ ok:
 
 		if (ifp == NULL && valid_lft) {
 			int max_addresses = in6_dev->cnf.max_addresses;
-			u32 addr_flags = 0;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 			if (in6_dev->cnf.optimistic_dad &&
@@ -2807,10 +2808,11 @@ static void init_loopback(struct net_device *dev)
 	}
 }
 
-static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr)
+static void addrconf_add_linklocal(struct inet6_dev *idev,
+				   const struct in6_addr *addr, u32 flags)
 {
 	struct inet6_ifaddr *ifp;
-	u32 addr_flags = IFA_F_PERMANENT;
+	u32 addr_flags = flags | IFA_F_PERMANENT;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	if (idev->cnf.optimistic_dad &&
@@ -2818,7 +2820,6 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
 		addr_flags |= IFA_F_OPTIMISTIC;
 #endif
 
-
 	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
 			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 	if (!IS_ERR(ifp)) {
@@ -2916,7 +2917,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 
 	if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
 		if (!ipv6_generate_stable_address(&addr, 0, idev))
-			addrconf_add_linklocal(idev, &addr);
+			addrconf_add_linklocal(idev, &addr,
+					       IFA_F_STABLE_PRIVACY);
 		else if (prefix_route)
 			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
 	} else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
@@ -2925,7 +2927,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 		 * couldn't generate one.
 		 */
 		if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
-			addrconf_add_linklocal(idev, &addr);
+			addrconf_add_linklocal(idev, &addr, 0);
 		else if (prefix_route)
 			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
 	}
-- 
cgit v1.2.3


From 8e8e676d0b3c7f074c719c7c05b20296b9b0b0b1 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 23:36:03 +0100
Subject: ipv6: collapse state_lock and lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |  3 +--
 net/ipv6/addrconf.c    | 31 +++++++++++++++----------------
 2 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 98e5f9578f86..d89397a22903 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -41,13 +41,12 @@ enum {
 struct inet6_ifaddr {
 	struct in6_addr		addr;
 	__u32			prefix_len;
-	
+
 	/* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */
 	__u32			valid_lft;
 	__u32			prefered_lft;
 	atomic_t		refcnt;
 	spinlock_t		lock;
-	spinlock_t		state_lock;
 
 	int			state;
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c2357b6f62dd..1cc5320e510f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -873,7 +873,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 		ifa->peer_addr = *peer_addr;
 
 	spin_lock_init(&ifa->lock);
-	spin_lock_init(&ifa->state_lock);
 	INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = scope;
@@ -1016,10 +1015,10 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	ASSERT_RTNL();
 
-	spin_lock_bh(&ifp->state_lock);
+	spin_lock_bh(&ifp->lock);
 	state = ifp->state;
 	ifp->state = INET6_IFADDR_STATE_DEAD;
-	spin_unlock_bh(&ifp->state_lock);
+	spin_unlock_bh(&ifp->lock);
 
 	if (state == INET6_IFADDR_STATE_DEAD)
 		goto out;
@@ -1699,12 +1698,12 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
 {
 	int err = -ENOENT;
 
-	spin_lock_bh(&ifp->state_lock);
+	spin_lock_bh(&ifp->lock);
 	if (ifp->state == INET6_IFADDR_STATE_DAD) {
 		ifp->state = INET6_IFADDR_STATE_POSTDAD;
 		err = 0;
 	}
-	spin_unlock_bh(&ifp->state_lock);
+	spin_unlock_bh(&ifp->lock);
 
 	return err;
 }
@@ -1737,10 +1736,10 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 		}
 	}
 
-	spin_lock_bh(&ifp->state_lock);
+	spin_lock_bh(&ifp->lock);
 	/* transition from _POSTDAD to _ERRDAD */
 	ifp->state = INET6_IFADDR_STATE_ERRDAD;
-	spin_unlock_bh(&ifp->state_lock);
+	spin_unlock_bh(&ifp->lock);
 
 	addrconf_mod_dad_work(ifp, 0);
 }
@@ -2369,7 +2368,7 @@ ok:
 			u32 stored_lft;
 
 			/* update lifetime (RFC2462 5.5.3 e) */
-			spin_lock(&ifp->lock);
+			spin_lock_bh(&ifp->lock);
 			now = jiffies;
 			if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
 				stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
@@ -2399,12 +2398,12 @@ ok:
 				ifp->tstamp = now;
 				flags = ifp->flags;
 				ifp->flags &= ~IFA_F_DEPRECATED;
-				spin_unlock(&ifp->lock);
+				spin_unlock_bh(&ifp->lock);
 
 				if (!(flags&IFA_F_TENTATIVE))
 					ipv6_ifa_notify(0, ifp);
 			} else
-				spin_unlock(&ifp->lock);
+				spin_unlock_bh(&ifp->lock);
 
 			manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
 					 create, now);
@@ -3265,10 +3264,10 @@ restart:
 
 		write_unlock_bh(&idev->lock);
 
-		spin_lock_bh(&ifa->state_lock);
+		spin_lock_bh(&ifa->lock);
 		state = ifa->state;
 		ifa->state = INET6_IFADDR_STATE_DEAD;
-		spin_unlock_bh(&ifa->state_lock);
+		spin_unlock_bh(&ifa->lock);
 
 		if (state != INET6_IFADDR_STATE_DEAD) {
 			__ipv6_ifa_notify(RTM_DELADDR, ifa);
@@ -3426,12 +3425,12 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp)
 {
 	bool begin_dad = false;
 
-	spin_lock_bh(&ifp->state_lock);
+	spin_lock_bh(&ifp->lock);
 	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
 		ifp->state = INET6_IFADDR_STATE_PREDAD;
 		begin_dad = true;
 	}
-	spin_unlock_bh(&ifp->state_lock);
+	spin_unlock_bh(&ifp->lock);
 
 	if (begin_dad)
 		addrconf_mod_dad_work(ifp, 0);
@@ -3453,7 +3452,7 @@ static void addrconf_dad_work(struct work_struct *w)
 
 	rtnl_lock();
 
-	spin_lock_bh(&ifp->state_lock);
+	spin_lock_bh(&ifp->lock);
 	if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
 		action = DAD_BEGIN;
 		ifp->state = INET6_IFADDR_STATE_DAD;
@@ -3461,7 +3460,7 @@ static void addrconf_dad_work(struct work_struct *w)
 		action = DAD_ABORT;
 		ifp->state = INET6_IFADDR_STATE_POSTDAD;
 	}
-	spin_unlock_bh(&ifp->state_lock);
+	spin_unlock_bh(&ifp->lock);
 
 	if (action == DAD_BEGIN) {
 		addrconf_dad_begin(ifp);
-- 
cgit v1.2.3


From 5f40ef77adb237954d615a76621df1b80a329b31 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 23:36:04 +0100
Subject: ipv6: do retries on stable privacy addresses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a DAD conflict is detected, we want to retry privacy stable address
generation up to idgen_retries (= 3) times with a delay of idgen_delay
(= 1 second). Add the logic to addrconf_dad_failure.

By design, we don't clean up dad failed permanent addresses.

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |  1 +
 net/ipv6/addrconf.c    | 57 +++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d89397a22903..1c8b6820b694 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -52,6 +52,7 @@ struct inet6_ifaddr {
 
 	__u32			flags;
 	__u8			dad_probes;
+	__u8			stable_privacy_retry;
 
 	__u16			scope;
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1cc5320e510f..9b51fdb42ba9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1710,6 +1710,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
 
 void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 {
+	struct in6_addr addr;
 	struct inet6_dev *idev = ifp->idev;
 
 	if (addrconf_dad_end(ifp)) {
@@ -1720,9 +1721,59 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 	net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n",
 			     ifp->idev->dev->name, &ifp->addr);
 
-	if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
-		struct in6_addr addr;
+	spin_lock_bh(&ifp->lock);
+
+	if (ifp->flags & IFA_F_STABLE_PRIVACY) {
+		int scope = ifp->scope;
+		u32 flags = ifp->flags;
+		struct in6_addr new_addr;
+		struct inet6_ifaddr *ifp2;
+		u32 valid_lft, preferred_lft;
+		int pfxlen = ifp->prefix_len;
+		const unsigned int idgen_retries = 3;
+		const unsigned int idgen_delay = 1 * HZ;
+		int retries = ifp->stable_privacy_retry + 1;
+
+		if (retries > idgen_retries) {
+			net_info_ratelimited("%s: privacy stable address generation failed because of DAD conflicts!\n",
+					     ifp->idev->dev->name);
+			goto errdad;
+		}
+
+		new_addr = ifp->addr;
+		if (ipv6_generate_stable_address(&new_addr, retries,
+						 idev))
+			goto errdad;
 
+		valid_lft = ifp->valid_lft;
+		preferred_lft = ifp->prefered_lft;
+
+		spin_unlock_bh(&ifp->lock);
+
+		if (idev->cnf.max_addresses &&
+		    ipv6_count_addresses(idev) >=
+		    idev->cnf.max_addresses)
+			goto lock_errdad;
+
+		net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n",
+				     ifp->idev->dev->name);
+
+		ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen,
+				     scope, flags, valid_lft,
+				     preferred_lft);
+		if (IS_ERR(ifp2))
+			goto lock_errdad;
+
+		spin_lock_bh(&ifp2->lock);
+		ifp2->stable_privacy_retry = retries;
+		ifp2->state = INET6_IFADDR_STATE_PREDAD;
+		spin_unlock_bh(&ifp2->lock);
+
+		addrconf_mod_dad_work(ifp2, idgen_delay);
+		in6_ifa_put(ifp2);
+lock_errdad:
+		spin_lock_bh(&ifp->lock);
+	} else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
 		addr.s6_addr32[0] = htonl(0xfe800000);
 		addr.s6_addr32[1] = 0;
 
@@ -1736,7 +1787,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 		}
 	}
 
-	spin_lock_bh(&ifp->lock);
+errdad:
 	/* transition from _POSTDAD to _ERRDAD */
 	ifp->state = INET6_IFADDR_STATE_ERRDAD;
 	spin_unlock_bh(&ifp->lock);
-- 
cgit v1.2.3


From 1855b7c3e8537c2a4f5a53c797624713bb3becb4 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 23:36:05 +0100
Subject: ipv6: introduce idgen_delay and idgen_retries knobs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is specified by RFC 7217.

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h   |  2 ++
 net/ipv6/addrconf.c        | 11 ++++-------
 net/ipv6/af_inet6.c        |  2 ++
 net/ipv6/sysctl_net_ipv6.c | 16 ++++++++++++++++
 4 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index ca0db12cd089..d2527bf81142 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -32,6 +32,8 @@ struct netns_sysctl_ipv6 {
 	int icmpv6_time;
 	int anycast_src_echo_reply;
 	int fwmark_reflect;
+	int idgen_retries;
+	int idgen_delay;
 };
 
 struct netns_ipv6 {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9b51fdb42ba9..d2d238334a11 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1712,6 +1712,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 {
 	struct in6_addr addr;
 	struct inet6_dev *idev = ifp->idev;
+	struct net *net = dev_net(ifp->idev->dev);
 
 	if (addrconf_dad_end(ifp)) {
 		in6_ifa_put(ifp);
@@ -1730,11 +1731,9 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 		struct inet6_ifaddr *ifp2;
 		u32 valid_lft, preferred_lft;
 		int pfxlen = ifp->prefix_len;
-		const unsigned int idgen_retries = 3;
-		const unsigned int idgen_delay = 1 * HZ;
 		int retries = ifp->stable_privacy_retry + 1;
 
-		if (retries > idgen_retries) {
+		if (retries > net->ipv6.sysctl.idgen_retries) {
 			net_info_ratelimited("%s: privacy stable address generation failed because of DAD conflicts!\n",
 					     ifp->idev->dev->name);
 			goto errdad;
@@ -1769,7 +1768,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 		ifp2->state = INET6_IFADDR_STATE_PREDAD;
 		spin_unlock_bh(&ifp2->lock);
 
-		addrconf_mod_dad_work(ifp2, idgen_delay);
+		addrconf_mod_dad_work(ifp2, net->ipv6.sysctl.idgen_delay);
 		in6_ifa_put(ifp2);
 lock_errdad:
 		spin_lock_bh(&ifp->lock);
@@ -2899,8 +2898,6 @@ static int ipv6_generate_stable_address(struct in6_addr *address,
 					u8 dad_count,
 					const struct inet6_dev *idev)
 {
-	static const int idgen_retries = 3;
-
 	static DEFINE_SPINLOCK(lock);
 	static __u32 digest[SHA_DIGEST_WORDS];
 	static __u32 workspace[SHA_WORKSPACE_WORDS];
@@ -2950,7 +2947,7 @@ retry:
 
 	if (ipv6_reserved_interfaceid(temp)) {
 		dad_count++;
-		if (dad_count > idgen_retries)
+		if (dad_count > dev_net(idev->dev)->ipv6.sysctl.idgen_retries)
 			return -1;
 		goto retry;
 	}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 6bafcc2c79e3..d8dcc526339e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -766,6 +766,8 @@ static int __net_init inet6_net_init(struct net *net)
 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
 	net->ipv6.sysctl.flowlabel_consistency = 1;
 	net->ipv6.sysctl.auto_flowlabels = 0;
+	net->ipv6.sysctl.idgen_retries = 3;
+	net->ipv6.sysctl.idgen_delay = 1 * HZ;
 	atomic_set(&net->ipv6.fib6_sernum, 1);
 
 	err = ipv6_init_mibs(net);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index c5c10fafcfe2..30f5a4ad04eb 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -54,6 +54,20 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "idgen_retries",
+		.data		= &init_net.ipv6.sysctl.idgen_retries,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "idgen_delay",
+		.data		= &init_net.ipv6.sysctl.idgen_delay,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
 	{ }
 };
 
@@ -93,6 +107,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
 	ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels;
 	ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect;
+	ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries;
+	ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
 
 	ipv6_route_table = ipv6_route_sysctl_init(net);
 	if (!ipv6_route_table)
-- 
cgit v1.2.3


From ba7c95ea3870fe7b847466d39a049ab6f156aa2c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 24 Mar 2015 09:53:17 +1100
Subject: rhashtable: Fix sleeping inside RCU critical section in walk_stop

The commit 963ecbd41a1026d99ec7537c050867428c397b89 ("rhashtable:
Fix use-after-free in rhashtable_walk_stop") fixed a real bug
but created another one because we may end up sleeping inside an
RCU critical section.

This patch fixes it properly by replacing the mutex with a spin
lock that specifically protects the walker lists.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 2 ++
 lib/rhashtable.c           | 7 +++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index f9ecf32bce55..d7be9cb0e91f 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -133,6 +133,7 @@ struct rhashtable_params {
  * @p: Configuration parameters
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
+ * @lock: Spin lock to protect walker list
  * @being_destroyed: True if table is set up for destruction
  */
 struct rhashtable {
@@ -144,6 +145,7 @@ struct rhashtable {
 	struct rhashtable_params	p;
 	struct work_struct		run_work;
 	struct mutex                    mutex;
+	spinlock_t			lock;
 };
 
 /**
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 7686c1e9934a..e96ad1a52c90 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -256,8 +256,10 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	/* Publish the new table pointer. */
 	rcu_assign_pointer(ht->tbl, new_tbl);
 
+	spin_lock(&ht->lock);
 	list_for_each_entry(walker, &old_tbl->walkers, list)
 		walker->tbl = NULL;
+	spin_unlock(&ht->lock);
 
 	/* Wait for readers. All new readers will see the new
 	 * table, and thus no references to the old table will
@@ -635,12 +637,12 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
 
 	ht = iter->ht;
 
-	mutex_lock(&ht->mutex);
+	spin_lock(&ht->lock);
 	if (tbl->rehash < tbl->size)
 		list_add(&iter->walker->list, &tbl->walkers);
 	else
 		iter->walker->tbl = NULL;
-	mutex_unlock(&ht->mutex);
+	spin_unlock(&ht->lock);
 
 	iter->p = NULL;
 
@@ -723,6 +725,7 @@ int rhashtable_init(struct rhashtable *ht,
 
 	memset(ht, 0, sizeof(*ht));
 	mutex_init(&ht->mutex);
+	spin_lock_init(&ht->lock);
 	memcpy(&ht->p, params, sizeof(*params));
 
 	if (params->min_size)
-- 
cgit v1.2.3


From 0117ec1970c5fa9c566045e7df8db76acc8f150e Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 18:40:02 +0100
Subject: net: remove never used forwarding_accel_ops pointer from net_device

Cc: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5ae69e7df867..08c4ab37189f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1342,7 +1342,6 @@ enum netdev_priv_flags {
  *	@netdev_ops:	Includes several pointers to callbacks,
  *			if one wants to override the ndo_*() functions
  *	@ethtool_ops:	Management operations
- *	@fwd_ops:	Management operations
  *	@header_ops:	Includes callbacks for creating,parsing,caching,etc
  *			of Layer 2 headers.
  *
@@ -1551,7 +1550,6 @@ struct net_device {
 #endif
 	const struct net_device_ops *netdev_ops;
 	const struct ethtool_ops *ethtool_ops;
-	const struct forwarding_accel_ops *fwd_ops;
 #ifdef CONFIG_NET_SWITCHDEV
 	const struct swdev_ops *swdev_ops;
 #endif
-- 
cgit v1.2.3


From 27cd5452476978283decb19e429e81fc6c71e74b Mon Sep 17 00:00:00 2001
From: Michal Sekletar <msekleta@redhat.com>
Date: Tue, 24 Mar 2015 14:48:41 +0100
Subject: filter: introduce SKF_AD_VLAN_TPID BPF extension

If vlan offloading takes place then vlan header is removed from frame
and its contents, both vlan_tci and vlan_proto, is available to user
space via TPACKET interface. However, only vlan_tci can be used in BPF
filters.

This commit introduces a new BPF extension. It makes possible to load
the value of vlan_proto (vlan TPID) to register A. Support for classic
BPF and eBPF is being added, analogous to skb->protocol.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jiri Pirko <jpirko@redhat.com>

Signed-off-by: Michal Sekletar <msekleta@redhat.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt |  3 ++-
 include/linux/filter.h              |  1 +
 include/uapi/linux/bpf.h            |  1 +
 include/uapi/linux/filter.h         |  3 ++-
 net/core/filter.c                   | 17 +++++++++++++++++
 tools/net/bpf_exp.l                 |  2 ++
 tools/net/bpf_exp.y                 | 11 ++++++++++-
 7 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 9930ecfbb465..135581f015e1 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -280,7 +280,8 @@ Possible BPF extensions are shown in the following table:
   rxhash                                skb->hash
   cpu                                   raw_smp_processor_id()
   vlan_tci                              skb_vlan_tag_get(skb)
-  vlan_pr                               skb_vlan_tag_present(skb)
+  vlan_avail                            skb_vlan_tag_present(skb)
+  vlan_tpid                             skb->vlan_proto
   rand                                  prandom_u32()
 
 These extensions can also be prefixed with '#'.
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9ee8c67ea249..fa11b3a367be 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -454,6 +454,7 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
 		BPF_ANCILLARY(VLAN_TAG_PRESENT);
 		BPF_ANCILLARY(PAY_OFFSET);
 		BPF_ANCILLARY(RANDOM);
+		BPF_ANCILLARY(VLAN_TPID);
 		}
 		/* Fallthrough. */
 	default:
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3dd314a45d0d..27dc4ec58840 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -182,6 +182,7 @@ struct __sk_buff {
 	__u32 protocol;
 	__u32 vlan_present;
 	__u32 vlan_tci;
+	__u32 vlan_proto;
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 47785d5ecf17..34c7936ca114 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -77,7 +77,8 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_VLAN_TAG_PRESENT 48
 #define SKF_AD_PAY_OFFSET	52
 #define SKF_AD_RANDOM	56
-#define SKF_AD_MAX	60
+#define SKF_AD_VLAN_TPID	60
+#define SKF_AD_MAX	64
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 084eacc4d1d4..32f43c59908c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -272,6 +272,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		insn += cnt - 1;
 		break;
 
+	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
+
+		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				      offsetof(struct sk_buff, vlan_proto));
+		/* A = ntohs(A) [emitting a nop or swap16] */
+		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
+		break;
+
 	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
 	case SKF_AD_OFF + SKF_AD_NLATTR:
 	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
@@ -1226,6 +1236,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 				      offsetof(struct sk_buff, protocol));
 		break;
 
+	case offsetof(struct __sk_buff, vlan_proto):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+				      offsetof(struct sk_buff, vlan_proto));
+		break;
+
 	case offsetof(struct __sk_buff, mark):
 		return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
 
diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l
index 833a96611da6..c83af3fb77de 100644
--- a/tools/net/bpf_exp.l
+++ b/tools/net/bpf_exp.l
@@ -92,6 +92,8 @@ extern void yyerror(const char *str);
 "#"?("cpu")	{ return K_CPU; }
 "#"?("vlan_tci") { return K_VLANT; }
 "#"?("vlan_pr")	{ return K_VLANP; }
+"#"?("vlan_avail")	{ return K_VLANP; }
+"#"?("vlan_tpid")	{ return K_VLANTPID; }
 "#"?("rand")	{ return K_RAND; }
 
 ":"		{ return ':'; }
diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y
index e6306c51c26f..f8332749b44c 100644
--- a/tools/net/bpf_exp.y
+++ b/tools/net/bpf_exp.y
@@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type);
 %token OP_LDXI
 
 %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE
-%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND
+%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_VLANTPID K_POFF K_RAND
 
 %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%'
 
@@ -167,6 +167,9 @@ ldb
 	| OP_LDB K_RAND {
 		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_RANDOM); }
+	| OP_LDB K_VLANTPID {
+		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_VLAN_TPID); }
 	;
 
 ldh
@@ -218,6 +221,9 @@ ldh
 	| OP_LDH K_RAND {
 		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_RANDOM); }
+	| OP_LDH K_VLANTPID {
+		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_VLAN_TPID); }
 	;
 
 ldi
@@ -274,6 +280,9 @@ ld
 	| OP_LD K_RAND {
 		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_RANDOM); }
+	| OP_LD K_VLANTPID {
+		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_VLAN_TPID); }
 	| OP_LD 'M' '[' number ']' {
 		bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); }
 	| OP_LD '[' 'x' '+' number ']' {
-- 
cgit v1.2.3


From 58be8a583d8d316448bafa5926414cfb83c02dec Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 24 Mar 2015 14:18:16 +0100
Subject: rhashtable: Extend RCU read lock into rhashtable_insert_rehash()

rhashtable_insert_rehash() requires RCU locks to be held in order
to access ht->tbl and traverse to the last table.

Fixes: ccd57b1bd324 ("rhashtable: Add immediate rehash during insertion")
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index d7be9cb0e91f..5976ab59b88f 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -585,8 +585,8 @@ restart:
 	if (unlikely(rht_grow_above_100(ht, tbl))) {
 slow_path:
 		spin_unlock_bh(lock);
-		rcu_read_unlock();
 		err = rhashtable_insert_rehash(ht);
+		rcu_read_unlock();
 		if (err)
 			return err;
 
-- 
cgit v1.2.3


From 299e5c32a37a6bca8175db177117467bd1ce970a Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 24 Mar 2015 14:18:17 +0100
Subject: rhashtable: Use 'unsigned int' consistently

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 14 +++++++-------
 lib/rhashtable.c           | 18 ++++++++++--------
 2 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 5976ab59b88f..f89cda067cb9 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -208,13 +208,13 @@ static inline unsigned int rht_key_hashfn(
 	struct rhashtable *ht, const struct bucket_table *tbl,
 	const void *key, const struct rhashtable_params params)
 {
-	unsigned hash;
+	unsigned int hash;
 
 	/* params must be equal to ht->p if it isn't constant. */
 	if (!__builtin_constant_p(params.key_len))
 		hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
 	else if (params.key_len) {
-		unsigned key_len = params.key_len;
+		unsigned int key_len = params.key_len;
 
 		if (params.hashfn)
 			hash = params.hashfn(key, key_len, tbl->hash_rnd);
@@ -224,7 +224,7 @@ static inline unsigned int rht_key_hashfn(
 			hash = jhash2(key, key_len / sizeof(u32),
 				      tbl->hash_rnd);
 	} else {
-		unsigned key_len = ht->p.key_len;
+		unsigned int key_len = ht->p.key_len;
 
 		if (params.hashfn)
 			hash = params.hashfn(key, key_len, tbl->hash_rnd);
@@ -512,7 +512,7 @@ static inline void *rhashtable_lookup_fast(
 	};
 	const struct bucket_table *tbl;
 	struct rhash_head *he;
-	unsigned hash;
+	unsigned int hash;
 
 	rcu_read_lock();
 
@@ -550,8 +550,8 @@ static inline int __rhashtable_insert_fast(
 	struct bucket_table *tbl, *new_tbl;
 	struct rhash_head *head;
 	spinlock_t *lock;
-	unsigned elasticity;
-	unsigned hash;
+	unsigned int elasticity;
+	unsigned int hash;
 	int err;
 
 restart:
@@ -718,7 +718,7 @@ static inline int __rhashtable_remove_fast(
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
 	spinlock_t * lock;
-	unsigned hash;
+	unsigned int hash;
 	int err = -ENOENT;
 
 	hash = rht_head_hashfn(ht, tbl, obj, params);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 8514f7c5f029..50abe4fec4b8 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -153,7 +153,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
 	return new_tbl;
 }
 
-static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
+static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl = rhashtable_last_table(ht,
@@ -162,7 +162,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
 	spinlock_t *new_bucket_lock;
-	unsigned new_hash;
+	unsigned int new_hash;
 
 	rht_for_each(entry, old_tbl, old_hash) {
 		err = 0;
@@ -199,7 +199,8 @@ out:
 	return err;
 }
 
-static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
+static void rhashtable_rehash_chain(struct rhashtable *ht,
+				    unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	spinlock_t *old_bucket_lock;
@@ -244,7 +245,7 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl;
 	struct rhashtable_walker *walker;
-	unsigned old_hash;
+	unsigned int old_hash;
 
 	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
 	if (!new_tbl)
@@ -324,11 +325,12 @@ static int rhashtable_expand(struct rhashtable *ht)
 static int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-	unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	unsigned int size;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
+	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
 	if (size < ht->p.min_size)
 		size = ht->p.min_size;
 
@@ -379,9 +381,9 @@ unlock:
 
 static bool rhashtable_check_elasticity(struct rhashtable *ht,
 					struct bucket_table *tbl,
-					unsigned hash)
+					unsigned int hash)
 {
-	unsigned elasticity = ht->elasticity;
+	unsigned int elasticity = ht->elasticity;
 	struct rhash_head *head;
 
 	rht_for_each(head, tbl, hash)
@@ -431,7 +433,7 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct bucket_table *tbl)
 {
 	struct rhash_head *head;
-	unsigned hash;
+	unsigned int hash;
 	int err;
 
 	tbl = rhashtable_last_table(ht, tbl);
-- 
cgit v1.2.3


From ac833bddb591b9c7a0609f440f196375be184044 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 24 Mar 2015 14:18:18 +0100
Subject: rhashtable: Mark internal/private inline functions as such

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index f89cda067cb9..0e1f975ad101 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -539,6 +539,7 @@ restart:
 	return NULL;
 }
 
+/* Internal function, please use rhashtable_insert_fast() instead */
 static inline int __rhashtable_insert_fast(
 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
 	const struct rhashtable_params params)
@@ -711,6 +712,7 @@ static inline int rhashtable_lookup_insert_key(
 	return __rhashtable_insert_fast(ht, key, obj, params);
 }
 
+/* Internal function, please use rhashtable_remove_fast() instead */
 static inline int __rhashtable_remove_fast(
 	struct rhashtable *ht, struct bucket_table *tbl,
 	struct rhash_head *obj, const struct rhashtable_params params)
-- 
cgit v1.2.3


From b5e2c150ac914f28a28833b57397bec0b0a2bd5f Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 24 Mar 2015 20:42:19 +0000
Subject: rhashtable: Disable automatic shrinking by default

Introduce a new bool automatic_shrinking to require the
user to explicitly opt-in to automatic shrinking of tables.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 7 +++++--
 lib/rhashtable.c           | 2 +-
 net/netfilter/nft_hash.c   | 1 +
 net/netlink/af_netlink.c   | 1 +
 net/tipc/socket.c          | 1 +
 5 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 0e1f975ad101..ae26c494e230 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -2,7 +2,7 @@
  * Resizable, Scalable, Concurrent Hash Table
  *
  * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
- * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
  * Code partially derived from nft_hash
@@ -104,6 +104,7 @@ struct rhashtable;
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
  * @insecure_elasticity: Set to true to disable chain length checks
+ * @automatic_shrinking: Enable automatic shrinking of tables
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
  * @obj_hashfn: Function to hash object
@@ -118,6 +119,7 @@ struct rhashtable_params {
 	unsigned int		min_size;
 	u32			nulls_base;
 	bool			insecure_elasticity;
+	bool			automatic_shrinking;
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
@@ -784,7 +786,8 @@ static inline int rhashtable_remove_fast(
 		goto out;
 
 	atomic_dec(&ht->nelems);
-	if (rht_shrink_below_30(ht, tbl))
+	if (unlikely(ht->p.automatic_shrinking &&
+		     rht_shrink_below_30(ht, tbl)))
 		schedule_work(&ht->run_work);
 
 out:
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 50abe4fec4b8..50374d181148 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -367,7 +367,7 @@ static void rht_deferred_worker(struct work_struct *work)
 
 	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
-	else if (rht_shrink_below_30(ht, tbl))
+	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
 		rhashtable_shrink(ht);
 
 	err = rhashtable_rehash_table(ht);
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index ad3966976cf5..8577a37af18b 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -172,6 +172,7 @@ static const struct rhashtable_params nft_hash_params = {
 	.head_offset = offsetof(struct nft_hash_elem, node),
 	.key_offset = offsetof(struct nft_hash_elem, key),
 	.hashfn = jhash,
+	.automatic_shrinking = true,
 };
 
 static int nft_hash_init(const struct nft_set *set,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e2f7f28148e0..4caa809dbbe0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3142,6 +3142,7 @@ static const struct rhashtable_params netlink_rhashtable_params = {
 	.obj_hashfn = netlink_hash,
 	.obj_cmpfn = netlink_compare,
 	.max_size = 65536,
+	.automatic_shrinking = true,
 };
 
 static int __init netlink_proto_init(void)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 094710519477..ee90d74d7516 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2297,6 +2297,7 @@ static const struct rhashtable_params tsk_rht_params = {
 	.key_len = sizeof(u32), /* portid */
 	.max_size = 1048576,
 	.min_size = 256,
+	.automatic_shrinking = true,
 };
 
 int tipc_sk_rht_init(struct net *net)
-- 
cgit v1.2.3


From 6b6f302ceda7a052dab545d6c69abf5f0d4a6cab Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 24 Mar 2015 14:18:20 +0100
Subject: rhashtable: Add rhashtable_free_and_destroy()

rhashtable_destroy() variant which stops rehashes, iterates over
the table and calls a callback to release resources.

Avoids need for nft_hash to embed rhashtable internals and allows to
get rid of the being_destroyed flag. It also saves a 2nd mutex
lock upon destruction.

Also fixes an RCU lockdep splash on nft set destruction due to
calling rht_for_each_entry_safe() without holding bucket locks.
Open code this loop as we need know that no mutations may occur in
parallel.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  5 +++--
 lib/rhashtable.c           | 49 ++++++++++++++++++++++++++++++++++++----------
 net/netfilter/nft_hash.c   | 25 +++++++----------------
 3 files changed, 49 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index ae26c494e230..99f2e49a8a07 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -136,12 +136,10 @@ struct rhashtable_params {
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
  * @lock: Spin lock to protect walker list
- * @being_destroyed: True if table is set up for destruction
  */
 struct rhashtable {
 	struct bucket_table __rcu	*tbl;
 	atomic_t			nelems;
-	bool                            being_destroyed;
 	unsigned int			key_len;
 	unsigned int			elasticity;
 	struct rhashtable_params	p;
@@ -334,6 +332,9 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
 void *rhashtable_walk_next(struct rhashtable_iter *iter);
 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
 
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
 #define rht_dereference(p, ht) \
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 50374d181148..4b7b7e672b93 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -359,8 +359,6 @@ static void rht_deferred_worker(struct work_struct *work)
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
-	if (ht->being_destroyed)
-		goto unlock;
 
 	tbl = rht_dereference(ht->tbl, ht);
 	tbl = rhashtable_last_table(ht, tbl);
@@ -372,7 +370,6 @@ static void rht_deferred_worker(struct work_struct *work)
 
 	err = rhashtable_rehash_table(ht);
 
-unlock:
 	mutex_unlock(&ht->mutex);
 
 	if (err)
@@ -783,21 +780,53 @@ int rhashtable_init(struct rhashtable *ht,
 EXPORT_SYMBOL_GPL(rhashtable_init);
 
 /**
- * rhashtable_destroy - destroy hash table
+ * rhashtable_free_and_destroy - free elements and destroy hash table
  * @ht:		the hash table to destroy
+ * @free_fn:	callback to release resources of element
+ * @arg:	pointer passed to free_fn
  *
- * Frees the bucket array. This function is not rcu safe, therefore the caller
- * has to make sure that no resizing may happen by unpublishing the hashtable
- * and waiting for the quiescent cycle before releasing the bucket array.
+ * Stops an eventual async resize. If defined, invokes free_fn for each
+ * element to releasal resources. Please note that RCU protected
+ * readers may still be accessing the elements. Releasing of resources
+ * must occur in a compatible manner. Then frees the bucket array.
+ *
+ * This function will eventually sleep to wait for an async resize
+ * to complete. The caller is responsible that no further write operations
+ * occurs in parallel.
  */
-void rhashtable_destroy(struct rhashtable *ht)
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg)
 {
-	ht->being_destroyed = true;
+	const struct bucket_table *tbl;
+	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
-	bucket_table_free(rht_dereference(ht->tbl, ht));
+	tbl = rht_dereference(ht->tbl, ht);
+	if (free_fn) {
+		for (i = 0; i < tbl->size; i++) {
+			struct rhash_head *pos, *next;
+
+			for (pos = rht_dereference(tbl->buckets[i], ht),
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL;
+			     !rht_is_a_nulls(pos);
+			     pos = next,
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL)
+				free_fn(rht_obj(ht, pos), arg);
+		}
+	}
+
+	bucket_table_free(tbl);
 	mutex_unlock(&ht->mutex);
 }
+EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
+
+void rhashtable_destroy(struct rhashtable *ht)
+{
+	return rhashtable_free_and_destroy(ht, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 8577a37af18b..f9ce2195fd63 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -188,26 +188,15 @@ static int nft_hash_init(const struct nft_set *set,
 	return rhashtable_init(priv, &params);
 }
 
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_free_element(void *ptr, void *arg)
 {
-	struct rhashtable *priv = nft_set_priv(set);
-	const struct bucket_table *tbl;
-	struct nft_hash_elem *he;
-	struct rhash_head *pos, *next;
-	unsigned int i;
-
-	/* Stop an eventual async resizing */
-	priv->being_destroyed = true;
-	mutex_lock(&priv->mutex);
-
-	tbl = rht_dereference(priv->tbl, priv);
-	for (i = 0; i < tbl->size; i++) {
-		rht_for_each_entry_safe(he, pos, next, tbl, i, node)
-			nft_hash_elem_destroy(set, he);
-	}
-	mutex_unlock(&priv->mutex);
+	nft_hash_elem_destroy((const struct nft_set *)arg, ptr);
+}
 
-	rhashtable_destroy(priv);
+static void nft_hash_destroy(const struct nft_set *set)
+{
+	rhashtable_free_and_destroy(nft_set_priv(set), nft_free_element,
+				    (void *)set);
 }
 
 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
-- 
cgit v1.2.3


From 39f8e58e53be32ab758d30536e0bd2e6ce766462 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 24 Mar 2015 15:58:55 -0700
Subject: tcp: md5: remove request sock argument of calc_md5_hash()

Since request and established sockets now have same base,
there is no need to pass two pointers to tcp_v4_md5_hash_skb()
or tcp_v6_md5_hash_skb()

Also add a const qualifier to their struct tcp_md5sig_key argument.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     | 29 +++++++++++++----------------
 net/ipv4/tcp_ipv4.c   | 17 +++++++----------
 net/ipv4/tcp_output.c |  4 ++--
 net/ipv6/tcp_ipv6.c   | 15 ++++++---------
 4 files changed, 28 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index fe60e00e1919..992be858c370 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1296,9 +1296,8 @@ struct tcp_md5sig_pool {
 };
 
 /* - functions */
-int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
-			const struct sock *sk, const struct request_sock *req,
-			const struct sk_buff *skb);
+int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+			const struct sock *sk, const struct sk_buff *skb);
 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp);
 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
@@ -1616,14 +1615,13 @@ struct tcp_sock_af_ops {
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	*(*md5_lookup) (struct sock *sk,
 						struct sock *addr_sk);
-	int			(*calc_md5_hash) (char *location,
-						  struct tcp_md5sig_key *md5,
-						  const struct sock *sk,
-						  const struct request_sock *req,
-						  const struct sk_buff *skb);
-	int			(*md5_parse) (struct sock *sk,
-					      char __user *optval,
-					      int optlen);
+	int		(*calc_md5_hash)(char *location,
+					 const struct tcp_md5sig_key *md5,
+					 const struct sock *sk,
+					 const struct sk_buff *skb);
+	int		(*md5_parse)(struct sock *sk,
+				     char __user *optval,
+				     int optlen);
 #endif
 };
 
@@ -1632,11 +1630,10 @@ struct tcp_request_sock_ops {
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	*(*md5_lookup) (struct sock *sk,
 						struct request_sock *req);
-	int			(*calc_md5_hash) (char *location,
-						  struct tcp_md5sig_key *md5,
-						  const struct sock *sk,
-						  const struct request_sock *req,
-						  const struct sk_buff *skb);
+	int		(*calc_md5_hash) (char *location,
+					  const struct tcp_md5sig_key *md5,
+					  const struct sock *sk,
+					  const struct sk_buff *skb);
 #endif
 	void (*init_req)(struct request_sock *req, struct sock *sk,
 			 struct sk_buff *skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d339a0488f51..79d5c641688c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -648,7 +648,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 		if (!key)
 			goto release_sk1;
 
-		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
+		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
 			goto release_sk1;
 	} else {
@@ -1102,8 +1102,8 @@ clear_hash_noput:
 	return 1;
 }
 
-int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
-			const struct sock *sk, const struct request_sock *req,
+int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
+			const struct sock *sk,
 			const struct sk_buff *skb)
 {
 	struct tcp_md5sig_pool *hp;
@@ -1111,12 +1111,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 	const struct tcphdr *th = tcp_hdr(skb);
 	__be32 saddr, daddr;
 
-	if (sk) {
-		saddr = inet_sk(sk)->inet_saddr;
-		daddr = inet_sk(sk)->inet_daddr;
-	} else if (req) {
-		saddr = inet_rsk(req)->ir_loc_addr;
-		daddr = inet_rsk(req)->ir_rmt_addr;
+	if (sk) { /* valid for establish/request sockets */
+		saddr = sk->sk_rcv_saddr;
+		daddr = sk->sk_daddr;
 	} else {
 		const struct iphdr *iph = ip_hdr(skb);
 		saddr = iph->saddr;
@@ -1195,7 +1192,7 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk,
 	 */
 	genhash = tcp_v4_md5_hash_skb(newhash,
 				      hash_expected,
-				      NULL, NULL, skb);
+				      NULL, skb);
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5b7fad4b314c..501cf9d401c3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -986,7 +986,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (md5) {
 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		tp->af_specific->calc_md5_hash(opts.hash_location,
-					       md5, sk, NULL, skb);
+					       md5, sk, skb);
 	}
 #endif
 
@@ -2973,7 +2973,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	/* Okay, we have all we need - do the md5 hash if needed */
 	if (md5)
 		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
-					       md5, NULL, req, skb);
+					       md5, req_to_sk(req), skb);
 	rcu_read_unlock();
 #endif
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 078e7d0f4cd8..57d1c41404ec 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -582,9 +582,9 @@ clear_hash_noput:
 	return 1;
 }
 
-static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v6_md5_hash_skb(char *md5_hash,
+			       const struct tcp_md5sig_key *key,
 			       const struct sock *sk,
-			       const struct request_sock *req,
 			       const struct sk_buff *skb)
 {
 	const struct in6_addr *saddr, *daddr;
@@ -592,12 +592,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 	struct hash_desc *desc;
 	const struct tcphdr *th = tcp_hdr(skb);
 
-	if (sk) {
-		saddr = &inet6_sk(sk)->saddr;
+	if (sk) { /* valid for establish/request sockets */
+		saddr = &sk->sk_v6_rcv_saddr;
 		daddr = &sk->sk_v6_daddr;
-	} else if (req) {
-		saddr = &inet_rsk(req)->ir_v6_loc_addr;
-		daddr = &inet_rsk(req)->ir_v6_rmt_addr;
 	} else {
 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 		saddr = &ip6h->saddr;
@@ -662,7 +659,7 @@ static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
 	/* check the signature */
 	genhash = tcp_v6_md5_hash_skb(newhash,
 				      hash_expected,
-				      NULL, NULL, skb);
+				      NULL, skb);
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
@@ -880,7 +877,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 		if (!key)
 			goto release_sk1;
 
-		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
+		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
 			goto release_sk1;
 	} else {
-- 
cgit v1.2.3


From fd3a154a00fb991872680f19021f5edbb40b4dbe Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 24 Mar 2015 15:58:56 -0700
Subject: tcp: md5: get rid of tcp_v[46]_reqsk_md5_lookup()

With request socks convergence, we no longer need
different lookup methods. A request socket can
use generic lookup function.

Add const qualifier to 2nd tcp_v[46]_md5_lookup() parameter.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  8 ++++----
 net/ipv4/tcp_ipv4.c   | 19 +++++--------------
 net/ipv4/tcp_output.c |  2 +-
 net/ipv6/tcp_ipv6.c   | 10 ++--------
 4 files changed, 12 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 992be858c370..42690daa924e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1303,7 +1303,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
 		   int family);
 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
-					 struct sock *addr_sk);
+					 const struct sock *addr_sk);
 
 #ifdef CONFIG_TCP_MD5SIG
 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
@@ -1614,7 +1614,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 struct tcp_sock_af_ops {
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	*(*md5_lookup) (struct sock *sk,
-						struct sock *addr_sk);
+						const struct sock *addr_sk);
 	int		(*calc_md5_hash)(char *location,
 					 const struct tcp_md5sig_key *md5,
 					 const struct sock *sk,
@@ -1628,8 +1628,8 @@ struct tcp_sock_af_ops {
 struct tcp_request_sock_ops {
 	u16 mss_clamp;
 #ifdef CONFIG_TCP_MD5SIG
-	struct tcp_md5sig_key	*(*md5_lookup) (struct sock *sk,
-						struct request_sock *req);
+	struct tcp_md5sig_key *(*req_md5_lookup)(struct sock *sk,
+						 const struct sock *addr_sk);
 	int		(*calc_md5_hash) (char *location,
 					  const struct tcp_md5sig_key *md5,
 					  const struct sock *sk,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 79d5c641688c..fc8995a702a6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -898,10 +898,10 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 					 const union tcp_md5_addr *addr,
 					 int family)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_md5sig_key *key;
 	unsigned int size = sizeof(struct in_addr);
-	struct tcp_md5sig_info *md5sig;
+	const struct tcp_md5sig_info *md5sig;
 
 	/* caller either holds rcu_read_lock() or socket lock */
 	md5sig = rcu_dereference_check(tp->md5sig_info,
@@ -924,24 +924,15 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 EXPORT_SYMBOL(tcp_md5_do_lookup);
 
 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
-					 struct sock *addr_sk)
+					 const struct sock *addr_sk)
 {
 	union tcp_md5_addr *addr;
 
-	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
+	addr = (union tcp_md5_addr *)&sk->sk_daddr;
 	return tcp_md5_do_lookup(sk, addr, AF_INET);
 }
 EXPORT_SYMBOL(tcp_v4_md5_lookup);
 
-static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
-						      struct request_sock *req)
-{
-	union tcp_md5_addr *addr;
-
-	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
-	return tcp_md5_do_lookup(sk, addr, AF_INET);
-}
-
 /* This can be called on a newly created socket, from other files */
 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
@@ -1247,7 +1238,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 	.mss_clamp	=	TCP_MSS_DEFAULT,
 #ifdef CONFIG_TCP_MD5SIG
-	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
+	.req_md5_lookup	=	tcp_v4_md5_lookup,
 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
 #endif
 	.init_req	=	tcp_v4_init_req,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 501cf9d401c3..2e69b8d16e68 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2938,7 +2938,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 #ifdef CONFIG_TCP_MD5SIG
 	rcu_read_lock();
-	md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
+	md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
 #endif
 	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
 					     foc) + sizeof(*th);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 57d1c41404ec..a9568caf4675 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -486,17 +486,11 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
 }
 
 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
-						struct sock *addr_sk)
+						const struct sock *addr_sk)
 {
 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
 }
 
-static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
-						      struct request_sock *req)
-{
-	return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
-}
-
 static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
 				 int optlen)
 {
@@ -720,7 +714,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
 				sizeof(struct ipv6hdr),
 #ifdef CONFIG_TCP_MD5SIG
-	.md5_lookup	=	tcp_v6_reqsk_md5_lookup,
+	.req_md5_lookup	=	tcp_v6_md5_lookup,
 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
 #endif
 	.init_req	=	tcp_v6_init_req,
-- 
cgit v1.2.3


From 5ebb335dcbe63470c88c4f80f2d571089543b638 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 21 Mar 2015 15:19:15 +0000
Subject: netfilter: nf_tables: move struct net pointer to base chain

The network namespace is only needed for base chains to get at the
gencursor. Also convert to possible_net_t.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 4 ++--
 net/netfilter/nf_tables_api.c     | 2 +-
 net/netfilter/nf_tables_core.c    | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index d756af559977..ace67a549b30 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -449,7 +449,6 @@ enum nft_chain_flags {
  *
  *	@rules: list of rules in the chain
  *	@list: used internally
- *	@net: net namespace that this chain belongs to
  *	@table: table that this chain belongs to
  *	@handle: chain handle
  *	@use: number of jump references to this chain
@@ -460,7 +459,6 @@ enum nft_chain_flags {
 struct nft_chain {
 	struct list_head		rules;
 	struct list_head		list;
-	struct net			*net;
 	struct nft_table		*table;
 	u64				handle;
 	u32				use;
@@ -512,6 +510,7 @@ struct nft_stats {
  *	struct nft_base_chain - nf_tables base chain
  *
  *	@ops: netfilter hook ops
+ *	@pnet: net namespace that this chain belongs to
  *	@type: chain type
  *	@policy: default policy
  *	@stats: per-cpu chain stats
@@ -519,6 +518,7 @@ struct nft_stats {
  */
 struct nft_base_chain {
 	struct nf_hook_ops		ops[NFT_HOOK_OPS_MAX];
+	possible_net_t			pnet;
 	const struct nf_chain_type	*type;
 	u8				policy;
 	struct nft_stats __percpu	*stats;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 363a39a6c286..0b969b66cb77 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1354,6 +1354,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 			rcu_assign_pointer(basechain->stats, stats);
 		}
 
+		write_pnet(&basechain->pnet, net);
 		basechain->type = type;
 		chain = &basechain->chain;
 
@@ -1381,7 +1382,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 
 	INIT_LIST_HEAD(&chain->rules);
 	chain->handle = nf_tables_alloc_handle(table);
-	chain->net = net;
 	chain->table = table;
 	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
 
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 77165bf023f3..4c921a302cfd 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -112,6 +112,7 @@ unsigned int
 nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
 {
 	const struct nft_chain *chain = ops->priv, *basechain = chain;
+	const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet);
 	const struct nft_rule *rule;
 	const struct nft_expr *expr, *last;
 	struct nft_data data[NFT_REG_MAX + 1];
@@ -123,7 +124,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
 	 * Cache cursor to avoid problems in case that the cursor is updated
 	 * while traversing the ruleset.
 	 */
-	unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
+	unsigned int gencursor = ACCESS_ONCE(net->nft.gencursor);
 
 do_chain:
 	rulenum = 0;
-- 
cgit v1.2.3


From 7e3ea6d5c4f4880dbdf85f4083f40d8a3dc9b906 Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Wed, 25 Mar 2015 14:13:01 +0800
Subject: sctp: avoid to repeatedly declare external variables

Move the declaration for external variables to sctp.h file avoiding
to repeatedly declare them with extern keyword.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 3 +++
 net/sctp/socket.c       | 5 -----
 net/sctp/sysctl.c       | 4 ----
 3 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 856f01cb51dd..c56a438c3a1e 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -166,6 +166,9 @@ void sctp_remaddr_proc_exit(struct net *net);
   */
 extern struct kmem_cache *sctp_chunk_cachep __read_mostly;
 extern struct kmem_cache *sctp_bucket_cachep __read_mostly;
+extern long sysctl_sctp_mem[3];
+extern int sysctl_sctp_rmem[3];
+extern int sysctl_sctp_wmem[3];
 
 /*
  *  Section:  Macros, externs, and inlines
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f1a65398f311..f09de7fac2e6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -102,11 +102,6 @@ static int sctp_autobind(struct sock *sk);
 static void sctp_sock_migrate(struct sock *, struct sock *,
 			      struct sctp_association *, sctp_socket_type_t);
 
-extern struct kmem_cache *sctp_bucket_cachep;
-extern long sysctl_sctp_mem[3];
-extern int sysctl_sctp_rmem[3];
-extern int sysctl_sctp_wmem[3];
-
 static int sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
 struct percpu_counter sctp_sockets_allocated;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 2e9ada10fd84..26d50c565f54 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -58,10 +58,6 @@ static unsigned long max_autoclose_max =
 	(MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX)
 	? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ;
 
-extern long sysctl_sctp_mem[3];
-extern int sysctl_sctp_rmem[3];
-extern int sysctl_sctp_wmem[3];
-
 static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
 				void __user *buffer, size_t *lenp,
 				loff_t *ppos);
-- 
cgit v1.2.3


From 49f7b33e63fec9d16e7ee62ba8f8ab4159cbdc26 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Mar 2015 13:07:45 +0000
Subject: rhashtable: provide len to obj_hashfn

nftables sets will be converted to use so called setextensions, moving
the key to a non-fixed position. To hash it, the obj_hashfn must be used,
however it so far doesn't receive the length parameter.

Pass the key length to obj_hashfn() and convert existing users.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/rhashtable.h | 6 ++++--
 lib/rhashtable.c           | 2 +-
 net/netlink/af_netlink.c   | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 99f2e49a8a07..e23d242d1230 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -88,7 +88,7 @@ struct rhashtable_compare_arg {
 };
 
 typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed);
+typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
 typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
 			       const void *obj);
 
@@ -242,7 +242,9 @@ static inline unsigned int rht_head_hashfn(
 	const char *ptr = rht_obj(ht, he);
 
 	return likely(params.obj_hashfn) ?
-	       rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) :
+	       rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
+							    ht->p.key_len,
+						       tbl->hash_rnd)) :
 	       rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
 }
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 4b7b7e672b93..4898442b837f 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -691,7 +691,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
  *	struct rhash_head	node;
  * };
  *
- * u32 my_hash_fn(const void *data, u32 seed)
+ * u32 my_hash_fn(const void *data, u32 len, u32 seed)
  * {
  *	struct test_obj *obj = data;
  *
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4caa809dbbe0..19909d0786a2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3127,7 +3127,7 @@ static struct pernet_operations __net_initdata netlink_net_ops = {
 	.exit = netlink_net_exit,
 };
 
-static inline u32 netlink_hash(const void *data, u32 seed)
+static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
 {
 	const struct netlink_sock *nlk = data;
 	struct netlink_compare_arg arg;
-- 
cgit v1.2.3


From 3ac4c07a24007f0f45d2082b745508768a8e21cf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Mar 2015 13:07:49 +0000
Subject: netfilter: nf_tables: add set extensions

Add simple set extension infrastructure for maintaining variable sized
and optional per element data.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 105 ++++++++++++++++++++++++++++++++++++++
 net/netfilter/nf_tables_api.c     |  16 ++++++
 2 files changed, 121 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index ace67a549b30..038f8a67ca1f 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -311,6 +311,111 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 			  struct nft_set_binding *binding);
 
+/**
+ *	enum nft_set_extensions - set extension type IDs
+ *
+ *	@NFT_SET_EXT_KEY: element key
+ *	@NFT_SET_EXT_DATA: mapping data
+ *	@NFT_SET_EXT_FLAGS: element flags
+ *	@NFT_SET_EXT_NUM: number of extension types
+ */
+enum nft_set_extensions {
+	NFT_SET_EXT_KEY,
+	NFT_SET_EXT_DATA,
+	NFT_SET_EXT_FLAGS,
+	NFT_SET_EXT_NUM
+};
+
+/**
+ *	struct nft_set_ext_type - set extension type
+ *
+ * 	@len: fixed part length of the extension
+ * 	@align: alignment requirements of the extension
+ */
+struct nft_set_ext_type {
+	u8	len;
+	u8	align;
+};
+
+extern const struct nft_set_ext_type nft_set_ext_types[];
+
+/**
+ *	struct nft_set_ext_tmpl - set extension template
+ *
+ *	@len: length of extension area
+ *	@offset: offsets of individual extension types
+ */
+struct nft_set_ext_tmpl {
+	u16	len;
+	u8	offset[NFT_SET_EXT_NUM];
+};
+
+/**
+ *	struct nft_set_ext - set extensions
+ *
+ *	@offset: offsets of individual extension types
+ *	@data: beginning of extension data
+ */
+struct nft_set_ext {
+	u8	offset[NFT_SET_EXT_NUM];
+	char	data[0];
+};
+
+static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl)
+{
+	memset(tmpl, 0, sizeof(*tmpl));
+	tmpl->len = sizeof(struct nft_set_ext);
+}
+
+static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
+					  unsigned int len)
+{
+	tmpl->len	 = ALIGN(tmpl->len, nft_set_ext_types[id].align);
+	BUG_ON(tmpl->len > U8_MAX);
+	tmpl->offset[id] = tmpl->len;
+	tmpl->len	+= nft_set_ext_types[id].len + len;
+}
+
+static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
+{
+	nft_set_ext_add_length(tmpl, id, 0);
+}
+
+static inline void nft_set_ext_init(struct nft_set_ext *ext,
+				    const struct nft_set_ext_tmpl *tmpl)
+{
+	memcpy(ext->offset, tmpl->offset, sizeof(ext->offset));
+}
+
+static inline bool __nft_set_ext_exists(const struct nft_set_ext *ext, u8 id)
+{
+	return !!ext->offset[id];
+}
+
+static inline bool nft_set_ext_exists(const struct nft_set_ext *ext, u8 id)
+{
+	return ext && __nft_set_ext_exists(ext, id);
+}
+
+static inline void *nft_set_ext(const struct nft_set_ext *ext, u8 id)
+{
+	return (void *)ext + ext->offset[id];
+}
+
+static inline struct nft_data *nft_set_ext_key(const struct nft_set_ext *ext)
+{
+	return nft_set_ext(ext, NFT_SET_EXT_KEY);
+}
+
+static inline struct nft_data *nft_set_ext_data(const struct nft_set_ext *ext)
+{
+	return nft_set_ext(ext, NFT_SET_EXT_DATA);
+}
+
+static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext)
+{
+	return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
+}
 
 /**
  *	struct nft_expr_type - nf_tables expression type
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0b969b66cb77..972c47f6e823 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2827,6 +2827,22 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		nf_tables_set_destroy(ctx, set);
 }
 
+const struct nft_set_ext_type nft_set_ext_types[] = {
+	[NFT_SET_EXT_KEY]		= {
+		.len	= sizeof(struct nft_data),
+		.align	= __alignof__(struct nft_data),
+	},
+	[NFT_SET_EXT_DATA]		= {
+		.len	= sizeof(struct nft_data),
+		.align	= __alignof__(struct nft_data),
+	},
+	[NFT_SET_EXT_FLAGS]		= {
+		.len	= sizeof(u8),
+		.align	= __alignof__(u8),
+	},
+};
+EXPORT_SYMBOL_GPL(nft_set_ext_types);
+
 /*
  * Set elements
  */
-- 
cgit v1.2.3


From fe2811ebeb97a7a76de0b2b35f13600169508393 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Mar 2015 13:07:50 +0000
Subject: netfilter: nf_tables: convert hash and rbtree to set extensions

The set implementations' private struct will only contain the elements
needed to maintain the search structure, all other elements are moved
to the set extensions.

Element allocation and initialization is performed centrally by
nf_tables_api instead of by the different set implementations'
->insert() functions. A new "elemsize" member in the set ops specifies
the amount of memory to reserve for internal usage. Destruction
will also be moved out of the set implementations by a following patch.

Except for element allocation, the patch is a simple conversion to
using data from the extension area.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  14 +++--
 net/netfilter/nf_tables_api.c     | 119 ++++++++++++++++++++++++++++----------
 net/netfilter/nft_hash.c          |  56 +++++-------------
 net/netfilter/nft_rbtree.c        |  64 +++++++-------------
 4 files changed, 132 insertions(+), 121 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 038f8a67ca1f..ef3457c1cb62 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -140,8 +140,7 @@ struct nft_userdata {
  *
  *	@cookie: implementation specific element cookie
  *	@key: element key
- *	@data: element data (maps only)
- *	@flags: element flags (end of interval)
+ *	@priv: element private data and extensions
  *
  *	The cookie can be used to store a handle to the element for subsequent
  *	removal.
@@ -149,8 +148,7 @@ struct nft_userdata {
 struct nft_set_elem {
 	void			*cookie;
 	struct nft_data		key;
-	struct nft_data		data;
-	u32			flags;
+	void			*priv;
 };
 
 struct nft_set;
@@ -214,6 +212,7 @@ struct nft_set_estimate {
  *	@destroy: destroy private data of set instance
  *	@list: nf_tables_set_ops list node
  *	@owner: module reference
+ *	@elemsize: element private size
  *	@features: features supported by the implementation
  */
 struct nft_set_ops {
@@ -241,6 +240,7 @@ struct nft_set_ops {
 
 	struct list_head		list;
 	struct module			*owner;
+	unsigned int			elemsize;
 	u32				features;
 };
 
@@ -417,6 +417,12 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext)
 	return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
 }
 
+static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
+						   void *elem)
+{
+	return elem + set->ops->elemsize;
+}
+
 /**
  *	struct nft_expr_type - nf_tables expression type
  *
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 972c47f6e823..99cb884b985f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2771,10 +2771,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
 					const struct nft_set_iter *iter,
 					const struct nft_set_elem *elem)
 {
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
 	enum nft_registers dreg;
 
 	dreg = nft_type_to_reg(set->dtype);
-	return nft_validate_data_load(ctx, dreg, &elem->data,
+	return nft_validate_data_load(ctx, dreg, nft_set_ext_data(ext),
 				      set->dtype == NFT_DATA_VERDICT ?
 				      NFT_DATA_VERDICT : NFT_DATA_VALUE);
 }
@@ -2889,6 +2890,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
 				  const struct nft_set *set,
 				  const struct nft_set_elem *elem)
 {
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 
@@ -2896,20 +2898,20 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE,
-			  set->klen) < 0)
+	if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
+			  NFT_DATA_VALUE, set->klen) < 0)
 		goto nla_put_failure;
 
-	if (set->flags & NFT_SET_MAP &&
-	    !(elem->flags & NFT_SET_ELEM_INTERVAL_END) &&
-	    nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data,
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+	    nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
 			  set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
 			  set->dlen) < 0)
 		goto nla_put_failure;
 
-	if (elem->flags != 0)
-		if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags)))
-			goto nla_put_failure;
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+	    nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
+		         htonl(*nft_set_ext_flags(ext))))
+		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 	return 0;
@@ -3130,15 +3132,42 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
 	return trans;
 }
 
+static void *nft_set_elem_init(const struct nft_set *set,
+			       const struct nft_set_ext_tmpl *tmpl,
+			       const struct nft_data *key,
+			       const struct nft_data *data,
+			       gfp_t gfp)
+{
+	struct nft_set_ext *ext;
+	void *elem;
+
+	elem = kzalloc(set->ops->elemsize + tmpl->len, gfp);
+	if (elem == NULL)
+		return NULL;
+
+	ext = nft_set_elem_ext(set, elem);
+	nft_set_ext_init(ext, tmpl);
+
+	memcpy(nft_set_ext_key(ext), key, set->klen);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		memcpy(nft_set_ext_data(ext), data, set->dlen);
+
+	return elem;
+}
+
 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr)
 {
 	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
 	struct nft_data_desc d1, d2;
+	struct nft_set_ext_tmpl tmpl;
+	struct nft_set_ext *ext;
 	struct nft_set_elem elem;
 	struct nft_set_binding *binding;
+	struct nft_data data;
 	enum nft_registers dreg;
 	struct nft_trans *trans;
+	u32 flags;
 	int err;
 
 	if (set->size && set->nelems == set->size)
@@ -3152,22 +3181,26 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	if (nla[NFTA_SET_ELEM_KEY] == NULL)
 		return -EINVAL;
 
-	elem.flags = 0;
+	nft_set_ext_prepare(&tmpl);
+
+	flags = 0;
 	if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
-		elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
-		if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END)
+		flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
+		if (flags & ~NFT_SET_ELEM_INTERVAL_END)
 			return -EINVAL;
 		if (!(set->flags & NFT_SET_INTERVAL) &&
-		    elem.flags & NFT_SET_ELEM_INTERVAL_END)
+		    flags & NFT_SET_ELEM_INTERVAL_END)
 			return -EINVAL;
+		if (flags != 0)
+			nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
 	}
 
 	if (set->flags & NFT_SET_MAP) {
 		if (nla[NFTA_SET_ELEM_DATA] == NULL &&
-		    !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
+		    !(flags & NFT_SET_ELEM_INTERVAL_END))
 			return -EINVAL;
 		if (nla[NFTA_SET_ELEM_DATA] != NULL &&
-		    elem.flags & NFT_SET_ELEM_INTERVAL_END)
+		    flags & NFT_SET_ELEM_INTERVAL_END)
 			return -EINVAL;
 	} else {
 		if (nla[NFTA_SET_ELEM_DATA] != NULL)
@@ -3185,8 +3218,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	if (set->ops->get(set, &elem) == 0)
 		goto err2;
 
+	nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
+
 	if (nla[NFTA_SET_ELEM_DATA] != NULL) {
-		err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]);
+		err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]);
 		if (err < 0)
 			goto err2;
 
@@ -3203,29 +3238,42 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			};
 
 			err = nft_validate_data_load(&bind_ctx, dreg,
-						     &elem.data, d2.type);
+						     &data, d2.type);
 			if (err < 0)
 				goto err3;
 		}
+
+		nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);
 	}
 
+	err = -ENOMEM;
+	elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL);
+	if (elem.priv == NULL)
+		goto err3;
+
+	ext = nft_set_elem_ext(set, elem.priv);
+	if (flags)
+		*nft_set_ext_flags(ext) = flags;
+
 	trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
 	if (trans == NULL)
-		goto err3;
+		goto err4;
 
 	err = set->ops->insert(set, &elem);
 	if (err < 0)
-		goto err4;
+		goto err5;
 
 	nft_trans_elem(trans) = elem;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
 
-err4:
+err5:
 	kfree(trans);
+err4:
+	kfree(elem.priv);
 err3:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
-		nft_data_uninit(&elem.data, d2.type);
+		nft_data_uninit(&data, d2.type);
 err2:
 	nft_data_uninit(&elem.key, d1.type);
 err1:
@@ -3557,6 +3605,7 @@ static int nf_tables_commit(struct sk_buff *skb)
 	struct net *net = sock_net(skb->sk);
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
+	struct nft_set_ext *ext;
 
 	/* Bump generation counter, invalidate any dump in progress */
 	while (++net->nft.base_seq == 0);
@@ -3641,14 +3690,16 @@ static int nf_tables_commit(struct sk_buff *skb)
 			break;
 		case NFT_MSG_DELSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
+			ext = nft_set_elem_ext(te->set, te->elem.priv);
+
 			nf_tables_setelem_notify(&trans->ctx, te->set,
 						 &te->elem,
 						 NFT_MSG_DELSETELEM, 0);
 			te->set->ops->get(te->set, &te->elem);
 			nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
-			if (te->set->flags & NFT_SET_MAP &&
-			    !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
-				nft_data_uninit(&te->elem.data, te->set->dtype);
+			if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+				nft_data_uninit(nft_set_ext_data(ext),
+						te->set->dtype);
 			te->set->ops->remove(te->set, &te->elem);
 			nft_trans_destroy(trans);
 			break;
@@ -3691,6 +3742,7 @@ static int nf_tables_abort(struct sk_buff *skb)
 	struct net *net = sock_net(skb->sk);
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
+	struct nft_set_ext *ext;
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
@@ -3752,11 +3804,13 @@ static int nf_tables_abort(struct sk_buff *skb)
 		case NFT_MSG_NEWSETELEM:
 			nft_trans_elem_set(trans)->nelems--;
 			te = (struct nft_trans_elem *)trans->data;
+			ext = nft_set_elem_ext(te->set, te->elem.priv);
+
 			te->set->ops->get(te->set, &te->elem);
 			nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
-			if (te->set->flags & NFT_SET_MAP &&
-			    !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
-				nft_data_uninit(&te->elem.data, te->set->dtype);
+			if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+				nft_data_uninit(nft_set_ext_data(ext),
+						te->set->dtype);
 			te->set->ops->remove(te->set, &te->elem);
 			nft_trans_destroy(trans);
 			break;
@@ -3836,13 +3890,18 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
 					const struct nft_set_iter *iter,
 					const struct nft_set_elem *elem)
 {
-	if (elem->flags & NFT_SET_ELEM_INTERVAL_END)
+	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+	const struct nft_data *data;
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+	    *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
 		return 0;
 
-	switch (elem->data.verdict) {
+	data = nft_set_ext_data(ext);
+	switch (data->verdict) {
 	case NFT_JUMP:
 	case NFT_GOTO:
-		return nf_tables_check_loops(ctx, elem->data.chain);
+		return nf_tables_check_loops(ctx, data->chain);
 	default:
 		return 0;
 	}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index dc96a7e94f80..15951a823d1d 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -29,8 +29,7 @@ struct nft_hash {
 
 struct nft_hash_elem {
 	struct rhash_head		node;
-	struct nft_data			key;
-	struct nft_data			data[];
+	struct nft_set_ext		ext;
 };
 
 struct nft_hash_cmp_arg {
@@ -51,7 +50,7 @@ static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
 {
 	const struct nft_hash_elem *he = data;
 
-	return jhash(&he->key, len, seed);
+	return jhash(nft_set_ext_key(&he->ext), len, seed);
 }
 
 static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -60,7 +59,7 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
 	const struct nft_hash_cmp_arg *x = arg->key;
 	const struct nft_hash_elem *he = ptr;
 
-	if (nft_data_cmp(&he->key, x->key, x->set->klen))
+	if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
 		return 1;
 	return 0;
 }
@@ -78,7 +77,7 @@ static bool nft_hash_lookup(const struct nft_set *set,
 
 	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
 	if (he && set->flags & NFT_SET_MAP)
-		nft_data_copy(data, he->data);
+		nft_data_copy(data, nft_set_ext_data(&he->ext));
 
 	return !!he;
 }
@@ -87,43 +86,22 @@ static int nft_hash_insert(const struct nft_set *set,
 			   const struct nft_set_elem *elem)
 {
 	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he;
+	struct nft_hash_elem *he = elem->priv;
 	struct nft_hash_cmp_arg arg = {
 		.set	 = set,
 		.key	 = &elem->key,
 	};
-	unsigned int size;
-	int err;
-
-	if (elem->flags != 0)
-		return -EINVAL;
-
-	size = sizeof(*he);
-	if (set->flags & NFT_SET_MAP)
-		size += sizeof(he->data[0]);
-
-	he = kzalloc(size, GFP_KERNEL);
-	if (he == NULL)
-		return -ENOMEM;
-
-	nft_data_copy(&he->key, &elem->key);
-	if (set->flags & NFT_SET_MAP)
-		nft_data_copy(he->data, &elem->data);
-
-	err = rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
-					   nft_hash_params);
-	if (err)
-		kfree(he);
 
-	return err;
+	return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+					    nft_hash_params);
 }
 
 static void nft_hash_elem_destroy(const struct nft_set *set,
 				  struct nft_hash_elem *he)
 {
-	nft_data_uninit(&he->key, NFT_DATA_VALUE);
+	nft_data_uninit(nft_set_ext_key(&he->ext), NFT_DATA_VALUE);
 	if (set->flags & NFT_SET_MAP)
-		nft_data_uninit(he->data, set->dtype);
+		nft_data_uninit(nft_set_ext_data(&he->ext), set->dtype);
 	kfree(he);
 }
 
@@ -150,10 +128,7 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
 	if (!he)
 		return -ENOENT;
 
-	elem->cookie = he;
-	elem->flags = 0;
-	if (set->flags & NFT_SET_MAP)
-		nft_data_copy(&elem->data, he->data);
+	elem->priv = he;
 
 	return 0;
 }
@@ -162,7 +137,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 			  struct nft_set_iter *iter)
 {
 	struct nft_hash *priv = nft_set_priv(set);
-	const struct nft_hash_elem *he;
+	struct nft_hash_elem *he;
 	struct rhashtable_iter hti;
 	struct nft_set_elem elem;
 	int err;
@@ -192,10 +167,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 		if (iter->count < iter->skip)
 			goto cont;
 
-		memcpy(&elem.key, &he->key, sizeof(elem.key));
-		if (set->flags & NFT_SET_MAP)
-			memcpy(&elem.data, he->data, sizeof(elem.data));
-		elem.flags = 0;
+		elem.priv = he;
 
 		iter->err = iter->fn(ctx, set, iter, &elem);
 		if (iter->err < 0)
@@ -254,9 +226,6 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
 	unsigned int esize;
 
 	esize = sizeof(struct nft_hash_elem);
-	if (features & NFT_SET_MAP)
-		esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
-
 	if (desc->size) {
 		est->size = sizeof(struct nft_hash) +
 			    roundup_pow_of_two(desc->size * 4 / 3) *
@@ -278,6 +247,7 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
 
 static struct nft_set_ops nft_hash_ops __read_mostly = {
 	.privsize       = nft_hash_privsize,
+	.elemsize	= offsetof(struct nft_hash_elem, ext),
 	.estimate	= nft_hash_estimate,
 	.init		= nft_hash_init,
 	.destroy	= nft_hash_destroy,
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 2c75361077f7..ebf6e60df41c 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -26,9 +26,7 @@ struct nft_rbtree {
 
 struct nft_rbtree_elem {
 	struct rb_node		node;
-	u16			flags;
-	struct nft_data		key;
-	struct nft_data		data[];
+	struct nft_set_ext	ext;
 };
 
 static bool nft_rbtree_lookup(const struct nft_set *set,
@@ -45,7 +43,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
 	while (parent != NULL) {
 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
-		d = nft_data_cmp(&rbe->key, key, set->klen);
+		d = nft_data_cmp(nft_set_ext_key(&rbe->ext), key, set->klen);
 		if (d < 0) {
 			parent = parent->rb_left;
 			interval = rbe;
@@ -53,10 +51,12 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
 			parent = parent->rb_right;
 		else {
 found:
-			if (rbe->flags & NFT_SET_ELEM_INTERVAL_END)
+			if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+			    *nft_set_ext_flags(&rbe->ext) &
+			    NFT_SET_ELEM_INTERVAL_END)
 				goto out;
 			if (set->flags & NFT_SET_MAP)
-				nft_data_copy(data, rbe->data);
+				nft_data_copy(data, nft_set_ext_data(&rbe->ext));
 
 			spin_unlock_bh(&nft_rbtree_lock);
 			return true;
@@ -75,10 +75,10 @@ out:
 static void nft_rbtree_elem_destroy(const struct nft_set *set,
 				    struct nft_rbtree_elem *rbe)
 {
-	nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
+	nft_data_uninit(nft_set_ext_key(&rbe->ext), NFT_DATA_VALUE);
 	if (set->flags & NFT_SET_MAP &&
-	    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
-		nft_data_uninit(rbe->data, set->dtype);
+	    nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_DATA))
+		nft_data_uninit(nft_set_ext_data(&rbe->ext), set->dtype);
 
 	kfree(rbe);
 }
@@ -96,7 +96,9 @@ static int __nft_rbtree_insert(const struct nft_set *set,
 	while (*p != NULL) {
 		parent = *p;
 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
-		d = nft_data_cmp(&rbe->key, &new->key, set->klen);
+		d = nft_data_cmp(nft_set_ext_key(&rbe->ext),
+				 nft_set_ext_key(&new->ext),
+				 set->klen);
 		if (d < 0)
 			p = &parent->rb_left;
 		else if (d > 0)
@@ -112,31 +114,13 @@ static int __nft_rbtree_insert(const struct nft_set *set,
 static int nft_rbtree_insert(const struct nft_set *set,
 			     const struct nft_set_elem *elem)
 {
-	struct nft_rbtree_elem *rbe;
-	unsigned int size;
+	struct nft_rbtree_elem *rbe = elem->priv;
 	int err;
 
-	size = sizeof(*rbe);
-	if (set->flags & NFT_SET_MAP &&
-	    !(elem->flags & NFT_SET_ELEM_INTERVAL_END))
-		size += sizeof(rbe->data[0]);
-
-	rbe = kzalloc(size, GFP_KERNEL);
-	if (rbe == NULL)
-		return -ENOMEM;
-
-	rbe->flags = elem->flags;
-	nft_data_copy(&rbe->key, &elem->key);
-	if (set->flags & NFT_SET_MAP &&
-	    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
-		nft_data_copy(rbe->data, &elem->data);
-
 	spin_lock_bh(&nft_rbtree_lock);
 	err = __nft_rbtree_insert(set, rbe);
-	if (err < 0)
-		kfree(rbe);
-
 	spin_unlock_bh(&nft_rbtree_lock);
+
 	return err;
 }
 
@@ -162,17 +146,15 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
 	while (parent != NULL) {
 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
-		d = nft_data_cmp(&rbe->key, &elem->key, set->klen);
+		d = nft_data_cmp(nft_set_ext_key(&rbe->ext), &elem->key,
+				 set->klen);
 		if (d < 0)
 			parent = parent->rb_left;
 		else if (d > 0)
 			parent = parent->rb_right;
 		else {
 			elem->cookie = rbe;
-			if (set->flags & NFT_SET_MAP &&
-			    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
-				nft_data_copy(&elem->data, rbe->data);
-			elem->flags = rbe->flags;
+			elem->priv   = rbe;
 			return 0;
 		}
 	}
@@ -184,7 +166,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 			    struct nft_set_iter *iter)
 {
 	const struct nft_rbtree *priv = nft_set_priv(set);
-	const struct nft_rbtree_elem *rbe;
+	struct nft_rbtree_elem *rbe;
 	struct nft_set_elem elem;
 	struct rb_node *node;
 
@@ -194,11 +176,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 			goto cont;
 
 		rbe = rb_entry(node, struct nft_rbtree_elem, node);
-		nft_data_copy(&elem.key, &rbe->key);
-		if (set->flags & NFT_SET_MAP &&
-		    !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
-			nft_data_copy(&elem.data, rbe->data);
-		elem.flags = rbe->flags;
+		elem.priv = rbe;
 
 		iter->err = iter->fn(ctx, set, iter, &elem);
 		if (iter->err < 0) {
@@ -245,9 +223,6 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 	unsigned int nsize;
 
 	nsize = sizeof(struct nft_rbtree_elem);
-	if (features & NFT_SET_MAP)
-		nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
-
 	if (desc->size)
 		est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
 	else
@@ -260,6 +235,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 
 static struct nft_set_ops nft_rbtree_ops __read_mostly = {
 	.privsize	= nft_rbtree_privsize,
+	.elemsize	= offsetof(struct nft_rbtree_elem, ext),
 	.estimate	= nft_rbtree_estimate,
 	.init		= nft_rbtree_init,
 	.destroy	= nft_rbtree_destroy,
-- 
cgit v1.2.3


From b6a7719aedd7e5c0f2df7641aa47386111682df4 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 25 Mar 2015 17:07:44 +0100
Subject: ipv4: hash net ptr into fragmentation bucket selection

As namespaces are sometimes used with overlapping ip address ranges,
we should also use the namespace as input to the hash to select the ip
fragmentation counter bucket.

Cc: Eric Dumazet <edumazet@google.com>
Cc: Flavio Leitner <fbl@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pptp.c          |  2 +-
 include/net/ip.h                | 12 +++++++-----
 net/ipv4/igmp.c                 |  4 ++--
 net/ipv4/ip_output.c            |  7 ++++---
 net/ipv4/ip_tunnel_core.c       |  2 +-
 net/ipv4/ipmr.c                 |  7 ++++---
 net/ipv4/raw.c                  |  2 +-
 net/ipv4/route.c                |  4 ++--
 net/ipv4/xfrm4_mode_tunnel.c    |  2 +-
 net/netfilter/ipvs/ip_vs_xmit.c |  5 +++--
 10 files changed, 26 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 1dc628ffce2b..e3bfbd4d0136 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	nf_reset(skb);
 
 	skb->ip_summed = CHECKSUM_NONE;
-	ip_select_ident(skb, NULL);
+	ip_select_ident(sock_net(sk), skb, NULL);
 	ip_send_check(iph);
 
 	ip_local_out(skb);
diff --git a/include/net/ip.h b/include/net/ip.h
index 025c61c0dffb..d0808a323763 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -318,9 +318,10 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
 }
 
 u32 ip_idents_reserve(u32 hash, int segs);
-void __ip_select_ident(struct iphdr *iph, int segs);
+void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
 
-static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
+static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
+					struct sock *sk, int segs)
 {
 	struct iphdr *iph = ip_hdr(skb);
 
@@ -337,13 +338,14 @@ static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, in
 			iph->id = 0;
 		}
 	} else {
-		__ip_select_ident(iph, segs);
+		__ip_select_ident(net, iph, segs);
 	}
 }
 
-static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+static inline void ip_select_ident(struct net *net, struct sk_buff *skb,
+				   struct sock *sk)
 {
-	ip_select_ident_segs(skb, sk, 1);
+	ip_select_ident_segs(net, skb, sk, 1);
 }
 
 static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index ad3f866085de..ad09213ac5b2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -370,7 +370,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
 	pip->saddr    = fl4.saddr;
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
-	ip_select_ident(skb, NULL);
+	ip_select_ident(net, skb, NULL);
 	((u8 *)&pip[1])[0] = IPOPT_RA;
 	((u8 *)&pip[1])[1] = 4;
 	((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->daddr    = dst;
 	iph->saddr    = fl4.saddr;
 	iph->protocol = IPPROTO_IGMP;
-	ip_select_ident(skb, NULL);
+	ip_select_ident(net, skb, NULL);
 	((u8 *)&iph[1])[0] = IPOPT_RA;
 	((u8 *)&iph[1])[1] = 4;
 	((u8 *)&iph[1])[2] = 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 90b49e88e84a..8259e777b249 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
 	iph->saddr    = saddr;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(skb, sk);
+	ip_select_ident(sock_net(sk), skb, sk);
 
 	if (opt && opt->opt.optlen) {
 		iph->ihl += opt->opt.optlen>>2;
@@ -430,7 +430,8 @@ packet_routed:
 		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
+	ip_select_ident_segs(sock_net(sk), skb, sk,
+			     skb_shinfo(skb)->gso_segs ?: 1);
 
 	/* TODO : should we use skb->sk here instead of sk ? */
 	skb->priority = sk->sk_priority;
@@ -1379,7 +1380,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	ip_copy_addrs(iph, fl4);
-	ip_select_ident(skb, sk);
+	ip_select_ident(net, skb, sk);
 
 	if (opt) {
 		iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 88c386cf7d85..8c4dcc46acd2 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 	iph->daddr	=	dst;
 	iph->saddr	=	src;
 	iph->ttl	=	ttl;
-	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
+	__ip_select_ident(sock_net(sk), iph, skb_shinfo(skb)->gso_segs ?: 1);
 
 	err = ip_local_out_sk(sk, skb);
 	if (unlikely(net_xmit_eval(err)))
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5b188832800f..c688cd1b2110 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1642,7 +1642,8 @@ static struct notifier_block ip_mr_notifier = {
  *	important for multicast video.
  */
 
-static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
+static void ip_encap(struct net *net, struct sk_buff *skb,
+		     __be32 saddr, __be32 daddr)
 {
 	struct iphdr *iph;
 	const struct iphdr *old_iph = ip_hdr(skb);
@@ -1661,7 +1662,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 	iph->protocol	=	IPPROTO_IPIP;
 	iph->ihl	=	5;
 	iph->tot_len	=	htons(skb->len);
-	ip_select_ident(skb, NULL);
+	ip_select_ident(net, skb, NULL);
 	ip_send_check(iph);
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1758,7 +1759,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 	 * What do we do with netfilter? -- RR
 	 */
 	if (vif->flags & VIFF_TUNNEL) {
-		ip_encap(skb, vif->local, vif->remote);
+		ip_encap(net, skb, vif->local, vif->remote);
 		/* FIXME: extra output firewall step used to be here. --RR */
 		vif->dev->stats.tx_packets++;
 		vif->dev->stats.tx_bytes += skb->len;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 923cf538fce1..56946f47d446 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -404,7 +404,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 		iph->check   = 0;
 		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(skb, NULL);
+			ip_select_ident(net, skb, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 649c8a3f0189..be8703d02ef0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -482,7 +482,7 @@ u32 ip_idents_reserve(u32 hash, int segs)
 }
 EXPORT_SYMBOL(ip_idents_reserve);
 
-void __ip_select_ident(struct iphdr *iph, int segs)
+void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
 {
 	static u32 ip_idents_hashrnd __read_mostly;
 	u32 hash, id;
@@ -491,7 +491,7 @@ void __ip_select_ident(struct iphdr *iph, int segs)
 
 	hash = jhash_3words((__force u32)iph->daddr,
 			    (__force u32)iph->saddr,
-			    iph->protocol,
+			    iph->protocol ^ net_hash_mix(net),
 			    ip_idents_hashrnd);
 	id = ip_idents_reserve(hash, segs);
 	iph->id = htons(id);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 91771a7c802f..35feda676464 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -63,7 +63,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
-	ip_select_ident(skb, NULL);
+	ip_select_ident(dev_net(dst->dev), skb, NULL);
 
 	return 0;
 }
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index f35c15b0de6b..bf02932b7188 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -924,7 +924,8 @@ int
 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
 {
-	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+	struct net *net = skb_net(skb);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct rtable *rt;			/* Route to the other host */
 	__be32 saddr;				/* Source for tunnel */
 	struct net_device *tdev;		/* Device to other host */
@@ -991,7 +992,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr		=	cp->daddr.ip;
 	iph->saddr		=	saddr;
 	iph->ttl		=	ttl;
-	ip_select_ident(skb, NULL);
+	ip_select_ident(net, skb, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->ignore_df = 1;
-- 
cgit v1.2.3


From 5a352dd0a3aac03b443c94828dfd7144261c8636 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 25 Mar 2015 17:07:45 +0100
Subject: ipv6: hash net ptr into fragmentation bucket selection

As namespaces are sometimes used with overlapping ip address ranges,
we should also use the namespace as input to the hash to select the ip
fragmentation counter bucket.

Cc: Eric Dumazet <edumazet@google.com>
Cc: Flavio Leitner <fbl@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h     |  5 +++--
 net/ipv6/ip6_output.c  |  6 +++---
 net/ipv6/output_core.c | 14 ++++++++------
 net/ipv6/udp_offload.c |  4 ++--
 4 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e7ba9758a345..65142e6af440 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -671,8 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
 	return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
 }
 
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt);
-void ipv6_proxy_select_ident(struct sk_buff *skb);
+void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr,
+		       struct rt6_info *rt);
+void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb);
 
 int ip6_dst_hoplimit(struct dst_entry *dst);
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7e80b61b51ff..b06ad00048d5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -628,7 +628,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		skb_reset_network_header(skb);
 		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 
-		ipv6_select_ident(fh, rt);
+		ipv6_select_ident(net, fh, rt);
 		fh->nexthdr = nexthdr;
 		fh->reserved = 0;
 		fh->frag_off = htons(IP6_MF);
@@ -775,7 +775,7 @@ slow_path:
 		fh->nexthdr = nexthdr;
 		fh->reserved = 0;
 		if (!frag_id) {
-			ipv6_select_ident(fh, rt);
+			ipv6_select_ident(net, fh, rt);
 			frag_id = fh->identification;
 		} else
 			fh->identification = frag_id;
@@ -1079,7 +1079,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
 				     sizeof(struct frag_hdr)) & ~7;
 	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-	ipv6_select_ident(&fhdr, rt);
+	ipv6_select_ident(sock_net(sk), &fhdr, rt);
 	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
 
 append:
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 74581f706c4d..4016a6ef9d61 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -9,13 +9,14 @@
 #include <net/addrconf.h>
 #include <net/secure_seq.h>
 
-static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst,
-			       struct in6_addr *src)
+static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
+			       struct in6_addr *dst, struct in6_addr *src)
 {
 	u32 hash, id;
 
 	hash = __ipv6_addr_jhash(dst, hashrnd);
 	hash = __ipv6_addr_jhash(src, hash);
+	hash ^= net_hash_mix(net);
 
 	/* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
 	 * set the hight order instead thus minimizing possible future
@@ -36,7 +37,7 @@ static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst,
  *
  * The network header must be set before calling this.
  */
-void ipv6_proxy_select_ident(struct sk_buff *skb)
+void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
 {
 	static u32 ip6_proxy_idents_hashrnd __read_mostly;
 	struct in6_addr buf[2];
@@ -53,20 +54,21 @@ void ipv6_proxy_select_ident(struct sk_buff *skb)
 	net_get_random_once(&ip6_proxy_idents_hashrnd,
 			    sizeof(ip6_proxy_idents_hashrnd));
 
-	id = __ipv6_select_ident(ip6_proxy_idents_hashrnd,
+	id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
 				 &addrs[1], &addrs[0]);
 	skb_shinfo(skb)->ip6_frag_id = htonl(id);
 }
 EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
 
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr,
+		       struct rt6_info *rt)
 {
 	static u32 ip6_idents_hashrnd __read_mostly;
 	u32 id;
 
 	net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
 
-	id = __ipv6_select_ident(ip6_idents_hashrnd, &rt->rt6i_dst.addr,
+	id = __ipv6_select_ident(net, ip6_idents_hashrnd, &rt->rt6i_dst.addr,
 				 &rt->rt6i_src.addr);
 	fhdr->identification = htonl(id);
 }
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index be2c0ba82c85..7441e1e63893 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -54,7 +54,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 
 		/* Set the IPv6 fragment id if not set yet */
 		if (!skb_shinfo(skb)->ip6_frag_id)
-			ipv6_proxy_select_ident(skb);
+			ipv6_proxy_select_ident(dev_net(skb->dev), skb);
 
 		segs = NULL;
 		goto out;
@@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		fptr->nexthdr = nexthdr;
 		fptr->reserved = 0;
 		if (!skb_shinfo(skb)->ip6_frag_id)
-			ipv6_proxy_select_ident(skb);
+			ipv6_proxy_select_ident(dev_net(skb->dev), skb);
 		fptr->identification = skb_shinfo(skb)->ip6_frag_id;
 
 		/* Fragment the skb. ipv6 header and the remaining fields of the
-- 
cgit v1.2.3


From 074c238177a75f5e79af3b2cb6a84e54823ef950 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 25 Mar 2015 15:55:42 -0700
Subject: mm: numa: slow PTE scan rate if migration failures occur

Dave Chinner reported the following on https://lkml.org/lkml/2015/3/1/226

  Across the board the 4.0-rc1 numbers are much slower, and the degradation
  is far worse when using the large memory footprint configs. Perf points
  straight at the cause - this is from 4.0-rc1 on the "-o bhash=101073" config:

   -   56.07%    56.07%  [kernel]            [k] default_send_IPI_mask_sequence_phys
      - default_send_IPI_mask_sequence_phys
         - 99.99% physflat_send_IPI_mask
            - 99.37% native_send_call_func_ipi
                 smp_call_function_many
               - native_flush_tlb_others
                  - 99.85% flush_tlb_page
                       ptep_clear_flush
                       try_to_unmap_one
                       rmap_walk
                       try_to_unmap
                       migrate_pages
                       migrate_misplaced_page
                     - handle_mm_fault
                        - 99.73% __do_page_fault
                             trace_do_page_fault
                             do_async_page_fault
                           + async_page_fault
              0.63% native_send_call_func_single_ipi
                 generic_exec_single
                 smp_call_function_single

This is showing excessive migration activity even though excessive
migrations are meant to get throttled.  Normally, the scan rate is tuned
on a per-task basis depending on the locality of faults.  However, if
migrations fail for any reason then the PTE scanner may scan faster if
the faults continue to be remote.  This means there is higher system CPU
overhead and fault trapping at exactly the time we know that migrations
cannot happen.  This patch tracks when migration failures occur and
slows the PTE scanner.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reported-by: Dave Chinner <david@fromorbit.com>
Tested-by: Dave Chinner <david@fromorbit.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 9 +++++----
 kernel/sched/fair.c   | 8 ++++++--
 mm/huge_memory.c      | 3 ++-
 mm/memory.c           | 3 ++-
 4 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d77432e14ff..a419b65770d6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1625,11 +1625,11 @@ struct task_struct {
 
 	/*
 	 * numa_faults_locality tracks if faults recorded during the last
-	 * scan window were remote/local. The task scan period is adapted
-	 * based on the locality of the faults with different weights
-	 * depending on whether they were shared or private faults
+	 * scan window were remote/local or failed to migrate. The task scan
+	 * period is adapted based on the locality of the faults with different
+	 * weights depending on whether they were shared or private faults
 	 */
-	unsigned long numa_faults_locality[2];
+	unsigned long numa_faults_locality[3];
 
 	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
@@ -1719,6 +1719,7 @@ struct task_struct {
 #define TNF_NO_GROUP	0x02
 #define TNF_SHARED	0x04
 #define TNF_FAULT_LOCAL	0x08
+#define TNF_MIGRATE_FAIL 0x10
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7ce18f3c097a..bcfe32088b37 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1609,9 +1609,11 @@ static void update_task_scan_period(struct task_struct *p,
 	/*
 	 * If there were no record hinting faults then either the task is
 	 * completely idle or all activity is areas that are not of interest
-	 * to automatic numa balancing. Scan slower
+	 * to automatic numa balancing. Related to that, if there were failed
+	 * migration then it implies we are migrating too quickly or the local
+	 * node is overloaded. In either case, scan slower
 	 */
-	if (local + shared == 0) {
+	if (local + shared == 0 || p->numa_faults_locality[2]) {
 		p->numa_scan_period = min(p->numa_scan_period_max,
 			p->numa_scan_period << 1);
 
@@ -2080,6 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 
 	if (migrated)
 		p->numa_pages_migrated += pages;
+	if (flags & TNF_MIGRATE_FAIL)
+		p->numa_faults_locality[2] += pages;
 
 	p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages;
 	p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0a42d1521aa4..51b3e7c64622 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1350,7 +1350,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (migrated) {
 		flags |= TNF_MIGRATED;
 		page_nid = target_nid;
-	}
+	} else
+		flags |= TNF_MIGRATE_FAIL;
 
 	goto out;
 clear_pmdnuma:
diff --git a/mm/memory.c b/mm/memory.c
index d20e12da3a3c..97839f5c8c30 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3103,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (migrated) {
 		page_nid = target_nid;
 		flags |= TNF_MIGRATED;
-	}
+	} else
+		flags |= TNF_MIGRATE_FAIL;
 
 out:
 	if (page_nid != -1)
-- 
cgit v1.2.3


From 495099962138732c28449d07f3b1988836659851 Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@chromium.org>
Date: Wed, 25 Mar 2015 18:53:39 -0700
Subject: Bluetooth: Add macros for advertising instance flags

This patch adds macro definitions for possible advertising instance
flags that can be passed to the "Add Advertising" command.

Signed-off-by: Arman Uguray <armansito@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 68abd4b0c25d..b831242d48a4 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -554,6 +554,14 @@ struct mgmt_rp_add_advertising {
 	__u8	instance;
 } __packed;
 
+#define MGMT_ADV_FLAG_CONNECTABLE	BIT(0)
+#define MGMT_ADV_FLAG_DISCOV		BIT(1)
+#define MGMT_ADV_FLAG_LIMITED_DISCOV	BIT(2)
+#define MGMT_ADV_FLAG_MANAGED_FLAGS	BIT(3)
+#define MGMT_ADV_FLAG_TX_POWER		BIT(4)
+#define MGMT_ADV_FLAG_APPEARANCE	BIT(5)
+#define MGMT_ADV_FLAG_LOCAL_NAME	BIT(6)
+
 #define MGMT_OP_REMOVE_ADVERTISING	0x003F
 struct mgmt_cp_remove_advertising {
 	__u8	instance;
-- 
cgit v1.2.3


From 61edafbb47e9f46fb850035b1f8f062564445704 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Mar 2015 14:08:47 +0000
Subject: netfilter: nf_tables: consolide set element destruction

With the conversion to set extensions, it is now possible to consolidate
the different set element destruction functions.

The set implementations' ->remove() functions are changed to only take
the element out of their internal data structures. Elements will be freed
in a batched fashion after the global transaction's completion RCU grace
period.

This reduces the amount of grace periods required for nft_hash from N
to zero additional ones, additionally this guarantees that the set
elements' extensions of all implementations can be used under RCU
protection.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  2 ++
 net/netfilter/nf_tables_api.c     | 34 ++++++++++++++++++++--------------
 net/netfilter/nft_hash.c          | 18 ++++--------------
 net/netfilter/nft_rbtree.c        | 14 +-------------
 4 files changed, 27 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index ef3457c1cb62..6ac63323afd2 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -423,6 +423,8 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
 	return elem + set->ops->elemsize;
 }
 
+void nft_set_elem_destroy(const struct nft_set *set, void *elem);
+
 /**
  *	struct nft_expr_type - nf_tables expression type
  *
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 99cb884b985f..b35512f1934c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3155,6 +3155,18 @@ static void *nft_set_elem_init(const struct nft_set *set,
 	return elem;
 }
 
+void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+{
+	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+	nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+		nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+
+	kfree(elem);
+}
+EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+
 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr)
 {
@@ -3596,6 +3608,10 @@ static void nf_tables_commit_release(struct nft_trans *trans)
 	case NFT_MSG_DELSET:
 		nft_set_destroy(nft_trans_set(trans));
 		break;
+	case NFT_MSG_DELSETELEM:
+		nft_set_elem_destroy(nft_trans_elem_set(trans),
+				     nft_trans_elem(trans).priv);
+		break;
 	}
 	kfree(trans);
 }
@@ -3605,7 +3621,6 @@ static int nf_tables_commit(struct sk_buff *skb)
 	struct net *net = sock_net(skb->sk);
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
-	struct nft_set_ext *ext;
 
 	/* Bump generation counter, invalidate any dump in progress */
 	while (++net->nft.base_seq == 0);
@@ -3690,18 +3705,12 @@ static int nf_tables_commit(struct sk_buff *skb)
 			break;
 		case NFT_MSG_DELSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
-			ext = nft_set_elem_ext(te->set, te->elem.priv);
 
 			nf_tables_setelem_notify(&trans->ctx, te->set,
 						 &te->elem,
 						 NFT_MSG_DELSETELEM, 0);
 			te->set->ops->get(te->set, &te->elem);
-			nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
-			if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
-				nft_data_uninit(nft_set_ext_data(ext),
-						te->set->dtype);
 			te->set->ops->remove(te->set, &te->elem);
-			nft_trans_destroy(trans);
 			break;
 		}
 	}
@@ -3733,6 +3742,10 @@ static void nf_tables_abort_release(struct nft_trans *trans)
 	case NFT_MSG_NEWSET:
 		nft_set_destroy(nft_trans_set(trans));
 		break;
+	case NFT_MSG_NEWSETELEM:
+		nft_set_elem_destroy(nft_trans_elem_set(trans),
+				     nft_trans_elem(trans).priv);
+		break;
 	}
 	kfree(trans);
 }
@@ -3742,7 +3755,6 @@ static int nf_tables_abort(struct sk_buff *skb)
 	struct net *net = sock_net(skb->sk);
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
-	struct nft_set_ext *ext;
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
@@ -3804,15 +3816,9 @@ static int nf_tables_abort(struct sk_buff *skb)
 		case NFT_MSG_NEWSETELEM:
 			nft_trans_elem_set(trans)->nelems--;
 			te = (struct nft_trans_elem *)trans->data;
-			ext = nft_set_elem_ext(te->set, te->elem.priv);
 
 			te->set->ops->get(te->set, &te->elem);
-			nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
-			if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
-				nft_data_uninit(nft_set_ext_data(ext),
-						te->set->dtype);
 			te->set->ops->remove(te->set, &te->elem);
-			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELSETELEM:
 			nft_trans_elem_set(trans)->nelems++;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 15951a823d1d..94bf25def37f 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -96,23 +96,12 @@ static int nft_hash_insert(const struct nft_set *set,
 					    nft_hash_params);
 }
 
-static void nft_hash_elem_destroy(const struct nft_set *set,
-				  struct nft_hash_elem *he)
-{
-	nft_data_uninit(nft_set_ext_key(&he->ext), NFT_DATA_VALUE);
-	if (set->flags & NFT_SET_MAP)
-		nft_data_uninit(nft_set_ext_data(&he->ext), set->dtype);
-	kfree(he);
-}
-
 static void nft_hash_remove(const struct nft_set *set,
 			    const struct nft_set_elem *elem)
 {
 	struct nft_hash *priv = nft_set_priv(set);
 
 	rhashtable_remove_fast(&priv->ht, elem->cookie, nft_hash_params);
-	synchronize_rcu();
-	kfree(elem->cookie);
 }
 
 static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
@@ -208,16 +197,17 @@ static int nft_hash_init(const struct nft_set *set,
 	return rhashtable_init(&priv->ht, &params);
 }
 
-static void nft_free_element(void *ptr, void *arg)
+static void nft_hash_elem_destroy(void *ptr, void *arg)
 {
-	nft_hash_elem_destroy((const struct nft_set *)arg, ptr);
+	nft_set_elem_destroy((const struct nft_set *)arg, ptr);
 }
 
 static void nft_hash_destroy(const struct nft_set *set)
 {
 	struct nft_hash *priv = nft_set_priv(set);
 
-	rhashtable_free_and_destroy(&priv->ht, nft_free_element, (void *)set);
+	rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+				    (void *)set);
 }
 
 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index ebf6e60df41c..332c6afc77e9 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -72,17 +72,6 @@ out:
 	return false;
 }
 
-static void nft_rbtree_elem_destroy(const struct nft_set *set,
-				    struct nft_rbtree_elem *rbe)
-{
-	nft_data_uninit(nft_set_ext_key(&rbe->ext), NFT_DATA_VALUE);
-	if (set->flags & NFT_SET_MAP &&
-	    nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_DATA))
-		nft_data_uninit(nft_set_ext_data(&rbe->ext), set->dtype);
-
-	kfree(rbe);
-}
-
 static int __nft_rbtree_insert(const struct nft_set *set,
 			       struct nft_rbtree_elem *new)
 {
@@ -133,7 +122,6 @@ static void nft_rbtree_remove(const struct nft_set *set,
 	spin_lock_bh(&nft_rbtree_lock);
 	rb_erase(&rbe->node, &priv->root);
 	spin_unlock_bh(&nft_rbtree_lock);
-	kfree(rbe);
 }
 
 static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
@@ -213,7 +201,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
 	while ((node = priv->root.rb_node) != NULL) {
 		rb_erase(node, &priv->root);
 		rbe = rb_entry(node, struct nft_rbtree_elem, node);
-		nft_rbtree_elem_destroy(set, rbe);
+		nft_set_elem_destroy(set, rbe);
 	}
 }
 
-- 
cgit v1.2.3


From b2832dd6621bf73eb8ad38389a94bd83a5983886 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Mar 2015 14:08:48 +0000
Subject: netfilter: nf_tables: return set extensions from ->lookup()

Return the extension area from the ->lookup() function to allow to
consolidate common actions.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 4 +++-
 net/netfilter/nft_hash.c          | 6 +++---
 net/netfilter/nft_lookup.c        | 6 +++++-
 net/netfilter/nft_rbtree.c        | 7 +++----
 4 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 6ac63323afd2..f190d26bda7d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -200,6 +200,8 @@ struct nft_set_estimate {
 	enum nft_set_class	class;
 };
 
+struct nft_set_ext;
+
 /**
  *	struct nft_set_ops - nf_tables set operations
  *
@@ -218,7 +220,7 @@ struct nft_set_estimate {
 struct nft_set_ops {
 	bool				(*lookup)(const struct nft_set *set,
 						  const struct nft_data *key,
-						  struct nft_data *data);
+						  const struct nft_set_ext **ext);
 	int				(*get)(const struct nft_set *set,
 					       struct nft_set_elem *elem);
 	int				(*insert)(const struct nft_set *set,
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 94bf25def37f..5bee82195ef5 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -66,7 +66,7 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
 
 static bool nft_hash_lookup(const struct nft_set *set,
 			    const struct nft_data *key,
-			    struct nft_data *data)
+			    const struct nft_set_ext **ext)
 {
 	struct nft_hash *priv = nft_set_priv(set);
 	const struct nft_hash_elem *he;
@@ -76,8 +76,8 @@ static bool nft_hash_lookup(const struct nft_set *set,
 	};
 
 	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
-	if (he && set->flags & NFT_SET_MAP)
-		nft_data_copy(data, nft_set_ext_data(&he->ext));
+	if (he != NULL)
+		*ext = &he->ext;
 
 	return !!he;
 }
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 9615b8b9fb37..a5f30b8760ea 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -31,9 +31,13 @@ static void nft_lookup_eval(const struct nft_expr *expr,
 {
 	const struct nft_lookup *priv = nft_expr_priv(expr);
 	const struct nft_set *set = priv->set;
+	const struct nft_set_ext *ext;
 
-	if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg]))
+	if (set->ops->lookup(set, &data[priv->sreg], &ext)) {
+		if (set->flags & NFT_SET_MAP)
+			nft_data_copy(&data[priv->dreg], nft_set_ext_data(ext));
 		return;
+	}
 	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
 }
 
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 332c6afc77e9..cbba755ebebc 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -31,7 +31,7 @@ struct nft_rbtree_elem {
 
 static bool nft_rbtree_lookup(const struct nft_set *set,
 			      const struct nft_data *key,
-			      struct nft_data *data)
+			      const struct nft_set_ext **ext)
 {
 	const struct nft_rbtree *priv = nft_set_priv(set);
 	const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -55,10 +55,9 @@ found:
 			    *nft_set_ext_flags(&rbe->ext) &
 			    NFT_SET_ELEM_INTERVAL_END)
 				goto out;
-			if (set->flags & NFT_SET_MAP)
-				nft_data_copy(data, nft_set_ext_data(&rbe->ext));
-
 			spin_unlock_bh(&nft_rbtree_lock);
+
+			*ext = &rbe->ext;
 			return true;
 		}
 	}
-- 
cgit v1.2.3


From ea4bd995b0f2fc5677ff8085e92a5d2544b9937c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Mar 2015 14:08:49 +0000
Subject: netfilter: nf_tables: add transaction helper functions

Add some helper functions for building the genmask as preparation for
set transactions.

Also add a little documentation how this stuff actually works.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 28 ++++++++++++++++++++++++++++
 net/netfilter/nf_tables_api.c     | 17 ++++++-----------
 net/netfilter/nf_tables_core.c    |  6 +-----
 3 files changed, 35 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f190d26bda7d..4c46a325874e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -720,6 +720,34 @@ void nft_unregister_expr(struct nft_expr_type *);
 #define MODULE_ALIAS_NFT_SET() \
 	MODULE_ALIAS("nft-set")
 
+/*
+ * The gencursor defines two generations, the currently active and the
+ * next one. Objects contain a bitmask of 2 bits specifying the generations
+ * they're active in. A set bit means they're inactive in the generation
+ * represented by that bit.
+ *
+ * New objects start out as inactive in the current and active in the
+ * next generation. When committing the ruleset the bitmask is cleared,
+ * meaning they're active in all generations. When removing an object,
+ * it is set inactive in the next generation. After committing the ruleset,
+ * the objects are removed.
+ */
+static inline unsigned int nft_gencursor_next(const struct net *net)
+{
+	return net->nft.gencursor + 1 == 1 ? 1 : 0;
+}
+
+static inline u8 nft_genmask_next(const struct net *net)
+{
+	return 1 << nft_gencursor_next(net);
+}
+
+static inline u8 nft_genmask_cur(const struct net *net)
+{
+	/* Use ACCESS_ONCE() to prevent refetching the value for atomicity */
+	return 1 << ACCESS_ONCE(net->nft.gencursor);
+}
+
 /**
  *	struct nft_trans - nf_tables object update in transaction
  *
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b35512f1934c..66fa5e935a55 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -198,36 +198,31 @@ static int nft_delchain(struct nft_ctx *ctx)
 static inline bool
 nft_rule_is_active(struct net *net, const struct nft_rule *rule)
 {
-	return (rule->genmask & (1 << net->nft.gencursor)) == 0;
-}
-
-static inline int gencursor_next(struct net *net)
-{
-	return net->nft.gencursor+1 == 1 ? 1 : 0;
+	return (rule->genmask & nft_genmask_cur(net)) == 0;
 }
 
 static inline int
 nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
 {
-	return (rule->genmask & (1 << gencursor_next(net))) == 0;
+	return (rule->genmask & nft_genmask_next(net)) == 0;
 }
 
 static inline void
 nft_rule_activate_next(struct net *net, struct nft_rule *rule)
 {
 	/* Now inactive, will be active in the future */
-	rule->genmask = (1 << net->nft.gencursor);
+	rule->genmask = nft_genmask_cur(net);
 }
 
 static inline void
 nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
 {
-	rule->genmask = (1 << gencursor_next(net));
+	rule->genmask = nft_genmask_next(net);
 }
 
 static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
 {
-	rule->genmask &= ~(1 << gencursor_next(net));
+	rule->genmask &= ~nft_genmask_next(net);
 }
 
 static int
@@ -3626,7 +3621,7 @@ static int nf_tables_commit(struct sk_buff *skb)
 	while (++net->nft.base_seq == 0);
 
 	/* A new generation has just started */
-	net->nft.gencursor = gencursor_next(net);
+	net->nft.gencursor = nft_gencursor_next(net);
 
 	/* Make sure all packets have left the previous generation before
 	 * purging old rules.
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 4429008fe99d..ef4dfcbaf149 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -121,11 +121,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
 	struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
 	struct nft_stats *stats;
 	int rulenum;
-	/*
-	 * Cache cursor to avoid problems in case that the cursor is updated
-	 * while traversing the ruleset.
-	 */
-	unsigned int gencursor = ACCESS_ONCE(net->nft.gencursor);
+	unsigned int gencursor = nft_genmask_cur(net);
 
 do_chain:
 	rulenum = 0;
-- 
cgit v1.2.3


From cc02e457bb86f7b6ffee3651bab22d104b60effb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Mar 2015 14:08:50 +0000
Subject: netfilter: nf_tables: implement set transaction support

Set elements are the last object type not supporting transaction support.
Implement similar to the existing rule transactions:

The global transaction counter keeps track of two generations, current
and next. Each element contains a bitmask specifying in which generations
it is inactive.

New elements start out as inactive in the current generation and active
in the next. On commit, the previous next generation becomes the current
generation and the element becomes active. The bitmask is then cleared
to indicate that the element is active in all future generations. If the
transaction is aborted, the element is removed from the set before it
becomes active.

When removing an element, it gets marked as inactive in the next generation.
On commit the next generation becomes active and the therefor the element
inactive. It is then taken out of then set and released. On abort, the
element is marked as active for the next generation again.

Lookups ignore elements not active in the current generation.

The current set types (hash/rbtree) both use a field in the extension area
to store the generation mask. This (currently) does not require any
additional memory since we have some free space in there.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 33 +++++++++++++++++++++------
 net/netfilter/nf_tables_api.c     | 33 ++++++++++++++++-----------
 net/netfilter/nft_hash.c          | 38 +++++++++++++++++++++++--------
 net/netfilter/nft_rbtree.c        | 48 +++++++++++++++++++++++++++++++--------
 4 files changed, 112 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4c46a325874e..b8cd60dcb4e1 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -138,15 +138,10 @@ struct nft_userdata {
 /**
  *	struct nft_set_elem - generic representation of set elements
  *
- *	@cookie: implementation specific element cookie
  *	@key: element key
  *	@priv: element private data and extensions
- *
- *	The cookie can be used to store a handle to the element for subsequent
- *	removal.
  */
 struct nft_set_elem {
-	void			*cookie;
 	struct nft_data		key;
 	void			*priv;
 };
@@ -207,6 +202,8 @@ struct nft_set_ext;
  *
  *	@lookup: look up an element within the set
  *	@insert: insert new element into set
+ *	@activate: activate new element in the next generation
+ *	@deactivate: deactivate element in the next generation
  *	@remove: remove element from set
  *	@walk: iterate over all set elemeennts
  *	@privsize: function to return size of set private data
@@ -221,10 +218,12 @@ struct nft_set_ops {
 	bool				(*lookup)(const struct nft_set *set,
 						  const struct nft_data *key,
 						  const struct nft_set_ext **ext);
-	int				(*get)(const struct nft_set *set,
-					       struct nft_set_elem *elem);
 	int				(*insert)(const struct nft_set *set,
 						  const struct nft_set_elem *elem);
+	void				(*activate)(const struct nft_set *set,
+						    const struct nft_set_elem *elem);
+	void *				(*deactivate)(const struct nft_set *set,
+						      const struct nft_set_elem *elem);
 	void				(*remove)(const struct nft_set *set,
 						  const struct nft_set_elem *elem);
 	void				(*walk)(const struct nft_ctx *ctx,
@@ -261,6 +260,7 @@ void nft_unregister_set(struct nft_set_ops *ops);
  * 	@nelems: number of elements
  *	@policy: set parameterization (see enum nft_set_policies)
  * 	@ops: set ops
+ * 	@pnet: network namespace
  * 	@flags: set flags
  * 	@klen: key length
  * 	@dlen: data length
@@ -277,6 +277,7 @@ struct nft_set {
 	u16				policy;
 	/* runtime data below here */
 	const struct nft_set_ops	*ops ____cacheline_aligned;
+	possible_net_t			pnet;
 	u16				flags;
 	u8				klen;
 	u8				dlen;
@@ -355,10 +356,12 @@ struct nft_set_ext_tmpl {
 /**
  *	struct nft_set_ext - set extensions
  *
+ *	@genmask: generation mask
  *	@offset: offsets of individual extension types
  *	@data: beginning of extension data
  */
 struct nft_set_ext {
+	u8	genmask;
 	u8	offset[NFT_SET_EXT_NUM];
 	char	data[0];
 };
@@ -748,6 +751,22 @@ static inline u8 nft_genmask_cur(const struct net *net)
 	return 1 << ACCESS_ONCE(net->nft.gencursor);
 }
 
+/*
+ * Set element transaction helpers
+ */
+
+static inline bool nft_set_elem_active(const struct nft_set_ext *ext,
+				       u8 genmask)
+{
+	return !(ext->genmask & genmask);
+}
+
+static inline void nft_set_elem_change_active(const struct nft_set *set,
+					      struct nft_set_ext *ext)
+{
+	ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
+}
+
 /**
  *	struct nft_trans - nf_tables object update in transaction
  *
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 66fa5e935a55..5604c2df05d1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2690,6 +2690,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 		goto err2;
 
 	INIT_LIST_HEAD(&set->bindings);
+	write_pnet(&set->pnet, net);
 	set->ops   = ops;
 	set->ktype = ktype;
 	set->klen  = desc.klen;
@@ -3221,10 +3222,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
 		goto err2;
 
-	err = -EEXIST;
-	if (set->ops->get(set, &elem) == 0)
-		goto err2;
-
 	nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
 
 	if (nla[NFTA_SET_ELEM_DATA] != NULL) {
@@ -3266,6 +3263,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	if (trans == NULL)
 		goto err4;
 
+	ext->genmask = nft_genmask_cur(ctx->net);
 	err = set->ops->insert(set, &elem);
 	if (err < 0)
 		goto err5;
@@ -3353,19 +3351,24 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
 		goto err2;
 
-	err = set->ops->get(set, &elem);
-	if (err < 0)
-		goto err2;
-
 	trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
 	if (trans == NULL) {
 		err = -ENOMEM;
 		goto err2;
 	}
 
+	elem.priv = set->ops->deactivate(set, &elem);
+	if (elem.priv == NULL) {
+		err = -ENOENT;
+		goto err3;
+	}
+
 	nft_trans_elem(trans) = elem;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
+
+err3:
+	kfree(trans);
 err2:
 	nft_data_uninit(&elem.key, desc.type);
 err1:
@@ -3692,9 +3695,11 @@ static int nf_tables_commit(struct sk_buff *skb)
 					     NFT_MSG_DELSET, GFP_KERNEL);
 			break;
 		case NFT_MSG_NEWSETELEM:
-			nf_tables_setelem_notify(&trans->ctx,
-						 nft_trans_elem_set(trans),
-						 &nft_trans_elem(trans),
+			te = (struct nft_trans_elem *)trans->data;
+
+			te->set->ops->activate(te->set, &te->elem);
+			nf_tables_setelem_notify(&trans->ctx, te->set,
+						 &te->elem,
 						 NFT_MSG_NEWSETELEM, 0);
 			nft_trans_destroy(trans);
 			break;
@@ -3704,7 +3709,6 @@ static int nf_tables_commit(struct sk_buff *skb)
 			nf_tables_setelem_notify(&trans->ctx, te->set,
 						 &te->elem,
 						 NFT_MSG_DELSETELEM, 0);
-			te->set->ops->get(te->set, &te->elem);
 			te->set->ops->remove(te->set, &te->elem);
 			break;
 		}
@@ -3812,11 +3816,14 @@ static int nf_tables_abort(struct sk_buff *skb)
 			nft_trans_elem_set(trans)->nelems--;
 			te = (struct nft_trans_elem *)trans->data;
 
-			te->set->ops->get(te->set, &te->elem);
 			te->set->ops->remove(te->set, &te->elem);
 			break;
 		case NFT_MSG_DELSETELEM:
+			te = (struct nft_trans_elem *)trans->data;
+
 			nft_trans_elem_set(trans)->nelems++;
+			te->set->ops->activate(te->set, &te->elem);
+
 			nft_trans_destroy(trans);
 			break;
 		}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 5bee82195ef5..c7e1a9d7d46f 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -35,6 +35,7 @@ struct nft_hash_elem {
 struct nft_hash_cmp_arg {
 	const struct nft_set		*set;
 	const struct nft_data		*key;
+	u8				genmask;
 };
 
 static const struct rhashtable_params nft_hash_params;
@@ -61,6 +62,8 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
 
 	if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
 		return 1;
+	if (!nft_set_elem_active(&he->ext, x->genmask))
+		return 1;
 	return 0;
 }
 
@@ -71,6 +74,7 @@ static bool nft_hash_lookup(const struct nft_set *set,
 	struct nft_hash *priv = nft_set_priv(set);
 	const struct nft_hash_elem *he;
 	struct nft_hash_cmp_arg arg = {
+		.genmask = nft_genmask_cur(read_pnet(&set->pnet)),
 		.set	 = set,
 		.key	 = key,
 	};
@@ -88,6 +92,7 @@ static int nft_hash_insert(const struct nft_set *set,
 	struct nft_hash *priv = nft_set_priv(set);
 	struct nft_hash_elem *he = elem->priv;
 	struct nft_hash_cmp_arg arg = {
+		.genmask = nft_genmask_next(read_pnet(&set->pnet)),
 		.set	 = set,
 		.key	 = &elem->key,
 	};
@@ -96,30 +101,39 @@ static int nft_hash_insert(const struct nft_set *set,
 					    nft_hash_params);
 }
 
-static void nft_hash_remove(const struct nft_set *set,
-			    const struct nft_set_elem *elem)
+static void nft_hash_activate(const struct nft_set *set,
+			      const struct nft_set_elem *elem)
 {
-	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he = elem->priv;
 
-	rhashtable_remove_fast(&priv->ht, elem->cookie, nft_hash_params);
+	nft_set_elem_change_active(set, &he->ext);
 }
 
-static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void *nft_hash_deactivate(const struct nft_set *set,
+				 const struct nft_set_elem *elem)
 {
 	struct nft_hash *priv = nft_set_priv(set);
 	struct nft_hash_elem *he;
 	struct nft_hash_cmp_arg arg = {
+		.genmask = nft_genmask_next(read_pnet(&set->pnet)),
 		.set	 = set,
 		.key	 = &elem->key,
 	};
 
 	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
-	if (!he)
-		return -ENOENT;
+	if (he != NULL)
+		nft_set_elem_change_active(set, &he->ext);
 
-	elem->priv = he;
+	return he;
+}
 
-	return 0;
+static void nft_hash_remove(const struct nft_set *set,
+			    const struct nft_set_elem *elem)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he = elem->priv;
+
+	rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
 }
 
 static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
@@ -129,6 +143,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 	struct nft_hash_elem *he;
 	struct rhashtable_iter hti;
 	struct nft_set_elem elem;
+	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 	int err;
 
 	err = rhashtable_walk_init(&priv->ht, &hti);
@@ -155,6 +170,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 
 		if (iter->count < iter->skip)
 			goto cont;
+		if (!nft_set_elem_active(&he->ext, genmask))
+			goto cont;
 
 		elem.priv = he;
 
@@ -241,8 +258,9 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
 	.estimate	= nft_hash_estimate,
 	.init		= nft_hash_init,
 	.destroy	= nft_hash_destroy,
-	.get		= nft_hash_get,
 	.insert		= nft_hash_insert,
+	.activate	= nft_hash_activate,
+	.deactivate	= nft_hash_deactivate,
 	.remove		= nft_hash_remove,
 	.lookup		= nft_hash_lookup,
 	.walk		= nft_hash_walk,
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index cbba755ebebc..42d0ca45fb9e 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -29,6 +29,7 @@ struct nft_rbtree_elem {
 	struct nft_set_ext	ext;
 };
 
+
 static bool nft_rbtree_lookup(const struct nft_set *set,
 			      const struct nft_data *key,
 			      const struct nft_set_ext **ext)
@@ -36,6 +37,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
 	const struct nft_rbtree *priv = nft_set_priv(set);
 	const struct nft_rbtree_elem *rbe, *interval = NULL;
 	const struct rb_node *parent;
+	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 	int d;
 
 	spin_lock_bh(&nft_rbtree_lock);
@@ -51,6 +53,10 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
 			parent = parent->rb_right;
 		else {
 found:
+			if (!nft_set_elem_active(&rbe->ext, genmask)) {
+				parent = parent->rb_left;
+				continue;
+			}
 			if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
 			    *nft_set_ext_flags(&rbe->ext) &
 			    NFT_SET_ELEM_INTERVAL_END)
@@ -77,6 +83,7 @@ static int __nft_rbtree_insert(const struct nft_set *set,
 	struct nft_rbtree *priv = nft_set_priv(set);
 	struct nft_rbtree_elem *rbe;
 	struct rb_node *parent, **p;
+	u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
 	int d;
 
 	parent = NULL;
@@ -91,8 +98,11 @@ static int __nft_rbtree_insert(const struct nft_set *set,
 			p = &parent->rb_left;
 		else if (d > 0)
 			p = &parent->rb_right;
-		else
-			return -EEXIST;
+		else {
+			if (nft_set_elem_active(&rbe->ext, genmask))
+				return -EEXIST;
+			p = &parent->rb_left;
+		}
 	}
 	rb_link_node(&new->node, parent, p);
 	rb_insert_color(&new->node, &priv->root);
@@ -116,18 +126,28 @@ static void nft_rbtree_remove(const struct nft_set *set,
 			      const struct nft_set_elem *elem)
 {
 	struct nft_rbtree *priv = nft_set_priv(set);
-	struct nft_rbtree_elem *rbe = elem->cookie;
+	struct nft_rbtree_elem *rbe = elem->priv;
 
 	spin_lock_bh(&nft_rbtree_lock);
 	rb_erase(&rbe->node, &priv->root);
 	spin_unlock_bh(&nft_rbtree_lock);
 }
 
-static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void nft_rbtree_activate(const struct nft_set *set,
+				const struct nft_set_elem *elem)
+{
+	struct nft_rbtree_elem *rbe = elem->priv;
+
+	nft_set_elem_change_active(set, &rbe->ext);
+}
+
+static void *nft_rbtree_deactivate(const struct nft_set *set,
+				   const struct nft_set_elem *elem)
 {
 	const struct nft_rbtree *priv = nft_set_priv(set);
 	const struct rb_node *parent = priv->root.rb_node;
 	struct nft_rbtree_elem *rbe;
+	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 	int d;
 
 	while (parent != NULL) {
@@ -140,12 +160,15 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
 		else if (d > 0)
 			parent = parent->rb_right;
 		else {
-			elem->cookie = rbe;
-			elem->priv   = rbe;
-			return 0;
+			if (!nft_set_elem_active(&rbe->ext, genmask)) {
+				parent = parent->rb_left;
+				continue;
+			}
+			nft_set_elem_change_active(set, &rbe->ext);
+			return rbe;
 		}
 	}
-	return -ENOENT;
+	return NULL;
 }
 
 static void nft_rbtree_walk(const struct nft_ctx *ctx,
@@ -156,13 +179,17 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 	struct nft_rbtree_elem *rbe;
 	struct nft_set_elem elem;
 	struct rb_node *node;
+	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 
 	spin_lock_bh(&nft_rbtree_lock);
 	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+		rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
 		if (iter->count < iter->skip)
 			goto cont;
+		if (!nft_set_elem_active(&rbe->ext, genmask))
+			goto cont;
 
-		rbe = rb_entry(node, struct nft_rbtree_elem, node);
 		elem.priv = rbe;
 
 		iter->err = iter->fn(ctx, set, iter, &elem);
@@ -228,7 +255,8 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
 	.destroy	= nft_rbtree_destroy,
 	.insert		= nft_rbtree_insert,
 	.remove		= nft_rbtree_remove,
-	.get		= nft_rbtree_get,
+	.deactivate	= nft_rbtree_deactivate,
+	.activate	= nft_rbtree_activate,
 	.lookup		= nft_rbtree_lookup,
 	.walk		= nft_rbtree_walk,
 	.features	= NFT_SET_INTERVAL | NFT_SET_MAP,
-- 
cgit v1.2.3


From 1e9e39f4a29857a396ac7b669d109f697f66695e Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Thu, 26 Feb 2015 19:34:37 +0000
Subject: usbnet: Fix tx_packets stat for FLAG_MULTI_FRAME drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the usbnet core does not update the tx_packets statistic for
drivers with FLAG_MULTI_PACKET and there is no hook in the TX
completion path where they could do this.

cdc_ncm and dependent drivers are bumping tx_packets stat on the
transmit path while asix and sr9800 aren't updating it at all.

Add a packet count in struct skb_data so these drivers can fill it
in, initialise it to 1 for other drivers, and add the packet count
to the tx_packets statistic on completion.

Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Tested-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_common.c |  2 ++
 drivers/net/usb/cdc_ncm.c     |  3 ++-
 drivers/net/usb/sr9800.c      |  1 +
 drivers/net/usb/usbnet.c      |  5 +++--
 include/linux/usb/usbnet.h    | 12 ++++++++++++
 5 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 5c55f11572ba..724a9b50df7a 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -188,6 +188,8 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 		memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes));
 		skb_put(skb, sizeof(padbytes));
 	}
+
+	usbnet_set_skb_tx_stats(skb, 1);
 	return skb;
 }
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 80a844e0ae03..70cbea551139 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1172,7 +1172,6 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 
 	/* return skb */
 	ctx->tx_curr_skb = NULL;
-	dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
 
 	/* keep private stats: framing overhead and number of NTBs */
 	ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload;
@@ -1184,6 +1183,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 */
 	dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload;
 
+	usbnet_set_skb_tx_stats(skb_out, n);
+
 	return skb_out;
 
 exit_no_skb:
diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c
index b94a0fbb8b3b..7650cdc8fe6b 100644
--- a/drivers/net/usb/sr9800.c
+++ b/drivers/net/usb/sr9800.c
@@ -144,6 +144,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 		skb_put(skb, sizeof(padbytes));
 	}
 
+	usbnet_set_skb_tx_stats(skb, 1);
 	return skb;
 }
 
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 449835f4331e..0f3ff285f6a1 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1188,8 +1188,7 @@ static void tx_complete (struct urb *urb)
 	struct usbnet		*dev = entry->dev;
 
 	if (urb->status == 0) {
-		if (!(dev->driver_info->flags & FLAG_MULTI_PACKET))
-			dev->net->stats.tx_packets++;
+		dev->net->stats.tx_packets += entry->packets;
 		dev->net->stats.tx_bytes += entry->length;
 	} else {
 		dev->net->stats.tx_errors++;
@@ -1348,6 +1347,8 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 			urb->transfer_flags |= URB_ZERO_PACKET;
 	}
 	entry->length = urb->transfer_buffer_length = length;
+	if (!(info->flags & FLAG_MULTI_PACKET))
+		usbnet_set_skb_tx_stats(skb, 1);
 
 	spin_lock_irqsave(&dev->txq.lock, flags);
 	retval = usb_autopm_get_interface_async(dev->intf);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index d9a4905e01d0..ff3fb2bd0e90 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -228,8 +228,20 @@ struct skb_data {	/* skb->cb is one of these */
 	struct usbnet		*dev;
 	enum skb_state		state;
 	size_t			length;
+	unsigned long		packets;
 };
 
+/* Drivers that set FLAG_MULTI_PACKET must call this in their
+ * tx_fixup method before returning an skb.
+ */
+static inline void
+usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets)
+{
+	struct skb_data *entry = (struct skb_data *) skb->cb;
+
+	entry->packets = packets;
+}
+
 extern int usbnet_open(struct net_device *net);
 extern int usbnet_stop(struct net_device *net);
 extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb,
-- 
cgit v1.2.3


From 7a1e890e2168e33fb62d84528e996b8b4b478fea Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Wed, 25 Mar 2015 21:41:33 +0100
Subject: usbnet: Fix tx_bytes statistic running backward in cdc_ncm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cdc_ncm disagrees with usbnet about how much framing overhead should
be counted in the tx_bytes statistics, and tries 'fix' this by
decrementing tx_bytes on the transmit path.  But statistics must never
be decremented except due to roll-over; this will thoroughly confuse
user-space.  Also, tx_bytes is only incremented by usbnet in the
completion path.

Fix this by requiring drivers that set FLAG_MULTI_FRAME to set a
tx_bytes delta along with the tx_packets count.

Fixes: beeecd42c3b4 ("net: cdc_ncm/cdc_mbim: adding NCM protocol statistics")
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
---
 drivers/net/usb/asix_common.c |  2 +-
 drivers/net/usb/cdc_ncm.c     |  7 +++----
 drivers/net/usb/sr9800.c      |  2 +-
 drivers/net/usb/usbnet.c      | 16 +++++++++++++---
 include/linux/usb/usbnet.h    |  6 ++++--
 5 files changed, 22 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 724a9b50df7a..75d6f26729a3 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -189,7 +189,7 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 		skb_put(skb, sizeof(padbytes));
 	}
 
-	usbnet_set_skb_tx_stats(skb, 1);
+	usbnet_set_skb_tx_stats(skb, 1, 0);
 	return skb;
 }
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 70cbea551139..c3e4da9e79ca 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1177,13 +1177,12 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload;
 	ctx->tx_ntbs++;
 
-	/* usbnet has already counted all the framing overhead.
+	/* usbnet will count all the framing overhead by default.
 	 * Adjust the stats so that the tx_bytes counter show real
 	 * payload data instead.
 	 */
-	dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload;
-
-	usbnet_set_skb_tx_stats(skb_out, n);
+	usbnet_set_skb_tx_stats(skb_out, n,
+				ctx->tx_curr_frame_payload - skb_out->len);
 
 	return skb_out;
 
diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c
index 7650cdc8fe6b..953de13267df 100644
--- a/drivers/net/usb/sr9800.c
+++ b/drivers/net/usb/sr9800.c
@@ -144,7 +144,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 		skb_put(skb, sizeof(padbytes));
 	}
 
-	usbnet_set_skb_tx_stats(skb, 1);
+	usbnet_set_skb_tx_stats(skb, 1, 0);
 	return skb;
 }
 
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 0f3ff285f6a1..777757ae1973 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1346,9 +1346,19 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 		} else
 			urb->transfer_flags |= URB_ZERO_PACKET;
 	}
-	entry->length = urb->transfer_buffer_length = length;
-	if (!(info->flags & FLAG_MULTI_PACKET))
-		usbnet_set_skb_tx_stats(skb, 1);
+	urb->transfer_buffer_length = length;
+
+	if (info->flags & FLAG_MULTI_PACKET) {
+		/* Driver has set number of packets and a length delta.
+		 * Calculate the complete length and ensure that it's
+		 * positive.
+		 */
+		entry->length += length;
+		if (WARN_ON_ONCE(entry->length <= 0))
+			entry->length = length;
+	} else {
+		usbnet_set_skb_tx_stats(skb, 1, length);
+	}
 
 	spin_lock_irqsave(&dev->txq.lock, flags);
 	retval = usb_autopm_get_interface_async(dev->intf);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index ff3fb2bd0e90..6e0ce8c7b8cb 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -227,7 +227,7 @@ struct skb_data {	/* skb->cb is one of these */
 	struct urb		*urb;
 	struct usbnet		*dev;
 	enum skb_state		state;
-	size_t			length;
+	long			length;
 	unsigned long		packets;
 };
 
@@ -235,11 +235,13 @@ struct skb_data {	/* skb->cb is one of these */
  * tx_fixup method before returning an skb.
  */
 static inline void
-usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets)
+usbnet_set_skb_tx_stats(struct sk_buff *skb,
+			unsigned long packets, long bytes_delta)
 {
 	struct skb_data *entry = (struct skb_data *) skb->cb;
 
 	entry->packets = packets;
+	entry->length = bytes_delta;
 }
 
 extern int usbnet_open(struct net_device *net);
-- 
cgit v1.2.3


From 41d25fe0927aabb1d4b671871a99a55bcd203257 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 25 Mar 2015 15:08:47 -0700
Subject: tcp: tcp_syn_flood_action() can be static

After commit 1fb6f159fd21 ("tcp: add tcp_conn_request"),
tcp_syn_flood_action() is no longer used from IPv6.

We can make it static, by moving it above tcp_conn_request()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Octavian Purdila <octavian.purdila@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    |  2 --
 net/ipv4/tcp_input.c | 29 +++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c  | 29 -----------------------------
 3 files changed, 29 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 42690daa924e..963303fb96ae 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -529,8 +529,6 @@ int tcp_write_wakeup(struct sock *);
 void tcp_send_fin(struct sock *sk);
 void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 int tcp_send_synack(struct sock *);
-bool tcp_syn_flood_action(struct sock *sk, const struct sk_buff *skb,
-			  const char *proto);
 void tcp_push_one(struct sock *, unsigned int mss_now);
 void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 023196f7ec37..18b80e8bc533 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5987,6 +5987,35 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 }
 EXPORT_SYMBOL(inet_reqsk_alloc);
 
+/*
+ * Return true if a syncookie should be sent
+ */
+static bool tcp_syn_flood_action(struct sock *sk,
+				 const struct sk_buff *skb,
+				 const char *proto)
+{
+	const char *msg = "Dropping request";
+	bool want_cookie = false;
+	struct listen_sock *lopt;
+
+#ifdef CONFIG_SYN_COOKIES
+	if (sysctl_tcp_syncookies) {
+		msg = "Sending cookies";
+		want_cookie = true;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+	} else
+#endif
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
+
+	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
+		lopt->synflood_warned = 1;
+		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
+			proto, ntohs(tcp_hdr(skb)->dest), msg);
+	}
+	return want_cookie;
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e073517b2cc7..5aababa20a21 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -856,35 +856,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
 	kfree(inet_rsk(req)->opt);
 }
 
-/*
- * Return true if a syncookie should be sent
- */
-bool tcp_syn_flood_action(struct sock *sk,
-			 const struct sk_buff *skb,
-			 const char *proto)
-{
-	const char *msg = "Dropping request";
-	bool want_cookie = false;
-	struct listen_sock *lopt;
-
-#ifdef CONFIG_SYN_COOKIES
-	if (sysctl_tcp_syncookies) {
-		msg = "Sending cookies";
-		want_cookie = true;
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
-	} else
-#endif
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
-
-	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
-	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
-		lopt->synflood_warned = 1;
-		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
-			proto, ntohs(tcp_hdr(skb)->dest), msg);
-	}
-	return want_cookie;
-}
-EXPORT_SYMBOL(tcp_syn_flood_action);
 
 #ifdef CONFIG_TCP_MD5SIG
 /*
-- 
cgit v1.2.3


From 4ad3e3634a6cbe916722c7113c5b488d52c7a3dc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 27 Mar 2015 14:15:04 +0000
Subject: irqchip: gicv3-its: Fix PROP/PEND and BASE/CBASE confusion

The ITS driver sometime mixes up the use of GICR_PROPBASE bitfields
for the GICR_PENDBASE register, and GITS_BASER for GICR_CBASE.

This does not lead to any observable bug because similar bits are
at the same location, but this just make the code even harder to
understand...

This patch provides the required #defines and fixes the mixup.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Link: https://lkml.kernel.org/r/1427465705-17126-4-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-gic-v3-its.c   |  6 +++---
 include/linux/irqchip/arm-gic-v3.h | 13 +++++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index fa0c43660c8b..56353f6b5952 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -986,8 +986,8 @@ static void its_cpu_init_lpis(void)
 
 	/* set PENDBASE */
 	val = (page_to_phys(pend_page) |
-	       GICR_PROPBASER_InnerShareable |
-	       GICR_PROPBASER_WaWb);
+	       GICR_PENDBASER_InnerShareable |
+	       GICR_PENDBASER_WaWb);
 
 	writeq_relaxed(val, rbase + GICR_PENDBASER);
 
@@ -1425,7 +1425,7 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
 	writeq_relaxed(0, its->base + GITS_CWRITER);
 	writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
 
-	if ((tmp ^ baser) & GITS_BASER_SHAREABILITY_MASK) {
+	if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) {
 		pr_info("ITS: using cache flushing for cmd queue\n");
 		its->flags |= ITS_FLAGS_CMDQ_NEEDS_FLUSHING;
 	}
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 781974afff9f..826a4bd63d4a 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -128,6 +128,19 @@
 #define GICR_PROPBASER_RaWaWb		(7U << 7)
 #define GICR_PROPBASER_IDBITS_MASK	(0x1f)
 
+#define GICR_PENDBASER_NonShareable	(0U << 10)
+#define GICR_PENDBASER_InnerShareable	(1U << 10)
+#define GICR_PENDBASER_OuterShareable	(2U << 10)
+#define GICR_PENDBASER_SHAREABILITY_MASK (3UL << 10)
+#define GICR_PENDBASER_nCnB		(0U << 7)
+#define GICR_PENDBASER_nC		(1U << 7)
+#define GICR_PENDBASER_RaWt		(2U << 7)
+#define GICR_PENDBASER_RaWb		(3U << 7)
+#define GICR_PENDBASER_WaWt		(4U << 7)
+#define GICR_PENDBASER_WaWb		(5U << 7)
+#define GICR_PENDBASER_RaWaWt		(6U << 7)
+#define GICR_PENDBASER_RaWaWb		(7U << 7)
+
 /*
  * Re-Distributor registers, offsets from SGI_base
  */
-- 
cgit v1.2.3


From 241a386c7dbb8b0db400a1f92f2ebe3b10eb661d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 27 Mar 2015 14:15:05 +0000
Subject: irqchip: gicv3-its: Use non-cacheable accesses when no shareability

If the ITS or the redistributors report their shareability as zero,
then it is important to make sure they will no generate any cacheable
traffic, as this is unlikely to produce the expected result.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Link: https://lkml.kernel.org/r/1427465705-17126-5-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-gic-v3-its.c   | 47 ++++++++++++++++++++++++++++++++++----
 include/linux/irqchip/arm-gic-v3.h |  4 ++++
 2 files changed, 47 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 56353f6b5952..9687f8afebff 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -802,6 +802,7 @@ static int its_alloc_tables(struct its_node *its)
 	int i;
 	int psz = SZ_64K;
 	u64 shr = GITS_BASER_InnerShareable;
+	u64 cache = GITS_BASER_WaWb;
 
 	for (i = 0; i < GITS_BASER_NR_REGS; i++) {
 		u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
@@ -848,7 +849,7 @@ retry_baser:
 		val = (virt_to_phys(base) 				 |
 		       (type << GITS_BASER_TYPE_SHIFT)			 |
 		       ((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
-		       GITS_BASER_WaWb					 |
+		       cache						 |
 		       shr						 |
 		       GITS_BASER_VALID);
 
@@ -874,9 +875,12 @@ retry_baser:
 			 * Shareability didn't stick. Just use
 			 * whatever the read reported, which is likely
 			 * to be the only thing this redistributor
-			 * supports.
+			 * supports. If that's zero, make it
+			 * non-cacheable as well.
 			 */
 			shr = tmp & GITS_BASER_SHAREABILITY_MASK;
+			if (!shr)
+				cache = GITS_BASER_nC;
 			goto retry_baser;
 		}
 
@@ -980,6 +984,17 @@ static void its_cpu_init_lpis(void)
 	tmp = readq_relaxed(rbase + GICR_PROPBASER);
 
 	if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
+		if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) {
+			/*
+			 * The HW reports non-shareable, we must
+			 * remove the cacheability attributes as
+			 * well.
+			 */
+			val &= ~(GICR_PROPBASER_SHAREABILITY_MASK |
+				 GICR_PROPBASER_CACHEABILITY_MASK);
+			val |= GICR_PROPBASER_nC;
+			writeq_relaxed(val, rbase + GICR_PROPBASER);
+		}
 		pr_info_once("GIC: using cache flushing for LPI property table\n");
 		gic_rdists->flags |= RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING;
 	}
@@ -990,6 +1005,18 @@ static void its_cpu_init_lpis(void)
 	       GICR_PENDBASER_WaWb);
 
 	writeq_relaxed(val, rbase + GICR_PENDBASER);
+	tmp = readq_relaxed(rbase + GICR_PENDBASER);
+
+	if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
+		/*
+		 * The HW reports non-shareable, we must remove the
+		 * cacheability attributes as well.
+		 */
+		val &= ~(GICR_PENDBASER_SHAREABILITY_MASK |
+			 GICR_PENDBASER_CACHEABILITY_MASK);
+		val |= GICR_PENDBASER_nC;
+		writeq_relaxed(val, rbase + GICR_PENDBASER);
+	}
 
 	/* Enable LPIs */
 	val = readl_relaxed(rbase + GICR_CTLR);
@@ -1422,14 +1449,26 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
 
 	writeq_relaxed(baser, its->base + GITS_CBASER);
 	tmp = readq_relaxed(its->base + GITS_CBASER);
-	writeq_relaxed(0, its->base + GITS_CWRITER);
-	writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
 
 	if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) {
+		if (!(tmp & GITS_CBASER_SHAREABILITY_MASK)) {
+			/*
+			 * The HW reports non-shareable, we must
+			 * remove the cacheability attributes as
+			 * well.
+			 */
+			baser &= ~(GITS_CBASER_SHAREABILITY_MASK |
+				   GITS_CBASER_CACHEABILITY_MASK);
+			baser |= GITS_CBASER_nC;
+			writeq_relaxed(baser, its->base + GITS_CBASER);
+		}
 		pr_info("ITS: using cache flushing for cmd queue\n");
 		its->flags |= ITS_FLAGS_CMDQ_NEEDS_FLUSHING;
 	}
 
+	writeq_relaxed(0, its->base + GITS_CWRITER);
+	writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
+
 	if (of_property_read_bool(its->msi_chip.of_node, "msi-controller")) {
 		its->domain = irq_domain_add_tree(NULL, &its_domain_ops, its);
 		if (!its->domain) {
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 826a4bd63d4a..ffbc034c8810 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -126,6 +126,7 @@
 #define GICR_PROPBASER_WaWb		(5U << 7)
 #define GICR_PROPBASER_RaWaWt		(6U << 7)
 #define GICR_PROPBASER_RaWaWb		(7U << 7)
+#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7)
 #define GICR_PROPBASER_IDBITS_MASK	(0x1f)
 
 #define GICR_PENDBASER_NonShareable	(0U << 10)
@@ -140,6 +141,7 @@
 #define GICR_PENDBASER_WaWb		(5U << 7)
 #define GICR_PENDBASER_RaWaWt		(6U << 7)
 #define GICR_PENDBASER_RaWaWb		(7U << 7)
+#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7)
 
 /*
  * Re-Distributor registers, offsets from SGI_base
@@ -195,6 +197,7 @@
 #define GITS_CBASER_WaWb		(5UL << 59)
 #define GITS_CBASER_RaWaWt		(6UL << 59)
 #define GITS_CBASER_RaWaWb		(7UL << 59)
+#define GITS_CBASER_CACHEABILITY_MASK	(7UL << 59)
 #define GITS_CBASER_NonShareable	(0UL << 10)
 #define GITS_CBASER_InnerShareable	(1UL << 10)
 #define GITS_CBASER_OuterShareable	(2UL << 10)
@@ -211,6 +214,7 @@
 #define GITS_BASER_WaWb			(5UL << 59)
 #define GITS_BASER_RaWaWt		(6UL << 59)
 #define GITS_BASER_RaWaWb		(7UL << 59)
+#define GITS_BASER_CACHEABILITY_MASK	(7UL << 59)
 #define GITS_BASER_TYPE_SHIFT		(56)
 #define GITS_BASER_TYPE(r)		(((r) >> GITS_BASER_TYPE_SHIFT) & 7)
 #define GITS_BASER_ENTRY_SIZE_SHIFT	(48)
-- 
cgit v1.2.3


From 339d82626d225e9b876665e4e89b7eb123e96b3d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 26 Mar 2015 18:36:37 -0700
Subject: net: dsa: Add basic framework to support ndo_fdb functions

Provide callbacks for ndo_fdb_add, ndo_fdb_del, and ndo_fdb_dump.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |   6 ++++
 net/dsa/slave.c   | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 47917e5e1e12..fbca63ba8f73 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,6 +296,12 @@ struct dsa_switch_driver {
 				     u32 br_port_mask);
 	int	(*port_stp_update)(struct dsa_switch *ds, int port,
 				   u8 state);
+	int	(*fdb_add)(struct dsa_switch *ds, int port,
+			   const unsigned char *addr, u16 vid);
+	int	(*fdb_del)(struct dsa_switch *ds, int port,
+			   const unsigned char *addr, u16 vid);
+	int	(*fdb_getnext)(struct dsa_switch *ds, int port,
+			       unsigned char *addr, bool *is_static);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 39555f3f263b..3597724ec3d8 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -201,6 +201,105 @@ out:
 	return 0;
 }
 
+static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+			     struct net_device *dev,
+			     const unsigned char *addr, u16 vid, u16 nlm_flags)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	int ret = -EOPNOTSUPP;
+
+	if (ds->drv->fdb_add)
+		ret = ds->drv->fdb_add(ds, p->port, addr, vid);
+
+	return ret;
+}
+
+static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+			     struct net_device *dev,
+			     const unsigned char *addr, u16 vid)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	int ret = -EOPNOTSUPP;
+
+	if (ds->drv->fdb_del)
+		ret = ds->drv->fdb_del(ds, p->port, addr, vid);
+
+	return ret;
+}
+
+static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb,
+			       const unsigned char *addr, u16 vid,
+			       bool is_static,
+			       u32 portid, u32 seq, int type,
+			       unsigned int flags)
+{
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family	 = AF_BRIDGE;
+	ndm->ndm_pad1    = 0;
+	ndm->ndm_pad2    = 0;
+	ndm->ndm_flags	 = NTF_EXT_LEARNED;
+	ndm->ndm_type	 = 0;
+	ndm->ndm_ifindex = dev->ifindex;
+	ndm->ndm_state   = is_static ? NUD_NOARP : NUD_REACHABLE;
+
+	if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
+		goto nla_put_failure;
+
+	if (vid && nla_put_u16(skb, NDA_VLAN, vid))
+		goto nla_put_failure;
+
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+/* Dump information about entries, in response to GETNEIGH */
+static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+			      struct net_device *dev,
+			      struct net_device *filter_dev, int idx)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	unsigned char addr[ETH_ALEN] = { 0 };
+	int ret;
+
+	if (!ds->drv->fdb_getnext)
+		return -EOPNOTSUPP;
+
+	for (; ; idx++) {
+		bool is_static;
+
+		ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static);
+		if (ret < 0)
+			break;
+
+		if (idx < cb->args[0])
+			continue;
+
+		ret = dsa_slave_fill_info(dev, skb, addr, 0,
+					  is_static,
+					  NETLINK_CB(cb->skb).portid,
+					  cb->nlh->nlmsg_seq,
+					  RTM_NEWNEIGH, NLM_F_MULTI);
+		if (ret < 0)
+			break;
+	}
+
+	return idx;
+}
+
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -572,6 +671,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
 	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
 	.ndo_set_mac_address	= dsa_slave_set_mac_address,
+	.ndo_fdb_add		= dsa_slave_fdb_add,
+	.ndo_fdb_del		= dsa_slave_fdb_del,
+	.ndo_fdb_dump		= dsa_slave_fdb_dump,
 	.ndo_do_ioctl		= dsa_slave_ioctl,
 };
 
-- 
cgit v1.2.3


From 608cd71a9c7c9db76e78a792c5a4101e12fea43f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 26 Mar 2015 19:53:57 -0700
Subject: tc: bpf: generalize pedit action

existing TC action 'pedit' can munge any bits of the packet.
Generalize it for use in bpf programs attached as cls_bpf and act_bpf via
bpf_skb_store_bytes() helper function.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/verifier.c    |  2 ++
 net/core/filter.c        | 71 ++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 73 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 280a315de8d6..d5cda067115a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -59,6 +59,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_STACK,	/* any pointer to eBPF program stack */
 	ARG_CONST_STACK_SIZE,	/* number of bytes accessed from stack */
 
+	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 27dc4ec58840..74aab6e0d964 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -168,6 +168,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
 	BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
+	BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0e714f799ec0..630a7bac1e51 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -773,6 +773,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 		expected_type = CONST_IMM;
 	} else if (arg_type == ARG_CONST_MAP_PTR) {
 		expected_type = CONST_PTR_TO_MAP;
+	} else if (arg_type == ARG_PTR_TO_CTX) {
+		expected_type = PTR_TO_CTX;
 	} else {
 		verbose("unsupported arg_type %d\n", arg_type);
 		return -EFAULT;
diff --git a/net/core/filter.c b/net/core/filter.c
index 32f43c59908c..444a07e4f68d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,6 +1175,56 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 	return 0;
 }
 
+static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	unsigned int offset = (unsigned int) r2;
+	void *from = (void *) (long) r3;
+	unsigned int len = (unsigned int) r4;
+	char buf[16];
+	void *ptr;
+
+	/* bpf verifier guarantees that:
+	 * 'from' pointer points to bpf program stack
+	 * 'len' bytes of it were initialized
+	 * 'len' > 0
+	 * 'skb' is a valid pointer to 'struct sk_buff'
+	 *
+	 * so check for invalid 'offset' and too large 'len'
+	 */
+	if (offset > 0xffff || len > sizeof(buf))
+		return -EFAULT;
+
+	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+		return -EFAULT;
+
+	ptr = skb_header_pointer(skb, offset, len, buf);
+	if (unlikely(!ptr))
+		return -EFAULT;
+
+	skb_postpull_rcsum(skb, ptr, len);
+
+	memcpy(ptr, from, len);
+
+	if (ptr == buf)
+		/* skb_store_bits cannot return -EFAULT here */
+		skb_store_bits(skb, offset, ptr, len);
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
+	return 0;
+}
+
+const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+	.func		= bpf_skb_store_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_STACK,
+	.arg4_type	= ARG_CONST_STACK_SIZE,
+};
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
@@ -1194,6 +1244,17 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 	}
 }
 
+static const struct bpf_func_proto *
+tc_cls_act_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_store_bytes:
+		return &bpf_skb_store_bytes_proto;
+	default:
+		return sk_filter_func_proto(func_id);
+	}
+}
+
 static bool sk_filter_is_valid_access(int off, int size,
 				      enum bpf_access_type type)
 {
@@ -1270,18 +1331,24 @@ static const struct bpf_verifier_ops sk_filter_ops = {
 	.convert_ctx_access = sk_filter_convert_ctx_access,
 };
 
+static const struct bpf_verifier_ops tc_cls_act_ops = {
+	.get_func_proto = tc_cls_act_func_proto,
+	.is_valid_access = sk_filter_is_valid_access,
+	.convert_ctx_access = sk_filter_convert_ctx_access,
+};
+
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 	.ops = &sk_filter_ops,
 	.type = BPF_PROG_TYPE_SOCKET_FILTER,
 };
 
 static struct bpf_prog_type_list sched_cls_type __read_mostly = {
-	.ops = &sk_filter_ops,
+	.ops = &tc_cls_act_ops,
 	.type = BPF_PROG_TYPE_SCHED_CLS,
 };
 
 static struct bpf_prog_type_list sched_act_type __read_mostly = {
-	.ops = &sk_filter_ops,
+	.ops = &tc_cls_act_ops,
 	.type = BPF_PROG_TYPE_SCHED_ACT,
 };
 
-- 
cgit v1.2.3


From f5a7fb88e1f82542ca14ba93a1d4fa35471c60ca Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 27 Mar 2015 14:31:11 +0900
Subject: vlan: Introduce helper functions to check if skb is tagged

Separate the two checks for single vlan and multiple vlans in
netif_skb_features().  This allows us to move the check for multiple
vlans to another function later.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 net/core/dev.c          | 24 ++++++++----------------
 2 files changed, 53 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index b11b28a30b9e..4265d440ec4d 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -561,4 +561,49 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 		skb->protocol = htons(ETH_P_802_2);
 }
 
+/**
+ * skb_vlan_tagged - check if skb is vlan tagged.
+ * @skb: skbuff to query
+ *
+ * Returns true if the skb is tagged, regardless of whether it is hardware
+ * accelerated or not.
+ */
+static inline bool skb_vlan_tagged(const struct sk_buff *skb)
+{
+	if (!skb_vlan_tag_present(skb) &&
+	    likely(skb->protocol != htons(ETH_P_8021Q) &&
+		   skb->protocol != htons(ETH_P_8021AD)))
+		return false;
+
+	return true;
+}
+
+/**
+ * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers.
+ * @skb: skbuff to query
+ *
+ * Returns true if the skb is tagged with multiple vlan headers, regardless
+ * of whether it is hardware accelerated or not.
+ */
+static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
+{
+	__be16 protocol = skb->protocol;
+
+	if (!skb_vlan_tag_present(skb)) {
+		struct vlan_ethhdr *veh;
+
+		if (likely(protocol != htons(ETH_P_8021Q) &&
+			   protocol != htons(ETH_P_8021AD)))
+			return false;
+
+		veh = (struct vlan_ethhdr *)skb->data;
+		protocol = veh->h_vlan_encapsulated_proto;
+	}
+
+	if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD))
+		return false;
+
+	return true;
+}
+
 #endif /* !(_LINUX_IF_VLAN_H_) */
diff --git a/net/core/dev.c b/net/core/dev.c
index a0408d497dae..04bffcd4a48d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2567,7 +2567,6 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 	struct net_device *dev = skb->dev;
 	netdev_features_t features = dev->features;
 	u16 gso_segs = skb_shinfo(skb)->gso_segs;
-	__be16 protocol = skb->protocol;
 
 	if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
 		features &= ~NETIF_F_GSO_MASK;
@@ -2579,22 +2578,15 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 	if (skb->encapsulation)
 		features &= dev->hw_enc_features;
 
-	if (!skb_vlan_tag_present(skb)) {
-		if (unlikely(protocol == htons(ETH_P_8021Q) ||
-			     protocol == htons(ETH_P_8021AD))) {
-			struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-			protocol = veh->h_vlan_encapsulated_proto;
-		} else {
-			goto finalize;
-		}
-	}
-
-	features = netdev_intersect_features(features,
-					     dev->vlan_features |
-					     NETIF_F_HW_VLAN_CTAG_TX |
-					     NETIF_F_HW_VLAN_STAG_TX);
+	if (skb_vlan_tagged(skb))
+		features = netdev_intersect_features(features,
+						     dev->vlan_features |
+						     NETIF_F_HW_VLAN_CTAG_TX |
+						     NETIF_F_HW_VLAN_STAG_TX);
+	else
+		goto finalize;
 
-	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
+	if (skb_vlan_tagged_multi(skb))
 		features = netdev_intersect_features(features,
 						     NETIF_F_SG |
 						     NETIF_F_HIGHDMA |
-- 
cgit v1.2.3


From 8cb65d00086bfba22bac87ff18b751432fc74003 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 27 Mar 2015 14:31:12 +0900
Subject: net: Move check for multiple vlans to drivers

To allow drivers to handle the features check for multiple tags,
move the check to ndo_features_check().
As no drivers currently handle multiple tagged TSO, introduce
dflt_features_check() and call it if the driver does not have
ndo_features_check().

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |  1 +
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |  1 +
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |  1 +
 include/linux/if_vlan.h                          | 22 ++++++++++++++++++++++
 net/core/dev.c                                   | 21 +++++++++------------
 5 files changed, 34 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 9677431c582a..039b0c1f480e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12557,6 +12557,7 @@ static netdev_features_t bnx2x_features_check(struct sk_buff *skb,
 					      struct net_device *dev,
 					      netdev_features_t features)
 {
+	features = vlan_features_check(skb, features);
 	return vxlan_features_check(skb, features);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index a8339e98ad24..ebc93a101c93 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2373,6 +2373,7 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
 						struct net_device *dev,
 						netdev_features_t features)
 {
+	features = vlan_features_check(skb, features);
 	return vxlan_features_check(skb, features);
 }
 #endif
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index a430a34a4434..367f3976df56 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -507,6 +507,7 @@ static netdev_features_t qlcnic_features_check(struct sk_buff *skb,
 					       struct net_device *dev,
 					       netdev_features_t features)
 {
+	features = vlan_features_check(skb, features);
 	return vxlan_features_check(skb, features);
 }
 #endif
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 4265d440ec4d..920e4457ce6e 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -606,4 +606,26 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
 	return true;
 }
 
+/**
+ * vlan_features_check - drop unsafe features for skb with multiple tags.
+ * @skb: skbuff to query
+ * @features: features to be checked
+ *
+ * Returns features without unsafe ones if the skb has multiple tags.
+ */
+static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
+						    netdev_features_t features)
+{
+	if (skb_vlan_tagged_multi(skb))
+		features = netdev_intersect_features(features,
+						     NETIF_F_SG |
+						     NETIF_F_HIGHDMA |
+						     NETIF_F_FRAGLIST |
+						     NETIF_F_GEN_CSUM |
+						     NETIF_F_HW_VLAN_CTAG_TX |
+						     NETIF_F_HW_VLAN_STAG_TX);
+
+	return features;
+}
+
 #endif /* !(_LINUX_IF_VLAN_H_) */
diff --git a/net/core/dev.c b/net/core/dev.c
index 04bffcd4a48d..cb46badbef5a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2562,6 +2562,13 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 	return features;
 }
 
+static netdev_features_t dflt_features_check(const struct sk_buff *skb,
+					     struct net_device *dev,
+					     netdev_features_t features)
+{
+	return vlan_features_check(skb, features);
+}
+
 netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -2583,22 +2590,12 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 						     dev->vlan_features |
 						     NETIF_F_HW_VLAN_CTAG_TX |
 						     NETIF_F_HW_VLAN_STAG_TX);
-	else
-		goto finalize;
 
-	if (skb_vlan_tagged_multi(skb))
-		features = netdev_intersect_features(features,
-						     NETIF_F_SG |
-						     NETIF_F_HIGHDMA |
-						     NETIF_F_FRAGLIST |
-						     NETIF_F_GEN_CSUM |
-						     NETIF_F_HW_VLAN_CTAG_TX |
-						     NETIF_F_HW_VLAN_STAG_TX);
-
-finalize:
 	if (dev->netdev_ops->ndo_features_check)
 		features &= dev->netdev_ops->ndo_features_check(skb, dev,
 								features);
+	else
+		features &= dflt_features_check(skb, dev, features);
 
 	return harmonize_features(skb, features);
 }
-- 
cgit v1.2.3


From e38f30256b36700aa63aa709dc091bf6eb69c257 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 27 Mar 2015 14:31:13 +0900
Subject: net: Introduce passthru_features_check

As there are a number of (especially virtual) devices that don't
need the multiple vlan check, introduce passthru_features_check() for
convenience.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 net/core/dev.c            | 8 ++++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 08c4ab37189f..967bb4c8caf1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3657,6 +3657,9 @@ void netdev_change_features(struct net_device *dev);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
+netdev_features_t passthru_features_check(struct sk_buff *skb,
+					  struct net_device *dev,
+					  netdev_features_t features);
 netdev_features_t netif_skb_features(struct sk_buff *skb);
 
 static inline bool net_gso_ok(netdev_features_t features, int gso_type)
diff --git a/net/core/dev.c b/net/core/dev.c
index cb46badbef5a..3a06003ecafd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2562,6 +2562,14 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 	return features;
 }
 
+netdev_features_t passthru_features_check(struct sk_buff *skb,
+					  struct net_device *dev,
+					  netdev_features_t features)
+{
+	return features;
+}
+EXPORT_SYMBOL(passthru_features_check);
+
 static netdev_features_t dflt_features_check(const struct sk_buff *skb,
 					     struct net_device *dev,
 					     netdev_features_t features)
-- 
cgit v1.2.3


From a818292952bbfad12ec5a32ab01330cb1ceed013 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 16 Mar 2015 23:23:34 +0200
Subject: mac80211: convert rssi_callback() to event_callback()

We will be able to add more events, such as MLME events and
others. The low level driver may be interested in knowing
about these events to dump firmware data upon failures, or
to change parameters in case connection attempts fail etc...

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/iwlwifi/dvm/mac80211.c    | 15 ++++----
 drivers/net/wireless/iwlwifi/mvm/coex.c        |  2 +-
 drivers/net/wireless/iwlwifi/mvm/coex_legacy.c |  2 +-
 drivers/net/wireless/iwlwifi/mvm/mvm.h         |  4 +--
 include/net/mac80211.h                         | 48 ++++++++++++++++++++------
 net/mac80211/driver-ops.h                      | 12 +++----
 net/mac80211/mlme.c                            |  9 +++--
 net/mac80211/trace.h                           | 14 ++++----
 8 files changed, 71 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
index 47e64e8b9517..5707ba5ce23f 100644
--- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
@@ -1129,20 +1129,23 @@ done:
 	IWL_DEBUG_MAC80211(priv, "leave\n");
 }
 
-static void iwlagn_mac_rssi_callback(struct ieee80211_hw *hw,
-				     struct ieee80211_vif *vif,
-				     enum ieee80211_rssi_event rssi_event)
+static void iwlagn_mac_event_callback(struct ieee80211_hw *hw,
+				      struct ieee80211_vif *vif,
+				      const struct ieee80211_event *event)
 {
 	struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw);
 
+	if (event->type != RSSI_EVENT)
+		return;
+
 	IWL_DEBUG_MAC80211(priv, "enter\n");
 	mutex_lock(&priv->mutex);
 
 	if (priv->lib->bt_params &&
 	    priv->lib->bt_params->advanced_bt_coexist) {
-		if (rssi_event == RSSI_EVENT_LOW)
+		if (event->u.rssi.data == RSSI_EVENT_LOW)
 			priv->bt_enable_pspoll = true;
-		else if (rssi_event == RSSI_EVENT_HIGH)
+		else if (event->u.rssi.data == RSSI_EVENT_HIGH)
 			priv->bt_enable_pspoll = false;
 
 		iwlagn_send_advance_bt_config(priv);
@@ -1613,7 +1616,7 @@ const struct ieee80211_ops iwlagn_hw_ops = {
 	.channel_switch = iwlagn_mac_channel_switch,
 	.flush = iwlagn_mac_flush,
 	.tx_last_beacon = iwlagn_mac_tx_last_beacon,
-	.rssi_callback = iwlagn_mac_rssi_callback,
+	.event_callback = iwlagn_mac_event_callback,
 	.set_tim = iwlagn_mac_set_tim,
 };
 
diff --git a/drivers/net/wireless/iwlwifi/mvm/coex.c b/drivers/net/wireless/iwlwifi/mvm/coex.c
index 1ec4d55155f7..ba0a596f2d0f 100644
--- a/drivers/net/wireless/iwlwifi/mvm/coex.c
+++ b/drivers/net/wireless/iwlwifi/mvm/coex.c
@@ -1023,7 +1023,7 @@ static void iwl_mvm_bt_rssi_iterator(void *_data, u8 *mac,
 }
 
 void iwl_mvm_bt_rssi_event(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-			   enum ieee80211_rssi_event rssi_event)
+			   enum ieee80211_rssi_event_data rssi_event)
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_bt_iterator_data data = {
diff --git a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c
index d530ef3da107..92c3072bdfff 100644
--- a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c
+++ b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c
@@ -1068,7 +1068,7 @@ static void iwl_mvm_bt_rssi_iterator(void *_data, u8 *mac,
 }
 
 void iwl_mvm_bt_rssi_event_old(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-			       enum ieee80211_rssi_event rssi_event)
+			       enum ieee80211_rssi_event_data rssi_event)
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_bt_iterator_data data = {
diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h
index 6c69d0584f6c..646ef92e23e8 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h
@@ -1225,7 +1225,7 @@ int iwl_mvm_rx_bt_coex_notif(struct iwl_mvm *mvm,
 			     struct iwl_rx_cmd_buffer *rxb,
 			     struct iwl_device_cmd *cmd);
 void iwl_mvm_bt_rssi_event(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-			   enum ieee80211_rssi_event rssi_event);
+			   enum ieee80211_rssi_event_data);
 void iwl_mvm_bt_coex_vif_change(struct iwl_mvm *mvm);
 u16 iwl_mvm_coex_agg_time_limit(struct iwl_mvm *mvm,
 				struct ieee80211_sta *sta);
@@ -1246,7 +1246,7 @@ int iwl_mvm_rx_bt_coex_notif_old(struct iwl_mvm *mvm,
 				 struct iwl_rx_cmd_buffer *rxb,
 				 struct iwl_device_cmd *cmd);
 void iwl_mvm_bt_rssi_event_old(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-			       enum ieee80211_rssi_event rssi_event);
+			       enum ieee80211_rssi_event_data);
 u16 iwl_mvm_coex_agg_time_limit_old(struct iwl_mvm *mvm,
 				    struct ieee80211_sta *sta);
 bool iwl_mvm_bt_coex_is_mimo_allowed_old(struct iwl_mvm *mvm,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 157c0f151766..7a966f3ed67a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -301,16 +301,43 @@ enum ieee80211_bss_change {
 #define IEEE80211_BSS_ARP_ADDR_LIST_LEN 4
 
 /**
- * enum ieee80211_rssi_event - RSSI threshold event
- * An indicator for when RSSI goes below/above a certain threshold.
- * @RSSI_EVENT_HIGH: AP's rssi crossed the high threshold set by the driver.
- * @RSSI_EVENT_LOW: AP's rssi crossed the low threshold set by the driver.
+ * enum ieee80211_event_type - event to be notified to the low level driver
+ * @RSSI_EVENT: AP's rssi crossed the a threshold set by the driver.
  */
-enum ieee80211_rssi_event {
+enum ieee80211_event_type {
+	RSSI_EVENT,
+};
+
+/**
+ * enum ieee80211_rssi_event_data - relevant when event type is %RSSI_EVENT
+ * @RSSI_EVENT_HIGH: AP's rssi went below the threshold set by the driver.
+ * @RSSI_EVENT_LOW: AP's rssi went above the threshold set by the driver.
+ */
+enum ieee80211_rssi_event_data {
 	RSSI_EVENT_HIGH,
 	RSSI_EVENT_LOW,
 };
 
+/**
+ * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT
+ * @data: See &enum ieee80211_rssi_event_data
+ */
+struct ieee80211_rssi_event {
+	enum ieee80211_rssi_event_data data;
+};
+
+/**
+ * struct ieee80211_event - event to be sent to the driver
+ * @type The event itself. See &enum ieee80211_event_type.
+ * @rssi: relevant if &type is %RSSI_EVENT
+ */
+struct ieee80211_event {
+	enum ieee80211_event_type type;
+	union {
+		struct ieee80211_rssi_event rssi;
+	} u;
+};
+
 /**
  * struct ieee80211_bss_conf - holds the BSS's changing parameters
  *
@@ -2862,8 +2889,9 @@ enum ieee80211_reconfig_type {
  * @set_bitrate_mask: Set a mask of rates to be used for rate control selection
  *	when transmitting a frame. Currently only legacy rates are handled.
  *	The callback can sleep.
- * @rssi_callback: Notify driver when the average RSSI goes above/below
- *	thresholds that were registered previously. The callback can sleep.
+ * @event_callback: Notify driver about any event in mac80211. See
+ *	&enum ieee80211_event_type for the different types.
+ *	The callback can sleep.
  *
  * @release_buffered_frames: Release buffered frames according to the given
  *	parameters. In the case where the driver buffers some frames for
@@ -3159,9 +3187,9 @@ struct ieee80211_ops {
 	bool (*tx_frames_pending)(struct ieee80211_hw *hw);
 	int (*set_bitrate_mask)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 				const struct cfg80211_bitrate_mask *mask);
-	void (*rssi_callback)(struct ieee80211_hw *hw,
-			      struct ieee80211_vif *vif,
-			      enum ieee80211_rssi_event rssi_event);
+	void (*event_callback)(struct ieee80211_hw *hw,
+			       struct ieee80211_vif *vif,
+			       const struct ieee80211_event *event);
 
 	void (*allow_buffered_frames)(struct ieee80211_hw *hw,
 				      struct ieee80211_sta *sta,
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index fdeda17b8dd2..0a39d3db951a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -941,13 +941,13 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
-static inline void drv_rssi_callback(struct ieee80211_local *local,
-				     struct ieee80211_sub_if_data *sdata,
-				     const enum ieee80211_rssi_event event)
+static inline void drv_event_callback(struct ieee80211_local *local,
+				      struct ieee80211_sub_if_data *sdata,
+				      const struct ieee80211_event *event)
 {
-	trace_drv_rssi_callback(local, sdata, event);
-	if (local->ops->rssi_callback)
-		local->ops->rssi_callback(&local->hw, &sdata->vif, event);
+	trace_drv_event_callback(local, sdata, event);
+	if (local->ops->event_callback)
+		local->ops->event_callback(&local->hw, &sdata->vif, event);
 	trace_drv_return_void(local);
 }
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1999bc08fdcc..a8c8fe4c9f49 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3291,6 +3291,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	    ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
 		int sig = ifmgd->ave_beacon_signal;
 		int last_sig = ifmgd->last_ave_beacon_signal;
+		struct ieee80211_event event = {
+			.type = RSSI_EVENT,
+		};
 
 		/*
 		 * if signal crosses either of the boundaries, invoke callback
@@ -3299,12 +3302,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		if (sig > ifmgd->rssi_max_thold &&
 		    (last_sig <= ifmgd->rssi_min_thold || last_sig == 0)) {
 			ifmgd->last_ave_beacon_signal = sig;
-			drv_rssi_callback(local, sdata, RSSI_EVENT_HIGH);
+			event.u.rssi.data = RSSI_EVENT_HIGH;
+			drv_event_callback(local, sdata, &event);
 		} else if (sig < ifmgd->rssi_min_thold &&
 			   (last_sig >= ifmgd->rssi_max_thold ||
 			   last_sig == 0)) {
 			ifmgd->last_ave_beacon_signal = sig;
-			drv_rssi_callback(local, sdata, RSSI_EVENT_LOW);
+			event.u.rssi.data = RSSI_EVENT_LOW;
+			drv_event_callback(local, sdata, &event);
 		}
 	}
 
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 263a9561eb26..e9e462b349e5 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1256,28 +1256,28 @@ TRACE_EVENT(drv_set_rekey_data,
 		  LOCAL_PR_ARG, VIF_PR_ARG)
 );
 
-TRACE_EVENT(drv_rssi_callback,
+TRACE_EVENT(drv_event_callback,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
-		 enum ieee80211_rssi_event rssi_event),
+		 const struct ieee80211_event *_event),
 
-	TP_ARGS(local, sdata, rssi_event),
+	TP_ARGS(local, sdata, _event),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
 		VIF_ENTRY
-		__field(u32, rssi_event)
+		__field(u32, type)
 	),
 
 	TP_fast_assign(
 		LOCAL_ASSIGN;
 		VIF_ASSIGN;
-		__entry->rssi_event = rssi_event;
+		__entry->type = _event->type;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT VIF_PR_FMT " rssi_event:%d",
-		LOCAL_PR_ARG, VIF_PR_ARG, __entry->rssi_event
+		LOCAL_PR_FMT VIF_PR_FMT " event:%d",
+		LOCAL_PR_ARG, VIF_PR_ARG, __entry->type
 	)
 );
 
-- 
cgit v1.2.3


From a9409093d23c822d13a73f8d2df7e6fa987ae485 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 16 Mar 2015 23:23:35 +0200
Subject: mac80211: notify the driver about authentication status

This can allow the driver to take action based on the
success / failure of the authentication.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 36 ++++++++++++++++++++++++++++++++++++
 net/mac80211/mlme.c    | 15 +++++++++++++++
 2 files changed, 51 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7a966f3ed67a..6cddf7725bf2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -303,9 +303,11 @@ enum ieee80211_bss_change {
 /**
  * enum ieee80211_event_type - event to be notified to the low level driver
  * @RSSI_EVENT: AP's rssi crossed the a threshold set by the driver.
+ * @MLME_EVENT: event related to MLME
  */
 enum ieee80211_event_type {
 	RSSI_EVENT,
+	MLME_EVENT,
 };
 
 /**
@@ -326,15 +328,49 @@ struct ieee80211_rssi_event {
 	enum ieee80211_rssi_event_data data;
 };
 
+/**
+ * enum ieee80211_mlme_event_data - relevant when event type is %MLME_EVENT
+ * @AUTH_EVENT: the MLME operation is authentication
+ */
+enum ieee80211_mlme_event_data {
+	AUTH_EVENT,
+};
+
+/**
+ * enum ieee80211_mlme_event_status - relevant when event type is %MLME_EVENT
+ * @MLME_SUCCESS: the MLME operation completed successfully.
+ * @MLME_DENIED: the MLME operation was denied by the peer.
+ * @MLME_TIMEOUT: the MLME operation timed out.
+ */
+enum ieee80211_mlme_event_status {
+	MLME_SUCCESS,
+	MLME_DENIED,
+	MLME_TIMEOUT,
+};
+
+/**
+ * enum ieee80211_mlme_event - data attached to an %MLME_EVENT
+ * @data: See &enum ieee80211_mlme_event_data
+ * @status: See &enum ieee80211_mlme_event_status
+ * @reason: the reason code if applicable
+ */
+struct ieee80211_mlme_event {
+	enum ieee80211_mlme_event_data data;
+	enum ieee80211_mlme_event_status status;
+	u16 reason;
+};
+
 /**
  * struct ieee80211_event - event to be sent to the driver
  * @type The event itself. See &enum ieee80211_event_type.
  * @rssi: relevant if &type is %RSSI_EVENT
+ * @mlme: relevant if &type is %AUTH_EVENT
  */
 struct ieee80211_event {
 	enum ieee80211_event_type type;
 	union {
 		struct ieee80211_rssi_event rssi;
+		struct ieee80211_mlme_event mlme;
 	} u;
 };
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a8c8fe4c9f49..7865998d69dd 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2495,6 +2495,10 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	u8 bssid[ETH_ALEN];
 	u16 auth_alg, auth_transaction, status_code;
 	struct sta_info *sta;
+	struct ieee80211_event event = {
+		.type = MLME_EVENT,
+		.u.mlme.data = AUTH_EVENT,
+	};
 
 	sdata_assert_lock(sdata);
 
@@ -2527,6 +2531,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 			   mgmt->sa, status_code);
 		ieee80211_destroy_auth_data(sdata, false);
 		cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+		event.u.mlme.status = MLME_DENIED;
+		event.u.mlme.reason = status_code;
+		drv_event_callback(sdata->local, sdata, &event);
 		return;
 	}
 
@@ -2549,6 +2556,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
+	event.u.mlme.status = MLME_SUCCESS;
+	drv_event_callback(sdata->local, sdata, &event);
 	sdata_info(sdata, "authenticated\n");
 	ifmgd->auth_data->done = true;
 	ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC;
@@ -3805,12 +3814,18 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 			ieee80211_destroy_auth_data(sdata, false);
 		} else if (ieee80211_probe_auth(sdata)) {
 			u8 bssid[ETH_ALEN];
+			struct ieee80211_event event = {
+				.type = MLME_EVENT,
+				.u.mlme.data = AUTH_EVENT,
+				.u.mlme.status = MLME_TIMEOUT,
+			};
 
 			memcpy(bssid, ifmgd->auth_data->bss->bssid, ETH_ALEN);
 
 			ieee80211_destroy_auth_data(sdata, false);
 
 			cfg80211_auth_timeout(sdata->dev, bssid);
+			drv_event_callback(sdata->local, sdata, &event);
 		}
 	} else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started)
 		run_again(sdata, ifmgd->auth_data->timeout);
-- 
cgit v1.2.3


From d0d1a12f9cc665f3fcac5288ab84429a91abd4e9 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 16 Mar 2015 23:23:36 +0200
Subject: mac80211: notify the driver about association status

This can allow the driver to take action based on the
success / failure of the association.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  2 ++
 net/mac80211/mlme.c    | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 6cddf7725bf2..dcddc4ac4184 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -331,9 +331,11 @@ struct ieee80211_rssi_event {
 /**
  * enum ieee80211_mlme_event_data - relevant when event type is %MLME_EVENT
  * @AUTH_EVENT: the MLME operation is authentication
+ * @ASSOC_EVENT: the MLME operation is association
  */
 enum ieee80211_mlme_event_data {
 	AUTH_EVENT,
+	ASSOC_EVENT,
 };
 
 /**
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7865998d69dd..b6817c4c4449 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3041,6 +3041,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	u8 *pos;
 	bool reassoc;
 	struct cfg80211_bss *bss;
+	struct ieee80211_event event = {
+		.type = MLME_EVENT,
+		.u.mlme.data = ASSOC_EVENT,
+	};
 
 	sdata_assert_lock(sdata);
 
@@ -3092,6 +3096,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		sdata_info(sdata, "%pM denied association (code=%d)\n",
 			   mgmt->sa, status_code);
 		ieee80211_destroy_assoc_data(sdata, false);
+		event.u.mlme.status = MLME_DENIED;
+		event.u.mlme.reason = status_code;
+		drv_event_callback(sdata->local, sdata, &event);
 	} else {
 		if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) {
 			/* oops -- internal error -- send timeout for now */
@@ -3099,6 +3106,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 			cfg80211_assoc_timeout(sdata->dev, bss);
 			return;
 		}
+		event.u.mlme.status = MLME_SUCCESS;
+		drv_event_callback(sdata->local, sdata, &event);
 		sdata_info(sdata, "associated\n");
 
 		/*
@@ -3835,9 +3844,15 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 		if ((ifmgd->assoc_data->need_beacon && !ifmgd->have_beacon) ||
 		    ieee80211_do_assoc(sdata)) {
 			struct cfg80211_bss *bss = ifmgd->assoc_data->bss;
+			struct ieee80211_event event = {
+				.type = MLME_EVENT,
+				.u.mlme.data = ASSOC_EVENT,
+				.u.mlme.status = MLME_TIMEOUT,
+			};
 
 			ieee80211_destroy_assoc_data(sdata, false);
 			cfg80211_assoc_timeout(sdata->dev, bss);
+			drv_event_callback(sdata->local, sdata, &event);
 		}
 	} else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started)
 		run_again(sdata, ifmgd->assoc_data->timeout);
-- 
cgit v1.2.3


From a90faa9d6449f3861428bafeaf4d1fcf2be4ba7f Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 16 Mar 2015 23:23:37 +0200
Subject: mac80211: notify the driver about deauth

This can allow the driver to take action based on the reason
of the deauth.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  4 ++++
 net/mac80211/mlme.c    | 60 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 46 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index dcddc4ac4184..94c8c62800c3 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -332,10 +332,14 @@ struct ieee80211_rssi_event {
  * enum ieee80211_mlme_event_data - relevant when event type is %MLME_EVENT
  * @AUTH_EVENT: the MLME operation is authentication
  * @ASSOC_EVENT: the MLME operation is association
+ * @DEAUTH_RX_EVENT: deauth received..
+ * @DEAUTH_TX_EVENT: deauth sent.
  */
 enum ieee80211_mlme_event_data {
 	AUTH_EVENT,
 	ASSOC_EVENT,
+	DEAUTH_RX_EVENT,
+	DEAUTH_TX_EVENT,
 };
 
 /**
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b6817c4c4449..22b125913661 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2345,6 +2345,24 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL(ieee80211_ap_probereq_get);
 
+static void ieee80211_report_disconnect(struct ieee80211_sub_if_data *sdata,
+					const u8 *buf, size_t len, bool tx,
+					u16 reason)
+{
+	struct ieee80211_event event = {
+		.type = MLME_EVENT,
+		.u.mlme.data = tx ? DEAUTH_TX_EVENT : DEAUTH_RX_EVENT,
+		.u.mlme.reason = reason,
+	};
+
+	if (tx)
+		cfg80211_tx_mlme_mgmt(sdata->dev, buf, len);
+	else
+		cfg80211_rx_mlme_mgmt(sdata->dev, buf, len);
+
+	drv_event_callback(sdata->local, sdata, &event);
+}
+
 static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
@@ -2370,8 +2388,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 	}
 	mutex_unlock(&local->mtx);
 
-	cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
-			      IEEE80211_DEAUTH_FRAME_LEN);
+	ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true,
+				    WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY);
+
 	sdata_unlock(sdata);
 }
 
@@ -2676,7 +2695,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
 
-	cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+	ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false, reason_code);
 }
 
 
@@ -2702,7 +2721,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
 
-	cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+	ieee80211_report_disconnect(sdata, (u8 *)mgmt, len, false, reason_code);
 }
 
 static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
@@ -3509,8 +3528,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
 				       WLAN_REASON_DEAUTH_LEAVING,
 				       true, deauth_buf);
-		cfg80211_tx_mlme_mgmt(sdata->dev, deauth_buf,
-				      sizeof(deauth_buf));
+		ieee80211_report_disconnect(sdata, deauth_buf,
+					    sizeof(deauth_buf), true,
+					    WLAN_REASON_DEAUTH_LEAVING);
 		return;
 	}
 
@@ -3628,8 +3648,8 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
 	ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason,
 			       tx, frame_buf);
 
-	cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
-			      IEEE80211_DEAUTH_FRAME_LEN);
+	ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true,
+				    reason);
 }
 
 static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
@@ -4507,8 +4527,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 				       WLAN_REASON_UNSPECIFIED,
 				       false, frame_buf);
 
-		cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
-				      sizeof(frame_buf));
+		ieee80211_report_disconnect(sdata, frame_buf,
+					    sizeof(frame_buf), true,
+					    WLAN_REASON_UNSPECIFIED);
 	}
 
 	sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid);
@@ -4608,8 +4629,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 				       WLAN_REASON_UNSPECIFIED,
 				       false, frame_buf);
 
-		cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
-				      sizeof(frame_buf));
+		ieee80211_report_disconnect(sdata, frame_buf,
+					    sizeof(frame_buf), true,
+					    WLAN_REASON_UNSPECIFIED);
 	}
 
 	if (ifmgd->auth_data && !ifmgd->auth_data->done) {
@@ -4899,8 +4921,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 					       req->reason_code, tx,
 					       frame_buf);
 		ieee80211_destroy_auth_data(sdata, false);
-		cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
-				      IEEE80211_DEAUTH_FRAME_LEN);
+		ieee80211_report_disconnect(sdata, frame_buf,
+					    sizeof(frame_buf), true,
+					    req->reason_code);
 
 		return 0;
 	}
@@ -4914,8 +4937,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 
 		ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
 				       req->reason_code, tx, frame_buf);
-		cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
-				      IEEE80211_DEAUTH_FRAME_LEN);
+		ieee80211_report_disconnect(sdata, frame_buf,
+					    sizeof(frame_buf), true,
+					    req->reason_code);
 		return 0;
 	}
 
@@ -4947,8 +4971,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 			       req->reason_code, !req->local_state_change,
 			       frame_buf);
 
-	cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
-			      IEEE80211_DEAUTH_FRAME_LEN);
+	ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true,
+				    req->reason_code);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 3a323d4e17dd5a84f6ad036e6f985d263ca973ed Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Wed, 18 Mar 2015 10:47:02 +0200
Subject: nl80211: small clarification of the sched_scan delay attribute

Just clarify that the delay is only before the first cycle.

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ae16ba9cb1e3..241220c43e86 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1754,8 +1754,9 @@ enum nl80211_commands {
  *	should be contained in the result as the sum of the respective counters
  *	over all channels.
  *
- * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before a scheduled scan (or a
- *	WoWLAN net-detect scan) is started, u32 in seconds.
+ * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before the first cycle of a
+ *	scheduled scan (or a WoWLAN net-detect scan) is started, u32
+ *	in seconds.
 
  * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device
  *      is operating in an indoor environment.
-- 
cgit v1.2.3


From a38700dd486f3def34cef47d00e2d360a04a7bc8 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Wed, 18 Mar 2015 08:46:08 +0200
Subject: cfg/mac80211: add regulatory classes IE during TDLS setup

Seems Broadcom TDLS peers (Nexus 5, Xperia Z3) refuse to allow TDLS
connection when channel-switching is supported but the regulatory
classes IE is missing from the setup request.
Add a chandef to reg-class translation function to cfg80211 and use it
to add the required IE during setup. For now add only the current
regulatory class as supported - it is enough to resolve the
compatibility issue.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h |  11 +++++
 net/mac80211/tdls.c    |  21 ++++++++
 net/wireless/util.c    | 129 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 161 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 12a6121ea76e..c4d873b8f32d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4903,6 +4903,17 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
 bool ieee80211_operating_class_to_band(u8 operating_class,
 				       enum ieee80211_band *band);
 
+/**
+ * ieee80211_chandef_to_operating_class - convert chandef to operation class
+ *
+ * @chandef: the chandef to convert
+ * @op_class: a pointer to the resulting operating class
+ *
+ * Returns %true if the conversion was successful, %false otherwise.
+ */
+bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
+					  u8 *op_class);
+
 /*
  * cfg80211_tdls_oper_request - request userspace to perform TDLS operation
  * @dev: the device on which the operation is requested
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index bc7e4049896f..79ed59acf0d4 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -136,6 +136,24 @@ ieee80211_tdls_add_supp_channels(struct ieee80211_sub_if_data *sdata,
 	*pos = 2 * subband_cnt;
 }
 
+static void ieee80211_tdls_add_oper_classes(struct ieee80211_sub_if_data *sdata,
+					    struct sk_buff *skb)
+{
+	u8 *pos;
+	u8 op_class;
+
+	if (!ieee80211_chandef_to_operating_class(&sdata->vif.bss_conf.chandef,
+						  &op_class))
+		return;
+
+	pos = skb_put(skb, 4);
+	*pos++ = WLAN_EID_SUPPORTED_REGULATORY_CLASSES;
+	*pos++ = 2; /* len */
+
+	*pos++ = op_class;
+	*pos++ = op_class; /* give current operating class as alternate too */
+}
+
 static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb)
 {
 	u8 *pos = (void *)skb_put(skb, 3);
@@ -350,6 +368,8 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
+	ieee80211_tdls_add_oper_classes(sdata, skb);
+
 	/*
 	 * with TDLS we can switch channels, and HT-caps are not necessarily
 	 * the same on all bands. The specification limits the setup to a
@@ -786,6 +806,7 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata,
 			       50 + /* supported channels */
 			       3 + /* 40/20 BSS coex */
 			       4 + /* AID */
+			       4 + /* oper classes */
 			       extra_ies_len +
 			       sizeof(struct ieee80211_tdls_lnkie));
 	if (!skb)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index f7b35980af69..f218b151530a 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1314,6 +1314,135 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
 }
 EXPORT_SYMBOL(ieee80211_operating_class_to_band);
 
+bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
+					  u8 *op_class)
+{
+	u8 vht_opclass;
+	u16 freq = chandef->center_freq1;
+
+	if (freq >= 2412 && freq <= 2472) {
+		if (chandef->width > NL80211_CHAN_WIDTH_40)
+			return false;
+
+		/* 2.407 GHz, channels 1..13 */
+		if (chandef->width == NL80211_CHAN_WIDTH_40) {
+			if (freq > chandef->chan->center_freq)
+				*op_class = 83; /* HT40+ */
+			else
+				*op_class = 84; /* HT40- */
+		} else {
+			*op_class = 81;
+		}
+
+		return true;
+	}
+
+	if (freq == 2484) {
+		if (chandef->width > NL80211_CHAN_WIDTH_40)
+			return false;
+
+		*op_class = 82; /* channel 14 */
+		return true;
+	}
+
+	switch (chandef->width) {
+	case NL80211_CHAN_WIDTH_80:
+		vht_opclass = 128;
+		break;
+	case NL80211_CHAN_WIDTH_160:
+		vht_opclass = 129;
+		break;
+	case NL80211_CHAN_WIDTH_80P80:
+		vht_opclass = 130;
+		break;
+	case NL80211_CHAN_WIDTH_10:
+	case NL80211_CHAN_WIDTH_5:
+		return false; /* unsupported for now */
+	default:
+		vht_opclass = 0;
+		break;
+	}
+
+	/* 5 GHz, channels 36..48 */
+	if (freq >= 5180 && freq <= 5240) {
+		if (vht_opclass) {
+			*op_class = vht_opclass;
+		} else if (chandef->width == NL80211_CHAN_WIDTH_40) {
+			if (freq > chandef->chan->center_freq)
+				*op_class = 116;
+			else
+				*op_class = 117;
+		} else {
+			*op_class = 115;
+		}
+
+		return true;
+	}
+
+	/* 5 GHz, channels 52..64 */
+	if (freq >= 5260 && freq <= 5320) {
+		if (vht_opclass) {
+			*op_class = vht_opclass;
+		} else if (chandef->width == NL80211_CHAN_WIDTH_40) {
+			if (freq > chandef->chan->center_freq)
+				*op_class = 119;
+			else
+				*op_class = 120;
+		} else {
+			*op_class = 118;
+		}
+
+		return true;
+	}
+
+	/* 5 GHz, channels 100..144 */
+	if (freq >= 5500 && freq <= 5720) {
+		if (vht_opclass) {
+			*op_class = vht_opclass;
+		} else if (chandef->width == NL80211_CHAN_WIDTH_40) {
+			if (freq > chandef->chan->center_freq)
+				*op_class = 122;
+			else
+				*op_class = 123;
+		} else {
+			*op_class = 121;
+		}
+
+		return true;
+	}
+
+	/* 5 GHz, channels 149..169 */
+	if (freq >= 5745 && freq <= 5845) {
+		if (vht_opclass) {
+			*op_class = vht_opclass;
+		} else if (chandef->width == NL80211_CHAN_WIDTH_40) {
+			if (freq > chandef->chan->center_freq)
+				*op_class = 126;
+			else
+				*op_class = 127;
+		} else if (freq <= 5805) {
+			*op_class = 124;
+		} else {
+			*op_class = 125;
+		}
+
+		return true;
+	}
+
+	/* 56.16 GHz, channel 1..4 */
+	if (freq >= 56160 + 2160 * 1 && freq <= 56160 + 2160 * 4) {
+		if (chandef->width >= NL80211_CHAN_WIDTH_40)
+			return false;
+
+		*op_class = 180;
+		return true;
+	}
+
+	/* not supported yet */
+	return false;
+}
+EXPORT_SYMBOL(ieee80211_chandef_to_operating_class);
+
 int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
 				 u32 beacon_int)
 {
-- 
cgit v1.2.3


From 6bab2e19c5ffd1f21b28c2cabb3801a37b77ae69 Mon Sep 17 00:00:00 2001
From: Tom Gundersen <teg@jklm.no>
Date: Wed, 18 Mar 2015 11:13:39 +0100
Subject: cfg80211: pass name_assign_type to rdev_add_virtual_intf()

This will expose in /sys whether the ifname of a device is set by
userspace or generated by the kernel. The latter kind (wlanX, etc)
is not deterministic, so userspace needs to rename these devices
to names that are guaranteed to stay the same between reboots. The
former, however should never be renamed, so userspace needs to be
able to reliably tell the difference.

Similar functionality was introduced for the rtnetlink core in
commit 5517750f058e ("net: rtnetlink - make create_link take name_assign_type")

Signed-off-by: Tom Gundersen <teg@jklm.no>
Cc: Kalle Valo <kvalo@qca.qualcomm.com>
Cc: Brett Rudley <brudley@broadcom.com>
Cc: Arend van Spriel <arend@broadcom.com>
Cc: Franky (Zhenhui) Lin <frankyl@broadcom.com>
Cc: Hante Meuleman <meuleman@broadcom.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
[reformat changelog to fit 72 cols]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c         | 6 ++++--
 drivers/net/wireless/ath/ath6kl/cfg80211.h         | 1 +
 drivers/net/wireless/ath/ath6kl/core.c             | 4 ++--
 drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c | 3 ++-
 drivers/net/wireless/brcm80211/brcmfmac/p2p.c      | 3 +++
 drivers/net/wireless/brcm80211/brcmfmac/p2p.h      | 1 +
 drivers/net/wireless/mwifiex/cfg80211.c            | 5 +++--
 drivers/net/wireless/mwifiex/main.c                | 6 +++---
 drivers/net/wireless/mwifiex/main.h                | 1 +
 drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c  | 6 +++++-
 include/net/cfg80211.h                             | 1 +
 net/mac80211/cfg.c                                 | 3 ++-
 net/mac80211/ieee80211_i.h                         | 1 +
 net/mac80211/iface.c                               | 3 ++-
 net/mac80211/main.c                                | 2 +-
 net/wireless/nl80211.c                             | 3 ++-
 net/wireless/rdev-ops.h                            | 5 +++--
 17 files changed, 37 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index ff7ba5c195c6..d740f14f3539 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1496,6 +1496,7 @@ static int ath6kl_cfg80211_set_power_mgmt(struct wiphy *wiphy,
 
 static struct wireless_dev *ath6kl_cfg80211_add_iface(struct wiphy *wiphy,
 						      const char *name,
+						      unsigned char name_assign_type,
 						      enum nl80211_iftype type,
 						      u32 *flags,
 						      struct vif_params *params)
@@ -1514,7 +1515,7 @@ static struct wireless_dev *ath6kl_cfg80211_add_iface(struct wiphy *wiphy,
 		return ERR_PTR(-EINVAL);
 	}
 
-	wdev = ath6kl_interface_add(ar, name, type, if_idx, nw_type);
+	wdev = ath6kl_interface_add(ar, name, name_assign_type, type, if_idx, nw_type);
 	if (!wdev)
 		return ERR_PTR(-ENOMEM);
 
@@ -3634,13 +3635,14 @@ void ath6kl_cfg80211_vif_cleanup(struct ath6kl_vif *vif)
 }
 
 struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, const char *name,
+					  unsigned char name_assign_type,
 					  enum nl80211_iftype type,
 					  u8 fw_vif_idx, u8 nw_type)
 {
 	struct net_device *ndev;
 	struct ath6kl_vif *vif;
 
-	ndev = alloc_netdev(sizeof(*vif), name, NET_NAME_UNKNOWN, ether_setup);
+	ndev = alloc_netdev(sizeof(*vif), name, name_assign_type, ether_setup);
 	if (!ndev)
 		return NULL;
 
diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.h b/drivers/net/wireless/ath/ath6kl/cfg80211.h
index b59becd91aea..5aa57a7639bf 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.h
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.h
@@ -25,6 +25,7 @@ enum ath6kl_cfg_suspend_mode {
 };
 
 struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, const char *name,
+					  unsigned char name_assign_type,
 					  enum nl80211_iftype type,
 					  u8 fw_vif_idx, u8 nw_type);
 void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq,
diff --git a/drivers/net/wireless/ath/ath6kl/core.c b/drivers/net/wireless/ath/ath6kl/core.c
index 0df74b245af4..4ec02cea0f43 100644
--- a/drivers/net/wireless/ath/ath6kl/core.c
+++ b/drivers/net/wireless/ath/ath6kl/core.c
@@ -211,8 +211,8 @@ int ath6kl_core_init(struct ath6kl *ar, enum ath6kl_htc_type htc_type)
 	rtnl_lock();
 
 	/* Add an initial station interface */
-	wdev = ath6kl_interface_add(ar, "wlan%d", NL80211_IFTYPE_STATION, 0,
-				    INFRA_NETWORK);
+	wdev = ath6kl_interface_add(ar, "wlan%d", NET_NAME_ENUM,
+				    NL80211_IFTYPE_STATION, 0, INFRA_NETWORK);
 
 	rtnl_unlock();
 
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c
index b59b8c6c42ab..bcbccba4cdbf 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c
@@ -625,6 +625,7 @@ static bool brcmf_is_ibssmode(struct brcmf_cfg80211_vif *vif)
 
 static struct wireless_dev *brcmf_cfg80211_add_iface(struct wiphy *wiphy,
 						     const char *name,
+						     unsigned char name_assign_type,
 						     enum nl80211_iftype type,
 						     u32 *flags,
 						     struct vif_params *params)
@@ -648,7 +649,7 @@ static struct wireless_dev *brcmf_cfg80211_add_iface(struct wiphy *wiphy,
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_P2P_GO:
 	case NL80211_IFTYPE_P2P_DEVICE:
-		wdev = brcmf_p2p_add_vif(wiphy, name, type, flags, params);
+		wdev = brcmf_p2p_add_vif(wiphy, name, name_assign_type, type, flags, params);
 		if (!IS_ERR(wdev))
 			brcmf_cfg80211_update_proto_addr_mode(wdev);
 		return wdev;
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c
index effb48ebd864..b44ea85dd9db 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c
@@ -2246,11 +2246,13 @@ static void brcmf_p2p_delete_p2pdev(struct brcmf_p2p_info *p2p,
  *
  * @wiphy: wiphy device of new interface.
  * @name: name of the new interface.
+ * @name_assign_type: origin of the interface name
  * @type: nl80211 interface type.
  * @flags: not used.
  * @params: contains mac address for P2P device.
  */
 struct wireless_dev *brcmf_p2p_add_vif(struct wiphy *wiphy, const char *name,
+				       unsigned char name_assign_type,
 				       enum nl80211_iftype type, u32 *flags,
 				       struct vif_params *params)
 {
@@ -2310,6 +2312,7 @@ struct wireless_dev *brcmf_p2p_add_vif(struct wiphy *wiphy, const char *name,
 	}
 
 	strncpy(ifp->ndev->name, name, sizeof(ifp->ndev->name) - 1);
+	ifp->ndev->name_assign_type = name_assign_type;
 	err = brcmf_net_attach(ifp, true);
 	if (err) {
 		brcmf_err("Registering netdevice failed\n");
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/brcm80211/brcmfmac/p2p.h
index 6821b26224be..872f382d9e49 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/p2p.h
+++ b/drivers/net/wireless/brcm80211/brcmfmac/p2p.h
@@ -149,6 +149,7 @@ struct brcmf_p2p_info {
 s32 brcmf_p2p_attach(struct brcmf_cfg80211_info *cfg);
 void brcmf_p2p_detach(struct brcmf_p2p_info *p2p);
 struct wireless_dev *brcmf_p2p_add_vif(struct wiphy *wiphy, const char *name,
+				       unsigned char name_assign_type,
 				       enum nl80211_iftype type, u32 *flags,
 				       struct vif_params *params);
 int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev);
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index a47eb55bb6da..2d489bfaea08 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -2399,10 +2399,11 @@ mwifiex_setup_ht_caps(struct ieee80211_sta_ht_cap *ht_info,
 
 #define MWIFIEX_MAX_WQ_LEN  30
 /*
- *  create a new virtual interface with the given name
+ *  create a new virtual interface with the given name and name assign type
  */
 struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
 					      const char *name,
+					      unsigned char name_assign_type,
 					      enum nl80211_iftype type,
 					      u32 *flags,
 					      struct vif_params *params)
@@ -2523,7 +2524,7 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
 	}
 
 	dev = alloc_netdev_mqs(sizeof(struct mwifiex_private *), name,
-			       NET_NAME_UNKNOWN, ether_setup,
+			       name_assign_type, ether_setup,
 			       IEEE80211_NUM_ACS, 1);
 	if (!dev) {
 		wiphy_err(wiphy, "no memory available for netdevice\n");
diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c
index 7e74b4fccddd..6f55e84fcab1 100644
--- a/drivers/net/wireless/mwifiex/main.c
+++ b/drivers/net/wireless/mwifiex/main.c
@@ -466,7 +466,7 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context)
 
 	rtnl_lock();
 	/* Create station interface by default */
-	wdev = mwifiex_add_virtual_intf(adapter->wiphy, "mlan%d",
+	wdev = mwifiex_add_virtual_intf(adapter->wiphy, "mlan%d", NET_NAME_ENUM,
 					NL80211_IFTYPE_STATION, NULL, NULL);
 	if (IS_ERR(wdev)) {
 		dev_err(adapter->dev, "cannot create default STA interface\n");
@@ -475,7 +475,7 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context)
 	}
 
 	if (driver_mode & MWIFIEX_DRIVER_MODE_UAP) {
-		wdev = mwifiex_add_virtual_intf(adapter->wiphy, "uap%d",
+		wdev = mwifiex_add_virtual_intf(adapter->wiphy, "uap%d", NET_NAME_ENUM,
 						NL80211_IFTYPE_AP, NULL, NULL);
 		if (IS_ERR(wdev)) {
 			dev_err(adapter->dev, "cannot create AP interface\n");
@@ -485,7 +485,7 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context)
 	}
 
 	if (driver_mode & MWIFIEX_DRIVER_MODE_P2P) {
-		wdev = mwifiex_add_virtual_intf(adapter->wiphy, "p2p%d",
+		wdev = mwifiex_add_virtual_intf(adapter->wiphy, "p2p%d", NET_NAME_ENUM,
 						NL80211_IFTYPE_P2P_CLIENT, NULL,
 						NULL);
 		if (IS_ERR(wdev)) {
diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h
index f0a6af179af0..3f0625f22265 100644
--- a/drivers/net/wireless/mwifiex/main.h
+++ b/drivers/net/wireless/mwifiex/main.h
@@ -1318,6 +1318,7 @@ u8 mwifiex_chan_type_to_sec_chan_offset(enum nl80211_channel_type chan_type);
 
 struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
 					      const char *name,
+					      unsigned char name_assign_type,
 					      enum nl80211_iftype type,
 					      u32 *flags,
 					      struct vif_params *params);
diff --git a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
index 537bd8214efe..a6116fdc8678 100644
--- a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
@@ -2580,6 +2580,7 @@ static const struct net_device_ops rtw_cfg80211_monitor_if_ops = {
 };
 
 static int rtw_cfg80211_add_monitor_if(struct rtw_adapter *padapter, char *name,
+				       unsigned char name_assign_type,
 				       struct net_device **ndev)
 {
 	int ret = 0;
@@ -2612,6 +2613,7 @@ static int rtw_cfg80211_add_monitor_if(struct rtw_adapter *padapter, char *name,
 	mon_ndev->type = ARPHRD_IEEE80211_RADIOTAP;
 	strncpy(mon_ndev->name, name, IFNAMSIZ);
 	mon_ndev->name[IFNAMSIZ - 1] = 0;
+	mon_ndev->name_assign_type = name_assign_type;
 	mon_ndev->destructor = rtw_ndev_destructor;
 
 	mon_ndev->netdev_ops = &rtw_cfg80211_monitor_if_ops;
@@ -2654,6 +2656,7 @@ out:
 
 static struct wireless_dev *
 cfg80211_rtw_add_virtual_intf(struct wiphy *wiphy, const char *name,
+			      unsigned char name_assign_type,
 			      enum nl80211_iftype type, u32 *flags,
 			      struct vif_params *params)
 {
@@ -2673,7 +2676,8 @@ cfg80211_rtw_add_virtual_intf(struct wiphy *wiphy, const char *name,
 		break;
 	case NL80211_IFTYPE_MONITOR:
 		ret =
-		    rtw_cfg80211_add_monitor_if(padapter, (char *)name, &ndev);
+		    rtw_cfg80211_add_monitor_if(padapter, (char *)name,
+						name_assign_type, &ndev);
 		break;
 
 	case NL80211_IFTYPE_P2P_CLIENT:
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c4d873b8f32d..ab667fbc743d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2456,6 +2456,7 @@ struct cfg80211_ops {
 
 	struct wireless_dev * (*add_virtual_intf)(struct wiphy *wiphy,
 						  const char *name,
+						  unsigned char name_assign_type,
 						  enum nl80211_iftype type,
 						  u32 *flags,
 						  struct vif_params *params);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e01cea3b9043..49401238ac39 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -24,6 +24,7 @@
 
 static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
 						const char *name,
+						unsigned char name_assign_type,
 						enum nl80211_iftype type,
 						u32 *flags,
 						struct vif_params *params)
@@ -33,7 +34,7 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
 	struct ieee80211_sub_if_data *sdata;
 	int err;
 
-	err = ieee80211_if_add(local, name, &wdev, type, params);
+	err = ieee80211_if_add(local, name, name_assign_type, &wdev, type, params);
 	if (err)
 		return ERR_PTR(err);
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ebc8135e0aaa..3e3cfe8da4ef 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1578,6 +1578,7 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 int ieee80211_iface_init(void);
 void ieee80211_iface_exit(void);
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
+		     unsigned char name_assign_type,
 		     struct wireless_dev **new_wdev, enum nl80211_iftype type,
 		     struct vif_params *params);
 int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 9689d3b1133b..a0cd97fd0c49 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1648,6 +1648,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
 }
 
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
+		     unsigned char name_assign_type,
 		     struct wireless_dev **new_wdev, enum nl80211_iftype type,
 		     struct vif_params *params)
 {
@@ -1676,7 +1677,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 			txqs = IEEE80211_NUM_ACS;
 
 		ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size,
-					name, NET_NAME_UNKNOWN,
+					name, name_assign_type,
 					ieee80211_if_setup, txqs, 1);
 		if (!ndev)
 			return -ENOMEM;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 2f51e6d1f2b3..4977967c8b00 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1057,7 +1057,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	/* add one default STA interface if supported */
 	if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
 	    !(hw->flags & IEEE80211_HW_NO_AUTO_VIF)) {
-		result = ieee80211_if_add(local, "wlan%d", NULL,
+		result = ieee80211_if_add(local, "wlan%d", NET_NAME_ENUM, NULL,
 					  NL80211_IFTYPE_STATION, NULL);
 		if (result)
 			wiphy_warn(local->hw.wiphy,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 543dd51342f0..f60ee5b45c0c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2667,7 +2667,8 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 
 	wdev = rdev_add_virtual_intf(rdev,
 				nla_data(info->attrs[NL80211_ATTR_IFNAME]),
-				type, err ? NULL : &flags, &params);
+				NET_NAME_USER, type, err ? NULL : &flags,
+				&params);
 	if (WARN_ON(!wdev)) {
 		nlmsg_free(msg);
 		return -EPROTO;
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 35cfb7134bdb..c6e83a7468c0 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -35,13 +35,14 @@ static inline void rdev_set_wakeup(struct cfg80211_registered_device *rdev,
 
 static inline struct wireless_dev
 *rdev_add_virtual_intf(struct cfg80211_registered_device *rdev, char *name,
+		       unsigned char name_assign_type,
 		       enum nl80211_iftype type, u32 *flags,
 		       struct vif_params *params)
 {
 	struct wireless_dev *ret;
 	trace_rdev_add_virtual_intf(&rdev->wiphy, name, type);
-	ret = rdev->ops->add_virtual_intf(&rdev->wiphy, name, type, flags,
-					  params);
+	ret = rdev->ops->add_virtual_intf(&rdev->wiphy, name, name_assign_type,
+					  type, flags, params);
 	trace_rdev_return_wdev(&rdev->wiphy, ret);
 	return ret;
 }
-- 
cgit v1.2.3


From 527871d7206dac2733d0bae52f5a328811d299ee Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 21 Mar 2015 08:09:55 +0100
Subject: mac80211: make sta.wme indicate whether QoS is used

Indicating just the peer's capability is fairly pointless
if the local device doesn't support it. Make the variable
track both combined, and remove the 'local support' check
in the TX path.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 3 ++-
 net/mac80211/cfg.c     | 3 ++-
 net/mac80211/ibss.c    | 2 +-
 net/mac80211/mlme.c    | 2 +-
 net/mac80211/tx.c      | 4 ++--
 5 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 94c8c62800c3..201bc68e0cff 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1557,7 +1557,8 @@ struct ieee80211_sta_rates {
  * @supp_rates: Bitmap of supported rates (per band)
  * @ht_cap: HT capabilities of this STA; restricted to our own capabilities
  * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities
- * @wme: indicates whether the STA supports QoS/WME.
+ * @wme: indicates whether the STA supports QoS/WME (if local devices does,
+ *	otherwise always false)
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void *), size is determined in hw information.
  * @uapsd_queues: bitmap of queues configured for uapsd. Only valid
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 49401238ac39..301215be59ef 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1073,7 +1073,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 			clear_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE);
 	}
 
-	if (mask & BIT(NL80211_STA_FLAG_WME))
+	if (mask & BIT(NL80211_STA_FLAG_WME) &&
+	    local->hw.queues >= IEEE80211_NUM_ACS)
 		sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME);
 
 	if (mask & BIT(NL80211_STA_FLAG_MFP)) {
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 17eda3575d7d..6da4e72f8178 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1016,7 +1016,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	if (sta && elems->wmm_info)
+	if (sta && elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS)
 		sta->sta.wme = true;
 
 	if (sta && elems->ht_operation && elems->ht_cap_elem &&
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 8a06b1abc47a..0cbcde11fae3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2990,7 +2990,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		sta->sta.mfp = false;
 	}
 
-	sta->sta.wme = elems.wmm_param;
+	sta->sta.wme = elems.wmm_param && local->hw.queues >= IEEE80211_NUM_ACS;
 
 	err = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
 	if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT))
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 34b0e7545cc5..018f029d0c95 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2088,8 +2088,8 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		wme_sta = true;
 
-	/* receiver and we are QoS enabled, use a QoS type frame */
-	if (wme_sta && local->hw.queues >= IEEE80211_NUM_ACS) {
+	/* receiver does QoS (which also means we do) use it */
+	if (wme_sta) {
 		fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
 		hdrlen += 2;
 	}
-- 
cgit v1.2.3


From f3f03330dee0526d82f2a0fd1a79d207ed1ac439 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Mar 2015 18:46:29 +0200
Subject: nfsd: require an explicit option to enable pNFS

Turns out sending out layouts to any client is a bad idea if they
can't get at the storage device, so require explicit admin action
to enable pNFS.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4layouts.c            | 2 +-
 include/uapi/linux/nfsd/export.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 80e236bf79fc..6904213a4363 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
 {
 	struct super_block *sb = exp->ex_path.mnt->mnt_sb;
 
-	if (exp->ex_flags & NFSEXP_NOPNFS)
+	if (!(exp->ex_flags & NFSEXP_PNFS))
 		return;
 
 	if (sb->s_export_op->get_uuid &&
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h
index 4742f2cb42f2..d3bd6ffec041 100644
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -47,7 +47,7 @@
  * exported filesystem.
  */
 #define	NFSEXP_V4ROOT		0x10000
-#define NFSEXP_NOPNFS		0x20000
+#define NFSEXP_PNFS		0x20000
 
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
 #define NFSEXP_ALLFLAGS		0x3FE7F
-- 
cgit v1.2.3


From a4368ff3ed3b57e4b5e36d83b75604f68bbcdaad Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 30 Mar 2015 23:21:01 +0300
Subject: Bluetooth: Refactor L2CAP variables into l2cap_ctrl

We're getting very close to the maximum possible size of bt_skb_cb. To
prepare to shrink the struct with the help of a union this patch moves
all L2CAP related variables into the l2cap_ctrl struct. To later add
other 'ctrl' structs the L2CAP one is renamed simple 'l2cap' instead
of 'control'.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |  8 +++----
 net/bluetooth/l2cap_core.c        | 48 +++++++++++++++++++--------------------
 net/bluetooth/l2cap_sock.c        |  6 ++---
 net/bluetooth/smp.c               |  2 +-
 4 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 33a5e00025aa..d8367cc7c76e 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -269,6 +269,9 @@ struct l2cap_ctrl {
 	__u16	reqseq;
 	__u16	txseq;
 	__u8	retries;
+	__le16  psm;
+	bdaddr_t bdaddr;
+	struct l2cap_chan *chan;
 };
 
 struct hci_dev;
@@ -284,10 +287,7 @@ struct bt_skb_cb {
 	__u8 req_start:1;
 	u8 req_event;
 	hci_req_complete_t req_complete;
-	struct l2cap_chan *chan;
-	struct l2cap_ctrl control;
-	bdaddr_t bdaddr;
-	__le16 psm;
+	struct l2cap_ctrl l2cap;
 };
 #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb))
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index d69861c89bb5..dad419782a12 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -292,7 +292,7 @@ static struct sk_buff *l2cap_ertm_seq_in_queue(struct sk_buff_head *head,
 	struct sk_buff *skb;
 
 	skb_queue_walk(head, skb) {
-		if (bt_cb(skb)->control.txseq == seq)
+		if (bt_cb(skb)->l2cap.txseq == seq)
 			return skb;
 	}
 
@@ -954,11 +954,11 @@ static inline void __unpack_control(struct l2cap_chan *chan,
 {
 	if (test_bit(FLAG_EXT_CTRL, &chan->flags)) {
 		__unpack_extended_control(get_unaligned_le32(skb->data),
-					  &bt_cb(skb)->control);
+					  &bt_cb(skb)->l2cap);
 		skb_pull(skb, L2CAP_EXT_CTRL_SIZE);
 	} else {
 		__unpack_enhanced_control(get_unaligned_le16(skb->data),
-					  &bt_cb(skb)->control);
+					  &bt_cb(skb)->l2cap);
 		skb_pull(skb, L2CAP_ENH_CTRL_SIZE);
 	}
 }
@@ -1200,8 +1200,8 @@ static void l2cap_move_setup(struct l2cap_chan *chan)
 
 	chan->retry_count = 0;
 	skb_queue_walk(&chan->tx_q, skb) {
-		if (bt_cb(skb)->control.retries)
-			bt_cb(skb)->control.retries = 1;
+		if (bt_cb(skb)->l2cap.retries)
+			bt_cb(skb)->l2cap.retries = 1;
 		else
 			break;
 	}
@@ -1846,8 +1846,8 @@ static void l2cap_streaming_send(struct l2cap_chan *chan,
 
 		skb = skb_dequeue(&chan->tx_q);
 
-		bt_cb(skb)->control.retries = 1;
-		control = &bt_cb(skb)->control;
+		bt_cb(skb)->l2cap.retries = 1;
+		control = &bt_cb(skb)->l2cap;
 
 		control->reqseq = 0;
 		control->txseq = chan->next_tx_seq;
@@ -1891,8 +1891,8 @@ static int l2cap_ertm_send(struct l2cap_chan *chan)
 
 		skb = chan->tx_send_head;
 
-		bt_cb(skb)->control.retries = 1;
-		control = &bt_cb(skb)->control;
+		bt_cb(skb)->l2cap.retries = 1;
+		control = &bt_cb(skb)->l2cap;
 
 		if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state))
 			control->final = 1;
@@ -1963,11 +1963,11 @@ static void l2cap_ertm_resend(struct l2cap_chan *chan)
 			continue;
 		}
 
-		bt_cb(skb)->control.retries++;
-		control = bt_cb(skb)->control;
+		bt_cb(skb)->l2cap.retries++;
+		control = bt_cb(skb)->l2cap;
 
 		if (chan->max_tx != 0 &&
-		    bt_cb(skb)->control.retries > chan->max_tx) {
+		    bt_cb(skb)->l2cap.retries > chan->max_tx) {
 			BT_DBG("Retry limit exceeded (%d)", chan->max_tx);
 			l2cap_send_disconn_req(chan, ECONNRESET);
 			l2cap_seq_list_clear(&chan->retrans_list);
@@ -2045,7 +2045,7 @@ static void l2cap_retransmit_all(struct l2cap_chan *chan,
 
 	if (chan->unacked_frames) {
 		skb_queue_walk(&chan->tx_q, skb) {
-			if (bt_cb(skb)->control.txseq == control->reqseq ||
+			if (bt_cb(skb)->l2cap.txseq == control->reqseq ||
 			    skb == chan->tx_send_head)
 				break;
 		}
@@ -2055,7 +2055,7 @@ static void l2cap_retransmit_all(struct l2cap_chan *chan,
 				break;
 
 			l2cap_seq_list_append(&chan->retrans_list,
-					      bt_cb(skb)->control.txseq);
+					      bt_cb(skb)->l2cap.txseq);
 		}
 
 		l2cap_ertm_resend(chan);
@@ -2267,8 +2267,8 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan,
 		return ERR_PTR(err);
 	}
 
-	bt_cb(skb)->control.fcs = chan->fcs;
-	bt_cb(skb)->control.retries = 0;
+	bt_cb(skb)->l2cap.fcs = chan->fcs;
+	bt_cb(skb)->l2cap.retries = 0;
 	return skb;
 }
 
@@ -2321,7 +2321,7 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan,
 			return PTR_ERR(skb);
 		}
 
-		bt_cb(skb)->control.sar = sar;
+		bt_cb(skb)->l2cap.sar = sar;
 		__skb_queue_tail(seg_queue, skb);
 
 		len -= pdu_len;
@@ -2856,7 +2856,7 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb)
 			continue;
 
 		/* Don't send frame to the channel it came from */
-		if (bt_cb(skb)->chan == chan)
+		if (bt_cb(skb)->l2cap.chan == chan)
 			continue;
 
 		nskb = skb_clone(skb, GFP_KERNEL);
@@ -5918,7 +5918,7 @@ static int l2cap_rx_queued_iframes(struct l2cap_chan *chan)
 
 		skb_unlink(skb, &chan->srej_q);
 		chan->buffer_seq = __next_seq(chan, chan->buffer_seq);
-		err = l2cap_reassemble_sdu(chan, skb, &bt_cb(skb)->control);
+		err = l2cap_reassemble_sdu(chan, skb, &bt_cb(skb)->l2cap);
 		if (err)
 			break;
 	}
@@ -5952,7 +5952,7 @@ static void l2cap_handle_srej(struct l2cap_chan *chan,
 		return;
 	}
 
-	if (chan->max_tx != 0 && bt_cb(skb)->control.retries >= chan->max_tx) {
+	if (chan->max_tx != 0 && bt_cb(skb)->l2cap.retries >= chan->max_tx) {
 		BT_DBG("Retry limit exceeded (%d)", chan->max_tx);
 		l2cap_send_disconn_req(chan, ECONNRESET);
 		return;
@@ -6005,7 +6005,7 @@ static void l2cap_handle_rej(struct l2cap_chan *chan,
 	skb = l2cap_ertm_seq_in_queue(&chan->tx_q, control->reqseq);
 
 	if (chan->max_tx && skb &&
-	    bt_cb(skb)->control.retries >= chan->max_tx) {
+	    bt_cb(skb)->l2cap.retries >= chan->max_tx) {
 		BT_DBG("Retry limit exceeded (%d)", chan->max_tx);
 		l2cap_send_disconn_req(chan, ECONNRESET);
 		return;
@@ -6565,7 +6565,7 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
 
 static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
 {
-	struct l2cap_ctrl *control = &bt_cb(skb)->control;
+	struct l2cap_ctrl *control = &bt_cb(skb)->l2cap;
 	u16 len;
 	u8 event;
 
@@ -6864,8 +6864,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
 		goto drop;
 
 	/* Store remote BD_ADDR and PSM for msg_name */
-	bacpy(&bt_cb(skb)->bdaddr, &hcon->dst);
-	bt_cb(skb)->psm = psm;
+	bacpy(&bt_cb(skb)->l2cap.bdaddr, &hcon->dst);
+	bt_cb(skb)->l2cap.psm = psm;
 
 	if (!chan->ops->recv(chan, skb)) {
 		l2cap_chan_put(chan);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 9070720eedc8..a7278f05eafb 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1330,7 +1330,7 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
 
 	skb->priority = sk->sk_priority;
 
-	bt_cb(skb)->chan = chan;
+	bt_cb(skb)->l2cap.chan = chan;
 
 	return skb;
 }
@@ -1444,8 +1444,8 @@ static void l2cap_skb_msg_name(struct sk_buff *skb, void *msg_name,
 
 	memset(la, 0, sizeof(struct sockaddr_l2));
 	la->l2_family = AF_BLUETOOTH;
-	la->l2_psm = bt_cb(skb)->psm;
-	bacpy(&la->l2_bdaddr, &bt_cb(skb)->bdaddr);
+	la->l2_psm = bt_cb(skb)->l2cap.psm;
+	bacpy(&la->l2_bdaddr, &bt_cb(skb)->l2cap.bdaddr);
 
 	*msg_namelen = sizeof(struct sockaddr_l2);
 }
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 1ec3f66b5a74..1910c5806974 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -3017,7 +3017,7 @@ static struct sk_buff *smp_alloc_skb_cb(struct l2cap_chan *chan,
 		return ERR_PTR(-ENOMEM);
 
 	skb->priority = HCI_PRIO_MAX;
-	bt_cb(skb)->chan = chan;
+	bt_cb(skb)->l2cap.chan = chan;
 
 	return skb;
 }
-- 
cgit v1.2.3


From db6e3e8d016823c6b0f773c70a69ce65807d8a44 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 30 Mar 2015 23:21:02 +0300
Subject: Bluetooth: Refactor HCI request variables into own struct

In order to shrink the size of bt_skb_cb, this patch moves the HCI
request related variables into their own req_ctrl struct. Additionall
the L2CAP and HCI request structs are placed inside the same union since
they will never be used at the same time for the same skb.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h | 14 ++++++++++----
 net/bluetooth/hci_core.c          | 12 ++++++------
 net/bluetooth/hci_event.c         |  4 ++--
 net/bluetooth/hci_request.c       |  6 +++---
 net/bluetooth/hci_sock.c          |  2 +-
 5 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index d8367cc7c76e..eeaff4b5cb62 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -278,16 +278,22 @@ struct hci_dev;
 
 typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
 
+struct req_ctrl {
+	bool start;
+	u8 event;
+	hci_req_complete_t complete;
+};
+
 struct bt_skb_cb {
 	__u8 pkt_type;
 	__u8 force_active;
 	__u16 opcode;
 	__u16 expect;
 	__u8 incoming:1;
-	__u8 req_start:1;
-	u8 req_event;
-	hci_req_complete_t req_complete;
-	struct l2cap_ctrl l2cap;
+	union {
+		struct l2cap_ctrl l2cap;
+		struct req_ctrl req;
+	};
 };
 #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb))
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e6bfeb7b4415..246d7eca5d29 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3585,7 +3585,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
 	/* Stand-alone HCI commands must be flagged as
 	 * single-command requests.
 	 */
-	bt_cb(skb)->req_start = 1;
+	bt_cb(skb)->req.start = true;
 
 	skb_queue_tail(&hdev->cmd_q, skb);
 	queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -4263,7 +4263,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev)
 	if (!skb)
 		return true;
 
-	return bt_cb(skb)->req_start;
+	return bt_cb(skb)->req.start;
 }
 
 static void hci_resend_last(struct hci_dev *hdev)
@@ -4323,14 +4323,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
 	 * command queue (hdev->cmd_q).
 	 */
 	if (hdev->sent_cmd) {
-		req_complete = bt_cb(hdev->sent_cmd)->req_complete;
+		req_complete = bt_cb(hdev->sent_cmd)->req.complete;
 
 		if (req_complete) {
 			/* We must set the complete callback to NULL to
 			 * avoid calling the callback more than once if
 			 * this function gets called again.
 			 */
-			bt_cb(hdev->sent_cmd)->req_complete = NULL;
+			bt_cb(hdev->sent_cmd)->req.complete = NULL;
 
 			goto call_complete;
 		}
@@ -4339,12 +4339,12 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
 	/* Remove all pending commands belonging to this request */
 	spin_lock_irqsave(&hdev->cmd_q.lock, flags);
 	while ((skb = __skb_dequeue(&hdev->cmd_q))) {
-		if (bt_cb(skb)->req_start) {
+		if (bt_cb(skb)->req.start) {
 			__skb_queue_head(&hdev->cmd_q, skb);
 			break;
 		}
 
-		req_complete = bt_cb(skb)->req_complete;
+		req_complete = bt_cb(skb)->req.complete;
 		kfree_skb(skb);
 	}
 	spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 20f34b861426..7c0f992602f5 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3125,7 +3125,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		atomic_set(&hdev->cmd_cnt, 1);
 
 	if (ev->status ||
-	    (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req_event))
+	    (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
 		hci_req_cmd_complete(hdev, opcode, ev->status);
 
 	if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q))
@@ -5049,7 +5049,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 
 	skb_pull(skb, HCI_EVENT_HDR_SIZE);
 
-	if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req_event == event) {
+	if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
 		struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
 		u16 opcode = __le16_to_cpu(cmd_hdr->opcode);
 
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 55e096d20a0f..7e17907effb3 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -55,7 +55,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
 		return -ENODATA;
 
 	skb = skb_peek_tail(&req->cmd_q);
-	bt_cb(skb)->req_complete = complete;
+	bt_cb(skb)->req.complete = complete;
 
 	spin_lock_irqsave(&hdev->cmd_q.lock, flags);
 	skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
@@ -116,9 +116,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
 	}
 
 	if (skb_queue_empty(&req->cmd_q))
-		bt_cb(skb)->req_start = 1;
+		bt_cb(skb)->req.start = true;
 
-	bt_cb(skb)->req_event = event;
+	bt_cb(skb)->req.event = event;
 
 	skb_queue_tail(&req->cmd_q, skb);
 }
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 85a44a7dc150..56f9edbf3d05 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1164,7 +1164,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 			/* Stand-alone HCI commands must be flagged as
 			 * single-command requests.
 			 */
-			bt_cb(skb)->req_start = 1;
+			bt_cb(skb)->req.start = true;
 
 			skb_queue_tail(&hdev->cmd_q, skb);
 			queue_work(hdev->workqueue, &hdev->cmd_work);
-- 
cgit v1.2.3


From e9637415a92cf25ad800b7fdeddcd30cce7b44ab Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 30 Mar 2015 13:39:09 -0400
Subject: block: fix blk_stack_limits() regression due to lcm() change

Linux 3.19 commit 69c953c ("lib/lcm.c: lcm(n,0)=lcm(0,n) is 0, not n")
caused blk_stack_limits() to not properly stack queue_limits for stacked
devices (e.g. DM).

Fix this regression by establishing lcm_not_zero() and switching
blk_stack_limits() over to using it.

DM uses blk_set_stacking_limits() to establish the initial top-level
queue_limits that are then built up based on underlying devices' limits
using blk_stack_limits().  In the case of optimal_io_size (io_opt)
blk_set_stacking_limits() establishes a default value of 0.  With commit
69c953c, lcm(0, n) is no longer n, which compromises proper stacking of
the underlying devices' io_opt.

Test:
$ modprobe scsi_debug dev_size_mb=10 num_tgts=1 opt_blks=1536
$ cat /sys/block/sde/queue/optimal_io_size
786432
$ dmsetup create node --table "0 100 linear /dev/sde 0"

Before this fix:
$ cat /sys/block/dm-5/queue/optimal_io_size
0

After this fix:
$ cat /sys/block/dm-5/queue/optimal_io_size
786432

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org # 3.19+
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-settings.c |  6 +++---
 include/linux/lcm.h  |  1 +
 lib/lcm.c            | 11 +++++++++++
 3 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 6ed2cbe5e8c9..12600bfffca9 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -585,7 +585,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 				     b->physical_block_size);
 
 	t->io_min = max(t->io_min, b->io_min);
-	t->io_opt = lcm(t->io_opt, b->io_opt);
+	t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
 
 	t->cluster &= b->cluster;
 	t->discard_zeroes_data &= b->discard_zeroes_data;
@@ -616,7 +616,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		    b->raid_partial_stripes_expensive);
 
 	/* Find lowest common alignment_offset */
-	t->alignment_offset = lcm(t->alignment_offset, alignment)
+	t->alignment_offset = lcm_not_zero(t->alignment_offset, alignment)
 		% max(t->physical_block_size, t->io_min);
 
 	/* Verify that new alignment_offset is on a logical block boundary */
@@ -643,7 +643,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 						      b->max_discard_sectors);
 		t->discard_granularity = max(t->discard_granularity,
 					     b->discard_granularity);
-		t->discard_alignment = lcm(t->discard_alignment, alignment) %
+		t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
 			t->discard_granularity;
 	}
 
diff --git a/include/linux/lcm.h b/include/linux/lcm.h
index 7bf01d779b45..1ce79a7f1daa 100644
--- a/include/linux/lcm.h
+++ b/include/linux/lcm.h
@@ -4,5 +4,6 @@
 #include <linux/compiler.h>
 
 unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__;
+unsigned long lcm_not_zero(unsigned long a, unsigned long b) __attribute_const__;
 
 #endif /* _LCM_H */
diff --git a/lib/lcm.c b/lib/lcm.c
index e97dbd51e756..03d7fcb420b5 100644
--- a/lib/lcm.c
+++ b/lib/lcm.c
@@ -12,3 +12,14 @@ unsigned long lcm(unsigned long a, unsigned long b)
 		return 0;
 }
 EXPORT_SYMBOL_GPL(lcm);
+
+unsigned long lcm_not_zero(unsigned long a, unsigned long b)
+{
+	unsigned long l = lcm(a, b);
+
+	if (l)
+		return l;
+
+	return (b ? : a);
+}
+EXPORT_SYMBOL_GPL(lcm_not_zero);
-- 
cgit v1.2.3


From 92f1719407b90475b3be0b7b9c983dec2ff8351e Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sun, 29 Mar 2015 23:11:51 +0200
Subject: ptp: introduce get/set time methods with explicit 64 bit seconds.

Converting the PHC drivers over to the new methods is one step along the
way to making them ready for 2038.  Once all the drivers are up to date,
then the old methods will be removed.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptp_clock_kernel.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 0d8ff3fb84ba..7d6f8e66396f 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -64,12 +64,18 @@ struct ptp_clock_request {
  * @adjtime:  Shifts the time of the hardware clock.
  *            parameter delta: Desired change in nanoseconds.
  *
- * @gettime:  Reads the current time from the hardware clock.
+ * @gettime:  Reads the current time from the hardware clock. (deprecated)
  *            parameter ts: Holds the result.
  *
- * @settime:  Set the current time on the hardware clock.
+ * @settime:  Set the current time on the hardware clock. (deprecated)
  *            parameter ts: Time value to set.
  *
+ * @gettime64:  Reads the current time from the hardware clock.
+ *              parameter ts: Holds the result.
+ *
+ * @settime64:  Set the current time on the hardware clock.
+ *              parameter ts: Time value to set.
+ *
  * @enable:   Request driver to enable or disable an ancillary feature.
  *            parameter request: Desired resource to enable or disable.
  *            parameter on: Caller passes one to enable or zero to disable.
@@ -106,6 +112,8 @@ struct ptp_clock_info {
 	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
 	int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts);
 	int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts);
+	int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts);
+	int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts);
 	int (*enable)(struct ptp_clock_info *ptp,
 		      struct ptp_clock_request *request, int on);
 	int (*verify)(struct ptp_clock_info *ptp, unsigned int pin,
-- 
cgit v1.2.3


From ed7c6317bc599502e1fdc7f5f95cb9a5550360a4 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sun, 29 Mar 2015 23:12:13 +0200
Subject: ptp: remove 32 bit get/set methods.

All of the PHC drivers have been converted to the new methods.  This patch
converts the three remaining callers within the core code and removes the
older methods for good.  As a result, the core PHC code is ready for the
year 2038.  However, some of the PHC drivers are not quite ready yet.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_chardev.c        |  8 +-------
 drivers/ptp/ptp_clock.c          | 15 ++++-----------
 include/linux/ptp_clock_kernel.h |  8 --------
 3 files changed, 5 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 95bcf1525a84..da7bae991552 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -125,7 +125,6 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 	struct ptp_clock_info *ops = ptp->info;
 	struct ptp_clock_time *pct;
 	struct timespec64 ts;
-	struct timespec t2;
 	int enable, err = 0;
 	unsigned int i, pin_index;
 
@@ -202,12 +201,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 			pct->sec = ts.tv_sec;
 			pct->nsec = ts.tv_nsec;
 			pct++;
-			if (ptp->info->gettime64) {
-				ptp->info->gettime64(ptp->info, &ts);
-			} else {
-				ptp->info->gettime(ptp->info, &t2);
-				ts = timespec_to_timespec64(t2);
-			}
+			ptp->info->gettime64(ptp->info, &ts);
 			pct->sec = ts.tv_sec;
 			pct->nsec = ts.tv_nsec;
 			pct++;
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index df50d5eeae6f..2e481b9e8ea5 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -109,9 +109,7 @@ static int ptp_clock_settime(struct posix_clock *pc, const struct timespec *tp)
 	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
 	struct timespec64 ts = timespec_to_timespec64(*tp);
 
-	return  ptp->info->settime64 ?
-		ptp->info->settime64(ptp->info, &ts) :
-		ptp->info->settime(ptp->info, tp);
+	return  ptp->info->settime64(ptp->info, &ts);
 }
 
 static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp)
@@ -120,14 +118,9 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct timespec *tp)
 	struct timespec64 ts;
 	int err;
 
-	if (ptp->info->gettime64) {
-		err = ptp->info->gettime64(ptp->info, &ts);
-		if (!err)
-			*tp = timespec64_to_timespec(ts);
-	} else {
-		err = ptp->info->gettime(ptp->info, tp);
-	}
-
+	err = ptp->info->gettime64(ptp->info, &ts);
+	if (!err)
+		*tp = timespec64_to_timespec(ts);
 	return err;
 }
 
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 7d6f8e66396f..b8b73066d137 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -64,12 +64,6 @@ struct ptp_clock_request {
  * @adjtime:  Shifts the time of the hardware clock.
  *            parameter delta: Desired change in nanoseconds.
  *
- * @gettime:  Reads the current time from the hardware clock. (deprecated)
- *            parameter ts: Holds the result.
- *
- * @settime:  Set the current time on the hardware clock. (deprecated)
- *            parameter ts: Time value to set.
- *
  * @gettime64:  Reads the current time from the hardware clock.
  *              parameter ts: Holds the result.
  *
@@ -110,8 +104,6 @@ struct ptp_clock_info {
 	struct ptp_pin_desc *pin_config;
 	int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
 	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
-	int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts);
-	int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts);
 	int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts);
 	int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts);
 	int (*enable)(struct ptp_clock_info *ptp,
-- 
cgit v1.2.3


From 8f55db48608b109ad8c7ff4b946ad39b3189a540 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Sun, 29 Mar 2015 16:59:23 +0200
Subject: tcp: simplify inetpeer_addr_base use

In many places, the a6 field is typecasted to struct in6_addr. As the
fields are in union anyway, just add in6_addr type to the union and get rid
of the typecasting.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  3 ++-
 net/ipv4/tcp_metrics.c | 22 ++++++++--------------
 2 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 80479abddf73..d5332ddcea3f 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -19,6 +19,7 @@ struct inetpeer_addr_base {
 	union {
 		__be32			a4;
 		__be32			a6[4];
+		struct in6_addr		in6;
 	};
 };
 
@@ -151,7 +152,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
 {
 	struct inetpeer_addr daddr;
 
-	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
+	daddr.addr.in6 = *v6daddr;
 	daddr.family = AF_INET6;
 	return inet_getpeer(base, &daddr, create);
 }
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 5bef3513af77..f62c2c68ced0 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -80,17 +80,11 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
 static bool addr_same(const struct inetpeer_addr *a,
 		      const struct inetpeer_addr *b)
 {
-	const struct in6_addr *a6, *b6;
-
 	if (a->family != b->family)
 		return false;
 	if (a->family == AF_INET)
 		return a->addr.a4 == b->addr.a4;
-
-	a6 = (const struct in6_addr *) &a->addr.a6[0];
-	b6 = (const struct in6_addr *) &b->addr.a6[0];
-
-	return ipv6_addr_equal(a6, b6);
+	return ipv6_addr_equal(&a->addr.in6, &b->addr.in6);
 }
 
 struct tcpm_hash_bucket {
@@ -256,8 +250,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
 		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		*(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr;
-		*(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr;
+		saddr.addr.in6 = inet_rsk(req)->ir_v6_loc_addr;
+		daddr.addr.in6 = inet_rsk(req)->ir_v6_rmt_addr;
 		hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr);
 		break;
 #endif
@@ -304,9 +298,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock
 			hash = (__force unsigned int) daddr.addr.a4;
 		} else {
 			saddr.family = AF_INET6;
-			*(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr;
+			saddr.addr.in6 = tw->tw_v6_rcv_saddr;
 			daddr.family = AF_INET6;
-			*(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr;
+			daddr.addr.in6 = tw->tw_v6_daddr;
 			hash = ipv6_addr_hash(&tw->tw_v6_daddr);
 		}
 	}
@@ -354,9 +348,9 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
 			hash = (__force unsigned int) daddr.addr.a4;
 		} else {
 			saddr.family = AF_INET6;
-			*(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr;
+			saddr.addr.in6 = sk->sk_v6_rcv_saddr;
 			daddr.family = AF_INET6;
-			*(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr;
+			daddr.addr.in6 = sk->sk_v6_daddr;
 			hash = ipv6_addr_hash(&sk->sk_v6_daddr);
 		}
 	}
@@ -966,7 +960,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
 		addr->family = AF_INET6;
 		memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
 		if (hash)
-			*hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6);
+			*hash = ipv6_addr_hash(&addr->addr.in6);
 		return 0;
 	}
 	return optional ? 1 : -EAFNOSUPPORT;
-- 
cgit v1.2.3


From 15e318bdc6dfb82914c82fb7ad00badaa8387d8e Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Sun, 29 Mar 2015 16:59:24 +0200
Subject: xfrm: simplify xfrm_address_t use

In many places, the a6 field is typecasted to struct in6_addr. As the
fields are in union anyway, just add in6_addr type to the union and
get rid of the typecasting.

Modifying the uapi header is okay, the union has still the same size.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h         | 6 +++---
 include/uapi/linux/xfrm.h  | 2 ++
 net/ipv6/xfrm6_mode_beet.c | 4 ++--
 net/ipv6/xfrm6_policy.c    | 4 +---
 net/key/af_key.c           | 2 +-
 net/xfrm/xfrm_state.c      | 8 ++++----
 6 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d0ac7d7be8a7..461f83539493 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1025,7 +1025,7 @@ xfrm_addr_any(const xfrm_address_t *addr, unsigned short family)
 	case AF_INET:
 		return addr->a4 == 0;
 	case AF_INET6:
-		return ipv6_addr_any((struct in6_addr *)&addr->a6);
+		return ipv6_addr_any(&addr->in6);
 	}
 	return 0;
 }
@@ -1238,8 +1238,8 @@ void xfrm_flowi_addr_get(const struct flowi *fl,
 		memcpy(&daddr->a4, &fl->u.ip4.daddr, sizeof(daddr->a4));
 		break;
 	case AF_INET6:
-		*(struct in6_addr *)saddr->a6 = fl->u.ip6.saddr;
-		*(struct in6_addr *)daddr->a6 = fl->u.ip6.daddr;
+		saddr->in6 = fl->u.ip6.saddr;
+		daddr->in6 = fl->u.ip6.daddr;
 		break;
 	}
 }
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 02d5125a5ee8..2cd9e608d0d1 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_XFRM_H
 #define _LINUX_XFRM_H
 
+#include <linux/in6.h>
 #include <linux/types.h>
 
 /* All of the structures in this file may not change size as they are
@@ -13,6 +14,7 @@
 typedef union {
 	__be32		a4;
 	__be32		a6[4];
+	struct in6_addr	in6;
 } xfrm_address_t;
 
 /* Ident of a specific xfrm_state. It is used on input to lookup
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 9949a356d62c..1e205c3253ac 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -95,8 +95,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(skb->len - size);
-	ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6;
-	ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6;
+	ip6h->daddr = x->sel.daddr.in6;
+	ip6h->saddr = x->sel.saddr.in6;
 	err = 0;
 out:
 	return err;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 91d934c22a2a..f337a908a76a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -61,9 +61,7 @@ static int xfrm6_get_saddr(struct net *net,
 		return -EHOSTUNREACH;
 
 	dev = ip6_dst_idev(dst)->dev;
-	ipv6_dev_get_saddr(dev_net(dev), dev,
-			   (struct in6_addr *)&daddr->a6, 0,
-			   (struct in6_addr *)&saddr->a6);
+	ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
 	dst_release(dst);
 	return 0;
 }
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9255fd9d94bc..f0d52d721b3a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -709,7 +709,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = port;
 		sin6->sin6_flowinfo = 0;
-		sin6->sin6_addr = *(struct in6_addr *)xaddr->a6;
+		sin6->sin6_addr = xaddr->in6;
 		sin6->sin6_scope_id = 0;
 		return 128;
 	    }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index de971b6d38c5..f5e39e35d73a 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1043,12 +1043,12 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 			break;
 
 		case AF_INET6:
-			*(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr;
-			*(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr;
+			x->sel.daddr.in6 = daddr->in6;
+			x->sel.saddr.in6 = saddr->in6;
 			x->sel.prefixlen_d = 128;
 			x->sel.prefixlen_s = 128;
-			*(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr;
-			*(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr;
+			x->props.saddr.in6 = saddr->in6;
+			x->id.daddr.in6 = daddr->in6;
 			break;
 		}
 
-- 
cgit v1.2.3


From 930345ea630405aa6e6f42efcb149c3f360a6b67 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Sun, 29 Mar 2015 16:59:25 +0200
Subject: netlink: implement nla_put_in_addr and nla_put_in6_addr

IP addresses are often stored in netlink attributes. Add generic functions
to do that.

For nla_put_in_addr, it would be nicer to pass struct in_addr but this is
not used universally throughout the kernel, in way too many places __be32 is
used to store IPv4 address.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c                            | 22 +++++++++----------
 include/linux/netfilter/ipset/ip_set.h         |  5 ++---
 include/net/netlink.h                          | 29 ++++++++++++++++++++++++++
 net/ipv4/devinet.c                             |  6 +++---
 net/ipv4/fib_rules.c                           |  4 ++--
 net/ipv4/fib_semantics.c                       |  8 +++----
 net/ipv4/ip_gre.c                              |  4 ++--
 net/ipv4/ip_vti.c                              |  4 ++--
 net/ipv4/ipip.c                                |  4 ++--
 net/ipv4/ipmr.c                                |  4 ++--
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  4 ++--
 net/ipv4/route.c                               |  8 +++----
 net/ipv4/tcp_metrics.c                         | 16 +++++++-------
 net/ipv6/addrconf.c                            | 10 ++++-----
 net/ipv6/addrlabel.c                           |  2 +-
 net/ipv6/fib6_rules.c                          |  6 ++----
 net/ipv6/ip6_gre.c                             |  4 ++--
 net/ipv6/ip6_tunnel.c                          |  6 ++----
 net/ipv6/ip6_vti.c                             |  6 ++----
 net/ipv6/ip6mr.c                               |  4 ++--
 net/ipv6/ndisc.c                               |  3 +--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  6 ++----
 net/ipv6/route.c                               | 14 ++++++-------
 net/ipv6/sit.c                                 | 12 +++++------
 net/l2tp/l2tp_netlink.c                        | 14 +++++++------
 net/netlabel/netlabel_mgmt.c                   | 20 +++++++-----------
 net/netlabel/netlabel_unlabeled.c              | 28 +++++++++++--------------
 net/openvswitch/flow_netlink.c                 |  6 ++++--
 net/wireless/nl80211.c                         |  4 ++--
 29 files changed, 139 insertions(+), 124 deletions(-)

(limited to 'include')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 1c80b67c688d..86f085f95408 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -187,9 +187,9 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
 			      const union vxlan_addr *ip)
 {
 	if (ip->sa.sa_family == AF_INET6)
-		return nla_put(skb, attr, sizeof(struct in6_addr), &ip->sin6.sin6_addr);
+		return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
 	else
-		return nla_put_be32(skb, attr, ip->sin.sin_addr.s_addr);
+		return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
 }
 
 #else /* !CONFIG_IPV6 */
@@ -226,7 +226,7 @@ static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
 static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
 			      const union vxlan_addr *ip)
 {
-	return nla_put_be32(skb, attr, ip->sin.sin_addr.s_addr);
+	return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
 }
 #endif
 
@@ -2807,13 +2807,13 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 
 	if (!vxlan_addr_any(&dst->remote_ip)) {
 		if (dst->remote_ip.sa.sa_family == AF_INET) {
-			if (nla_put_be32(skb, IFLA_VXLAN_GROUP,
-					 dst->remote_ip.sin.sin_addr.s_addr))
+			if (nla_put_in_addr(skb, IFLA_VXLAN_GROUP,
+					    dst->remote_ip.sin.sin_addr.s_addr))
 				goto nla_put_failure;
 #if IS_ENABLED(CONFIG_IPV6)
 		} else {
-			if (nla_put(skb, IFLA_VXLAN_GROUP6, sizeof(struct in6_addr),
-				    &dst->remote_ip.sin6.sin6_addr))
+			if (nla_put_in6_addr(skb, IFLA_VXLAN_GROUP6,
+					     &dst->remote_ip.sin6.sin6_addr))
 				goto nla_put_failure;
 #endif
 		}
@@ -2824,13 +2824,13 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 
 	if (!vxlan_addr_any(&vxlan->saddr)) {
 		if (vxlan->saddr.sa.sa_family == AF_INET) {
-			if (nla_put_be32(skb, IFLA_VXLAN_LOCAL,
-					 vxlan->saddr.sin.sin_addr.s_addr))
+			if (nla_put_in_addr(skb, IFLA_VXLAN_LOCAL,
+					    vxlan->saddr.sin.sin_addr.s_addr))
 				goto nla_put_failure;
 #if IS_ENABLED(CONFIG_IPV6)
 		} else {
-			if (nla_put(skb, IFLA_VXLAN_LOCAL6, sizeof(struct in6_addr),
-				    &vxlan->saddr.sin6.sin6_addr))
+			if (nla_put_in6_addr(skb, IFLA_VXLAN_LOCAL6,
+					     &vxlan->saddr.sin6.sin6_addr))
 				goto nla_put_failure;
 #endif
 		}
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index f1606fa6132d..34b172301558 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -483,7 +483,7 @@ static inline int nla_put_ipaddr4(struct sk_buff *skb, int type, __be32 ipaddr)
 
 	if (!__nested)
 		return -EMSGSIZE;
-	ret = nla_put_net32(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr);
+	ret = nla_put_in_addr(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr);
 	if (!ret)
 		ipset_nest_end(skb, __nested);
 	return ret;
@@ -497,8 +497,7 @@ static inline int nla_put_ipaddr6(struct sk_buff *skb, int type,
 
 	if (!__nested)
 		return -EMSGSIZE;
-	ret = nla_put(skb, IPSET_ATTR_IPADDR_IPV6,
-		      sizeof(struct in6_addr), ipaddrptr);
+	ret = nla_put_in6_addr(skb, IPSET_ATTR_IPADDR_IPV6, ipaddrptr);
 	if (!ret)
 		ipset_nest_end(skb, __nested);
 	return ret;
diff --git a/include/net/netlink.h b/include/net/netlink.h
index e010ee8da41d..17fc76e5b05e 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/netlink.h>
 #include <linux/jiffies.h>
+#include <linux/in6.h>
 
 /* ========================================================================
  *         Netlink Messages and Attributes Interface (As Seen On TV)
@@ -105,6 +106,8 @@
  *   nla_put_string(skb, type, str)	add string attribute to skb
  *   nla_put_flag(skb, type)		add flag attribute to skb
  *   nla_put_msecs(skb, type, jiffies)	add msecs attribute to skb
+ *   nla_put_in_addr(skb, type, addr)	add IPv4 address attribute to skb
+ *   nla_put_in6_addr(skb, type, addr)	add IPv6 address attribute to skb
  *
  * Nested Attributes Construction:
  *   nla_nest_start(skb, type)		start a nested attribute
@@ -956,6 +959,32 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype,
 	return nla_put(skb, attrtype, sizeof(u64), &tmp);
 }
 
+/**
+ * nla_put_in_addr - Add an IPv4 address netlink attribute to a socket
+ * buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @addr: IPv4 address
+ */
+static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype,
+				  __be32 addr)
+{
+	return nla_put_be32(skb, attrtype, addr);
+}
+
+/**
+ * nla_put_in6_addr - Add an IPv6 address netlink attribute to a socket
+ * buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @addr: IPv6 address
+ */
+static inline int nla_put_in6_addr(struct sk_buff *skb, int attrtype,
+				   const struct in6_addr *addr)
+{
+	return nla_put(skb, attrtype, sizeof(*addr), addr);
+}
+
 /**
  * nla_get_u32 - return payload of u32 attribute
  * @nla: u32 netlink attribute
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 975ee5e30c64..66cd85973056 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1541,11 +1541,11 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 		valid = INFINITY_LIFE_TIME;
 	}
 	if ((ifa->ifa_address &&
-	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
+	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
 	    (ifa->ifa_local &&
-	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
+	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
 	    (ifa->ifa_broadcast &&
-	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
+	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
 	    (ifa->ifa_label[0] &&
 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index e9bc5e42cf43..edfea0deec43 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -279,9 +279,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	frh->tos = rule4->tos;
 
 	if ((rule4->dst_len &&
-	     nla_put_be32(skb, FRA_DST, rule4->dst)) ||
+	     nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
 	    (rule4->src_len &&
-	     nla_put_be32(skb, FRA_SRC, rule4->src)))
+	     nla_put_in_addr(skb, FRA_SRC, rule4->src)))
 		goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	if (rule4->tclassid &&
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 66c1e4fbf884..453b24e5322c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1015,7 +1015,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	rtm->rtm_protocol = fi->fib_protocol;
 
 	if (rtm->rtm_dst_len &&
-	    nla_put_be32(skb, RTA_DST, dst))
+	    nla_put_in_addr(skb, RTA_DST, dst))
 		goto nla_put_failure;
 	if (fi->fib_priority &&
 	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
@@ -1024,11 +1024,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		goto nla_put_failure;
 
 	if (fi->fib_prefsrc &&
-	    nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc))
+	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
 		goto nla_put_failure;
 	if (fi->fib_nhs == 1) {
 		if (fi->fib_nh->nh_gw &&
-		    nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
+		    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
 			goto nla_put_failure;
 		if (fi->fib_nh->nh_oif &&
 		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
@@ -1058,7 +1058,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			rtnh->rtnh_ifindex = nh->nh_oif;
 
 			if (nh->nh_gw &&
-			    nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw))
+			    nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw))
 				goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 			if (nh->nh_tclassid &&
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6207275fc749..2e878df46075 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -776,8 +776,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
-	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
-	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
+	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
+	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 94efe148181c..f189f2a8aaa5 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -505,8 +505,8 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	nla_put_u32(skb, IFLA_VTI_LINK, p->link);
 	nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
 	nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
-	nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
-	nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);
+	nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr);
+	nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr);
 
 	return 0;
 }
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 915d215a7d14..17df8d38bbbd 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -450,8 +450,8 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct ip_tunnel_parm *parm = &tunnel->parms;
 
 	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
-	    nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
-	    nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
+	    nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
+	    nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
 	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
 	    nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
 	    nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c688cd1b2110..b4a545d24adb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2281,8 +2281,8 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 		rtm->rtm_protocol = RTPROT_MROUTED;
 	rtm->rtm_flags    = 0;
 
-	if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) ||
-	    nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp))
+	if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
+	    nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
 		goto nla_put_failure;
 	err = __ipmr_fill_mroute(mrt, skb, c, rtm);
 	/* do not break the dump if cache is unresolved */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 5c61328b7704..b36ebfc6b812 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -322,8 +322,8 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
 				const struct nf_conntrack_tuple *tuple)
 {
-	if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
-	    nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+	if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+	    nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
 		goto nla_put_failure;
 	return 0;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index be8703d02ef0..1f147204f1f3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2319,11 +2319,11 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
 	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
 		r->rtm_flags |= RTCF_DOREDIRECT;
 
-	if (nla_put_be32(skb, RTA_DST, dst))
+	if (nla_put_in_addr(skb, RTA_DST, dst))
 		goto nla_put_failure;
 	if (src) {
 		r->rtm_src_len = 32;
-		if (nla_put_be32(skb, RTA_SRC, src))
+		if (nla_put_in_addr(skb, RTA_SRC, src))
 			goto nla_put_failure;
 	}
 	if (rt->dst.dev &&
@@ -2336,11 +2336,11 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
 #endif
 	if (!rt_is_input_route(rt) &&
 	    fl4->saddr != src) {
-		if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
+		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
 			goto nla_put_failure;
 	}
 	if (rt->rt_uses_gateway &&
-	    nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
+	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
 		goto nla_put_failure;
 
 	expires = rt->dst.expires;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f62c2c68ced0..32e36ea6bc0f 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -786,19 +786,19 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
 
 	switch (tm->tcpm_daddr.family) {
 	case AF_INET:
-		if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4,
-				tm->tcpm_daddr.addr.a4) < 0)
+		if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4,
+				    tm->tcpm_daddr.addr.a4) < 0)
 			goto nla_put_failure;
-		if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4,
-				tm->tcpm_saddr.addr.a4) < 0)
+		if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4,
+				    tm->tcpm_saddr.addr.a4) < 0)
 			goto nla_put_failure;
 		break;
 	case AF_INET6:
-		if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16,
-			    tm->tcpm_daddr.addr.a6) < 0)
+		if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6,
+				     &tm->tcpm_daddr.addr.in6) < 0)
 			goto nla_put_failure;
-		if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16,
-			    tm->tcpm_saddr.addr.a6) < 0)
+		if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6,
+				     &tm->tcpm_saddr.addr.in6) < 0)
 			goto nla_put_failure;
 		break;
 	default:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e86f7434e3c3..5c9e94cb1b2c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4237,11 +4237,11 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 	}
 
 	if (!ipv6_addr_any(&ifa->peer_addr)) {
-		if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 ||
-		    nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0)
+		if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
+		    nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0)
 			goto error;
 	} else
-		if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0)
+		if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0)
 			goto error;
 
 	if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
@@ -4273,7 +4273,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 		return -EMSGSIZE;
 
 	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
-	if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
+	if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
 	    put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
 			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
 		nlmsg_cancel(skb, nlh);
@@ -4299,7 +4299,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 		return -EMSGSIZE;
 
 	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
-	if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
+	if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||
 	    put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
 			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
 		nlmsg_cancel(skb, nlh);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 3cc50e2d3bf5..882124ebb438 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -477,7 +477,7 @@ static int ip6addrlbl_fill(struct sk_buff *skb,
 
 	ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
 
-	if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 ||
+	if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 ||
 	    nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
 		nlmsg_cancel(skb, nlh);
 		return -EMSGSIZE;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 273eb26cd6d4..d313bfd88512 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -250,11 +250,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	frh->tos = rule6->tclass;
 
 	if ((rule6->dst.plen &&
-	     nla_put(skb, FRA_DST, sizeof(struct in6_addr),
-		     &rule6->dst.addr)) ||
+	     nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
 	    (rule6->src.plen &&
-	     nla_put(skb, FRA_SRC, sizeof(struct in6_addr),
-		     &rule6->src.addr)))
+	     nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr)))
 		goto nla_put_failure;
 	return 0;
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 67e014d88e55..f61f7ad2d045 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1622,8 +1622,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
-	    nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) ||
-	    nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) ||
+	    nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
+	    nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) ||
 	    nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
 	    /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
 	    nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0c68012b6d6e..80543d13ea7c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1739,10 +1739,8 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct __ip6_tnl_parm *parm = &tunnel->parms;
 
 	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
-	    nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
-		    &parm->laddr) ||
-	    nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
-		    &parm->raddr) ||
+	    nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
+	    nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
 	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
 	    nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
 	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 1ec5b4a530d0..87a262b0f07b 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -983,10 +983,8 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct __ip6_tnl_parm *parm = &tunnel->parms;
 
 	if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) ||
-	    nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr),
-		    &parm->laddr) ||
-	    nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr),
-		    &parm->raddr) ||
+	    nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) ||
+	    nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) ||
 	    nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) ||
 	    nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key))
 		goto nla_put_failure;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index ff883c9d0e3c..caf6b99374e6 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2378,8 +2378,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 		rtm->rtm_protocol = RTPROT_MROUTED;
 	rtm->rtm_flags    = 0;
 
-	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
-	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
+	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
+	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
 		goto nla_put_failure;
 	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
 	/* do not break the dump if cache is unresolved */
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 482dfb9f0f7e..c283827d60e2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1049,8 +1049,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
 
 	memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
 
-	if (nla_put(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
-		    &ipv6_hdr(ra)->saddr))
+	if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr))
 		goto nla_put_failure;
 	nlmsg_end(skb, nlh);
 
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index b68d0e59c1f8..78284a697439 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -290,10 +290,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
 				const struct nf_conntrack_tuple *tuple)
 {
-	if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
-		    &tuple->src.u3.ip6) ||
-	    nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
-		    &tuple->dst.u3.ip6))
+	if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
+	    nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
 		goto nla_put_failure;
 	return 0;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fe742fa0f7ff..385e9bd4f218 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2669,19 +2669,19 @@ static int rt6_fill_node(struct net *net,
 		rtm->rtm_flags |= RTM_F_CLONED;
 
 	if (dst) {
-		if (nla_put(skb, RTA_DST, 16, dst))
+		if (nla_put_in6_addr(skb, RTA_DST, dst))
 			goto nla_put_failure;
 		rtm->rtm_dst_len = 128;
 	} else if (rtm->rtm_dst_len)
-		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
+		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
 			goto nla_put_failure;
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src) {
-		if (nla_put(skb, RTA_SRC, 16, src))
+		if (nla_put_in6_addr(skb, RTA_SRC, src))
 			goto nla_put_failure;
 		rtm->rtm_src_len = 128;
 	} else if (rtm->rtm_src_len &&
-		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
+		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
 		goto nla_put_failure;
 #endif
 	if (iif) {
@@ -2705,14 +2705,14 @@ static int rt6_fill_node(struct net *net,
 	} else if (dst) {
 		struct in6_addr saddr_buf;
 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
-		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
+		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
 			goto nla_put_failure;
 	}
 
 	if (rt->rt6i_prefsrc.plen) {
 		struct in6_addr saddr_buf;
 		saddr_buf = rt->rt6i_prefsrc.addr;
-		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
+		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
 			goto nla_put_failure;
 	}
 
@@ -2720,7 +2720,7 @@ static int rt6_fill_node(struct net *net,
 		goto nla_put_failure;
 
 	if (rt->rt6i_flags & RTF_GATEWAY) {
-		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
+		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
 			goto nla_put_failure;
 	}
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 92692a7e8a2b..0e2bb538a556 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1683,8 +1683,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct ip_tunnel_parm *parm = &tunnel->parms;
 
 	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
-	    nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
-	    nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
+	    nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
+	    nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
 	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
 	    nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
 	    nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
@@ -1694,10 +1694,10 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		goto nla_put_failure;
 
 #ifdef CONFIG_IPV6_SIT_6RD
-	if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr),
-		    &tunnel->ip6rd.prefix) ||
-	    nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
-			 tunnel->ip6rd.relay_prefix) ||
+	if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX,
+			     &tunnel->ip6rd.prefix) ||
+	    nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
+			    tunnel->ip6rd.relay_prefix) ||
 	    nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
 			tunnel->ip6rd.prefixlen) ||
 	    nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index b4e923f77954..a4f78d36bace 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -376,15 +376,17 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 	case L2TP_ENCAPTYPE_IP:
 #if IS_ENABLED(CONFIG_IPV6)
 		if (np) {
-			if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr),
-				    &np->saddr) ||
-			    nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr),
-				    &sk->sk_v6_daddr))
+			if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR,
+					     &np->saddr) ||
+			    nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR,
+					     &sk->sk_v6_daddr))
 				goto nla_put_failure;
 		} else
 #endif
-		if (nla_put_be32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr) ||
-		    nla_put_be32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr))
+		if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR,
+				    inet->inet_saddr) ||
+		    nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR,
+				    inet->inet_daddr))
 			goto nla_put_failure;
 		break;
 	}
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 70440748fe5c..13f777f20995 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -293,15 +293,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
 				return -ENOMEM;
 
 			addr_struct.s_addr = iter4->addr;
-			ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR,
-					  sizeof(struct in_addr),
-					  &addr_struct);
+			ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4ADDR,
+						  addr_struct.s_addr);
 			if (ret_val != 0)
 				return ret_val;
 			addr_struct.s_addr = iter4->mask;
-			ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK,
-					  sizeof(struct in_addr),
-					  &addr_struct);
+			ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4MASK,
+						  addr_struct.s_addr);
 			if (ret_val != 0)
 				return ret_val;
 			map4 = netlbl_domhsh_addr4_entry(iter4);
@@ -328,14 +326,12 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
 			if (nla_b == NULL)
 				return -ENOMEM;
 
-			ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR,
-					  sizeof(struct in6_addr),
-					  &iter6->addr);
+			ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6ADDR,
+						   &iter6->addr);
 			if (ret_val != 0)
 				return ret_val;
-			ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK,
-					  sizeof(struct in6_addr),
-					  &iter6->mask);
+			ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6MASK,
+						   &iter6->mask);
 			if (ret_val != 0)
 				return ret_val;
 			map6 = netlbl_domhsh_addr6_entry(iter6);
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index aec7994f78cf..b0380927f05f 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1117,34 +1117,30 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
 		struct in_addr addr_struct;
 
 		addr_struct.s_addr = addr4->list.addr;
-		ret_val = nla_put(cb_arg->skb,
-				  NLBL_UNLABEL_A_IPV4ADDR,
-				  sizeof(struct in_addr),
-				  &addr_struct);
+		ret_val = nla_put_in_addr(cb_arg->skb,
+					  NLBL_UNLABEL_A_IPV4ADDR,
+					  addr_struct.s_addr);
 		if (ret_val != 0)
 			goto list_cb_failure;
 
 		addr_struct.s_addr = addr4->list.mask;
-		ret_val = nla_put(cb_arg->skb,
-				  NLBL_UNLABEL_A_IPV4MASK,
-				  sizeof(struct in_addr),
-				  &addr_struct);
+		ret_val = nla_put_in_addr(cb_arg->skb,
+					  NLBL_UNLABEL_A_IPV4MASK,
+					  addr_struct.s_addr);
 		if (ret_val != 0)
 			goto list_cb_failure;
 
 		secid = addr4->secid;
 	} else {
-		ret_val = nla_put(cb_arg->skb,
-				  NLBL_UNLABEL_A_IPV6ADDR,
-				  sizeof(struct in6_addr),
-				  &addr6->list.addr);
+		ret_val = nla_put_in6_addr(cb_arg->skb,
+					   NLBL_UNLABEL_A_IPV6ADDR,
+					   &addr6->list.addr);
 		if (ret_val != 0)
 			goto list_cb_failure;
 
-		ret_val = nla_put(cb_arg->skb,
-				  NLBL_UNLABEL_A_IPV6MASK,
-				  sizeof(struct in6_addr),
-				  &addr6->list.mask);
+		ret_val = nla_put_in6_addr(cb_arg->skb,
+					   NLBL_UNLABEL_A_IPV6MASK,
+					   &addr6->list.mask);
 		if (ret_val != 0)
 			goto list_cb_failure;
 
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 22b18c145c92..c0c5b5519f45 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -648,10 +648,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
 	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
 		return -EMSGSIZE;
 	if (output->ipv4_src &&
-	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
+	    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
+			    output->ipv4_src))
 		return -EMSGSIZE;
 	if (output->ipv4_dst &&
-	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
+	    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
+			    output->ipv4_dst))
 		return -EMSGSIZE;
 	if (output->ipv4_tos &&
 	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d6ba4a6bbff6..6a4a4d7db1fc 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8761,8 +8761,8 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg,
 	if (!nl_tcp)
 		return -ENOBUFS;
 
-	if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) ||
-	    nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) ||
+	if (nla_put_in_addr(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) ||
+	    nla_put_in_addr(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) ||
 	    nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) ||
 	    nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) ||
 	    nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) ||
-- 
cgit v1.2.3


From 67b61f6c130a05b2cd4c3dfded49a751ff42c534 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Sun, 29 Mar 2015 16:59:26 +0200
Subject: netlink: implement nla_get_in_addr and nla_get_in6_addr

Those are counterparts to nla_put_in_addr and nla_put_in6_addr.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c                            | 16 +++++++---------
 include/net/netlink.h                          | 21 +++++++++++++++++++++
 net/ipv4/devinet.c                             | 10 +++++-----
 net/ipv4/fib_rules.c                           |  8 ++++----
 net/ipv4/fib_semantics.c                       |  4 ++--
 net/ipv4/ip_gre.c                              |  4 ++--
 net/ipv4/ip_vti.c                              |  4 ++--
 net/ipv4/ipip.c                                |  4 ++--
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  4 ++--
 net/ipv4/route.c                               |  4 ++--
 net/ipv4/tcp_metrics.c                         |  4 ++--
 net/ipv6/fib6_rules.c                          |  6 ++----
 net/ipv6/ip6_gre.c                             |  6 +++---
 net/ipv6/ip6_tunnel.c                          |  6 ++----
 net/ipv6/ip6_vti.c                             |  6 ++----
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  6 ++----
 net/ipv6/route.c                               |  6 +++---
 net/ipv6/sit.c                                 |  3 +--
 net/l2tp/l2tp_netlink.c                        |  4 ++--
 net/openvswitch/flow_netlink.c                 |  4 ++--
 net/wireless/nl80211.c                         |  4 ++--
 21 files changed, 72 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 86f085f95408..a8d345054d23 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -171,11 +171,11 @@ static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
 static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
 {
 	if (nla_len(nla) >= sizeof(struct in6_addr)) {
-		nla_memcpy(&ip->sin6.sin6_addr, nla, sizeof(struct in6_addr));
+		ip->sin6.sin6_addr = nla_get_in6_addr(nla);
 		ip->sa.sa_family = AF_INET6;
 		return 0;
 	} else if (nla_len(nla) >= sizeof(__be32)) {
-		ip->sin.sin_addr.s_addr = nla_get_be32(nla);
+		ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
 		ip->sa.sa_family = AF_INET;
 		return 0;
 	} else {
@@ -215,7 +215,7 @@ static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
 	if (nla_len(nla) >= sizeof(struct in6_addr)) {
 		return -EAFNOSUPPORT;
 	} else if (nla_len(nla) >= sizeof(__be32)) {
-		ip->sin.sin_addr.s_addr = nla_get_be32(nla);
+		ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
 		ip->sa.sa_family = AF_INET;
 		return 0;
 	} else {
@@ -2602,27 +2602,25 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 	/* Unless IPv6 is explicitly requested, assume IPv4 */
 	dst->remote_ip.sa.sa_family = AF_INET;
 	if (data[IFLA_VXLAN_GROUP]) {
-		dst->remote_ip.sin.sin_addr.s_addr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
+		dst->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
 	} else if (data[IFLA_VXLAN_GROUP6]) {
 		if (!IS_ENABLED(CONFIG_IPV6))
 			return -EPFNOSUPPORT;
 
-		nla_memcpy(&dst->remote_ip.sin6.sin6_addr, data[IFLA_VXLAN_GROUP6],
-			   sizeof(struct in6_addr));
+		dst->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
 		dst->remote_ip.sa.sa_family = AF_INET6;
 		use_ipv6 = true;
 	}
 
 	if (data[IFLA_VXLAN_LOCAL]) {
-		vxlan->saddr.sin.sin_addr.s_addr = nla_get_be32(data[IFLA_VXLAN_LOCAL]);
+		vxlan->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
 		vxlan->saddr.sa.sa_family = AF_INET;
 	} else if (data[IFLA_VXLAN_LOCAL6]) {
 		if (!IS_ENABLED(CONFIG_IPV6))
 			return -EPFNOSUPPORT;
 
 		/* TODO: respect scope id */
-		nla_memcpy(&vxlan->saddr.sin6.sin6_addr, data[IFLA_VXLAN_LOCAL6],
-			   sizeof(struct in6_addr));
+		vxlan->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
 		vxlan->saddr.sa.sa_family = AF_INET6;
 		use_ipv6 = true;
 	}
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 17fc76e5b05e..2a5dbcc90d1c 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1127,6 +1127,27 @@ static inline unsigned long nla_get_msecs(const struct nlattr *nla)
 	return msecs_to_jiffies((unsigned long) msecs);
 }
 
+/**
+ * nla_get_in_addr - return payload of IPv4 address attribute
+ * @nla: IPv4 address netlink attribute
+ */
+static inline __be32 nla_get_in_addr(const struct nlattr *nla)
+{
+	return *(__be32 *) nla_data(nla);
+}
+
+/**
+ * nla_get_in6_addr - return payload of IPv6 address attribute
+ * @nla: IPv6 address netlink attribute
+ */
+static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla)
+{
+	struct in6_addr tmp;
+
+	nla_memcpy(&tmp, nla, sizeof(tmp));
+	return tmp;
+}
+
 /**
  * nla_nest_start - Start a new level of nested attributes
  * @skb: socket buffer to add attributes to
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 66cd85973056..c6473f365ad1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -593,7 +593,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 	     ifap = &ifa->ifa_next) {
 		if (tb[IFA_LOCAL] &&
-		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
+		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
 			continue;
 
 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
@@ -601,7 +601,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		if (tb[IFA_ADDRESS] &&
 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
-		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
+		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
 			continue;
 
 		if (ipv4_is_multicast(ifa->ifa_address))
@@ -791,11 +791,11 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 	ifa->ifa_scope = ifm->ifa_scope;
 	ifa->ifa_dev = in_dev;
 
-	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
-	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
+	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
+	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
 
 	if (tb[IFA_BROADCAST])
-		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
+		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
 
 	if (tb[IFA_LABEL])
 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index edfea0deec43..8162dd8e86d7 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -194,10 +194,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	}
 
 	if (frh->src_len)
-		rule4->src = nla_get_be32(tb[FRA_SRC]);
+		rule4->src = nla_get_in_addr(tb[FRA_SRC]);
 
 	if (frh->dst_len)
-		rule4->dst = nla_get_be32(tb[FRA_DST]);
+		rule4->dst = nla_get_in_addr(tb[FRA_DST]);
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	if (tb[FRA_FLOW]) {
@@ -260,10 +260,10 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 		return 0;
 #endif
 
-	if (frh->src_len && (rule4->src != nla_get_be32(tb[FRA_SRC])))
+	if (frh->src_len && (rule4->src != nla_get_in_addr(tb[FRA_SRC])))
 		return 0;
 
-	if (frh->dst_len && (rule4->dst != nla_get_be32(tb[FRA_DST])))
+	if (frh->dst_len && (rule4->dst != nla_get_in_addr(tb[FRA_DST])))
 		return 0;
 
 	return 1;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 453b24e5322c..eac5aec7772a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -468,7 +468,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
-			nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
+			nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
 			nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
@@ -523,7 +523,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
-			if (nla && nla_get_be32(nla) != nh->nh_gw)
+			if (nla && nla_get_in_addr(nla) != nh->nh_gw)
 				return 1;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2e878df46075..0eb2a040a830 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -621,10 +621,10 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
 
 	if (data[IFLA_GRE_LOCAL])
-		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
+		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
 
 	if (data[IFLA_GRE_REMOTE])
-		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
+		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
 
 	if (data[IFLA_GRE_TTL])
 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index f189f2a8aaa5..5a6e27054f0a 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -456,10 +456,10 @@ static void vti_netlink_parms(struct nlattr *data[],
 		parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
 
 	if (data[IFLA_VTI_LOCAL])
-		parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);
+		parms->iph.saddr = nla_get_in_addr(data[IFLA_VTI_LOCAL]);
 
 	if (data[IFLA_VTI_REMOTE])
-		parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
+		parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]);
 
 }
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 17df8d38bbbd..bfbcc85c02ee 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -325,10 +325,10 @@ static void ipip_netlink_parms(struct nlattr *data[],
 		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
 
 	if (data[IFLA_IPTUN_LOCAL])
-		parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
+		parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
 
 	if (data[IFLA_IPTUN_REMOTE])
-		parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
+		parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
 
 	if (data[IFLA_IPTUN_TTL]) {
 		parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index b36ebfc6b812..8c8d6642cbb0 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -342,8 +342,8 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
 	if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
 		return -EINVAL;
 
-	t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]);
-	t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]);
+	t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
+	t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
 
 	return 0;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1f147204f1f3..652b92ebd7ba 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2436,8 +2436,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 	ip_hdr(skb)->protocol = IPPROTO_ICMP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
-	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
-	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
+	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
+	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
 	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
 	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
 
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 32e36ea6bc0f..71ec14c87579 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -948,7 +948,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
 	a = info->attrs[v4];
 	if (a) {
 		addr->family = AF_INET;
-		addr->addr.a4 = nla_get_be32(a);
+		addr->addr.a4 = nla_get_in_addr(a);
 		if (hash)
 			*hash = (__force unsigned int) addr->addr.a4;
 		return 0;
@@ -958,7 +958,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
 		if (nla_len(a) != sizeof(struct in6_addr))
 			return -EINVAL;
 		addr->family = AF_INET6;
-		memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
+		addr->addr.in6 = nla_get_in6_addr(a);
 		if (hash)
 			*hash = ipv6_addr_hash(&addr->addr.in6);
 		return 0;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d313bfd88512..61fb184b818d 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -199,12 +199,10 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	}
 
 	if (frh->src_len)
-		nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
-			   sizeof(struct in6_addr));
+		rule6->src.addr = nla_get_in6_addr(tb[FRA_SRC]);
 
 	if (frh->dst_len)
-		nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
-			   sizeof(struct in6_addr));
+		rule6->dst.addr = nla_get_in6_addr(tb[FRA_DST]);
 
 	rule6->src.plen = frh->src_len;
 	rule6->dst.plen = frh->dst_len;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f61f7ad2d045..0f4e73da14e4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1412,7 +1412,7 @@ static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
 		goto out;
 
 	if (data[IFLA_GRE_REMOTE]) {
-		nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+		daddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]);
 		if (ipv6_addr_any(&daddr))
 			return -EINVAL;
 	}
@@ -1446,10 +1446,10 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
 
 	if (data[IFLA_GRE_LOCAL])
-		nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));
+		parms->laddr = nla_get_in6_addr(data[IFLA_GRE_LOCAL]);
 
 	if (data[IFLA_GRE_REMOTE])
-		nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+		parms->raddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]);
 
 	if (data[IFLA_GRE_TTL])
 		parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 80543d13ea7c..9bd85f0dff69 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1640,12 +1640,10 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
 		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
 
 	if (data[IFLA_IPTUN_LOCAL])
-		nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL],
-			   sizeof(struct in6_addr));
+		parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);
 
 	if (data[IFLA_IPTUN_REMOTE])
-		nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE],
-			   sizeof(struct in6_addr));
+		parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);
 
 	if (data[IFLA_IPTUN_TTL])
 		parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 87a262b0f07b..53d90ed68905 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -897,12 +897,10 @@ static void vti6_netlink_parms(struct nlattr *data[],
 		parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
 
 	if (data[IFLA_VTI_LOCAL])
-		nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL],
-			   sizeof(struct in6_addr));
+		parms->laddr = nla_get_in6_addr(data[IFLA_VTI_LOCAL]);
 
 	if (data[IFLA_VTI_REMOTE])
-		nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE],
-			   sizeof(struct in6_addr));
+		parms->raddr = nla_get_in6_addr(data[IFLA_VTI_REMOTE]);
 
 	if (data[IFLA_VTI_IKEY])
 		parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 78284a697439..fba91c6fc7ca 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -310,10 +310,8 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
 	if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
 		return -EINVAL;
 
-	memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]),
-	       sizeof(u_int32_t) * 4);
-	memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]),
-	       sizeof(u_int32_t) * 4);
+	t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
+	t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
 
 	return 0;
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 385e9bd4f218..5c48293ff062 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2438,7 +2438,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
 
 	if (tb[RTA_GATEWAY]) {
-		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
+		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
 		cfg->fc_flags |= RTF_GATEWAY;
 	}
 
@@ -2461,7 +2461,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	if (tb[RTA_PREFSRC])
-		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
+		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
 
 	if (tb[RTA_OIF])
 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
@@ -2519,7 +2519,7 @@ beginning:
 
 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
 			if (nla) {
-				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+				r_cfg.fc_gateway = nla_get_in6_addr(nla);
 				r_cfg.fc_flags |= RTF_GATEWAY;
 			}
 		}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0e2bb538a556..e6b9f51b15e8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1530,8 +1530,7 @@ static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
 
 	if (data[IFLA_IPTUN_6RD_PREFIX]) {
 		ret = true;
-		nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX],
-			   sizeof(struct in6_addr));
+		ip6rd->prefix = nla_get_in6_addr(data[IFLA_IPTUN_6RD_PREFIX]);
 	}
 
 	if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) {
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index a4f78d36bace..9e13c2ff8789 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -205,9 +205,9 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 #endif
 		if (info->attrs[L2TP_ATTR_IP_SADDR] &&
 		    info->attrs[L2TP_ATTR_IP_DADDR]) {
-			cfg.local_ip.s_addr = nla_get_be32(
+			cfg.local_ip.s_addr = nla_get_in_addr(
 				info->attrs[L2TP_ATTR_IP_SADDR]);
-			cfg.peer_ip.s_addr = nla_get_be32(
+			cfg.peer_ip.s_addr = nla_get_in_addr(
 				info->attrs[L2TP_ATTR_IP_DADDR]);
 		} else {
 			ret = -EINVAL;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c0c5b5519f45..c691b1a1eee0 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -535,11 +535,11 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			break;
 		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
 			SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
-					nla_get_be32(a), is_mask);
+					nla_get_in_addr(a), is_mask);
 			break;
 		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
 			SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
-					nla_get_be32(a), is_mask);
+					nla_get_in_addr(a), is_mask);
 			break;
 		case OVS_TUNNEL_KEY_ATTR_TOS:
 			SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6a4a4d7db1fc..2fb804bfa361 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8993,8 +8993,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
 	cfg = kzalloc(size, GFP_KERNEL);
 	if (!cfg)
 		return -ENOMEM;
-	cfg->src = nla_get_be32(tb[NL80211_WOWLAN_TCP_SRC_IPV4]);
-	cfg->dst = nla_get_be32(tb[NL80211_WOWLAN_TCP_DST_IPV4]);
+	cfg->src = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_SRC_IPV4]);
+	cfg->dst = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_DST_IPV4]);
 	memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]),
 	       ETH_ALEN);
 	if (tb[NL80211_WOWLAN_TCP_SRC_PORT])
-- 
cgit v1.2.3


From f9c72d10d6fbf949558cd088389a42213ed7b12d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Tue, 31 Mar 2015 12:03:28 -0400
Subject: sunrpc: make debugfs file creation failure non-fatal

We currently have a problem that SELinux policy is being enforced when
creating debugfs files. If a debugfs file is created as a side effect of
doing some syscall, then that creation can fail if the SELinux policy
for that process prevents it.

This seems wrong. We don't do that for files under /proc, for instance,
so Bruce has proposed a patch to fix that.

While discussing that patch however, Greg K.H. stated:

    "No kernel code should care / fail if a debugfs function fails, so
     please fix up the sunrpc code first."

This patch converts all of the sunrpc debugfs setup code to be void
return functins, and the callers to not look for errors from those
functions.

This should allow rpc_clnt and rpc_xprt creation to work, even if the
kernel fails to create debugfs files for some reason.

Symptoms were failing krb5 mounts on systems using gss-proxy and
selinux.

Fixes: 388f0c776781 "sunrpc: add a debugfs rpc_xprt directory..."
Cc: stable@vger.kernel.org
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/debug.h | 18 +++++++--------
 net/sunrpc/clnt.c            |  4 +---
 net/sunrpc/debugfs.c         | 52 ++++++++++++++++++++++++--------------------
 net/sunrpc/sunrpc_syms.c     |  7 +-----
 net/sunrpc/xprt.c            |  7 +-----
 5 files changed, 41 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index c57d8ea0716c..59a7889e15db 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -60,17 +60,17 @@ struct rpc_xprt;
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 void		rpc_register_sysctl(void);
 void		rpc_unregister_sysctl(void);
-int		sunrpc_debugfs_init(void);
+void		sunrpc_debugfs_init(void);
 void		sunrpc_debugfs_exit(void);
-int		rpc_clnt_debugfs_register(struct rpc_clnt *);
+void		rpc_clnt_debugfs_register(struct rpc_clnt *);
 void		rpc_clnt_debugfs_unregister(struct rpc_clnt *);
-int		rpc_xprt_debugfs_register(struct rpc_xprt *);
+void		rpc_xprt_debugfs_register(struct rpc_xprt *);
 void		rpc_xprt_debugfs_unregister(struct rpc_xprt *);
 #else
-static inline int
+static inline void
 sunrpc_debugfs_init(void)
 {
-	return 0;
+	return;
 }
 
 static inline void
@@ -79,10 +79,10 @@ sunrpc_debugfs_exit(void)
 	return;
 }
 
-static inline int
+static inline void
 rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
 {
-	return 0;
+	return;
 }
 
 static inline void
@@ -91,10 +91,10 @@ rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt)
 	return;
 }
 
-static inline int
+static inline void
 rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
 {
-	return 0;
+	return;
 }
 
 static inline void
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 612aa73bbc60..e6ce1517367f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -303,9 +303,7 @@ static int rpc_client_register(struct rpc_clnt *clnt,
 	struct super_block *pipefs_sb;
 	int err;
 
-	err = rpc_clnt_debugfs_register(clnt);
-	if (err)
-		return err;
+	rpc_clnt_debugfs_register(clnt);
 
 	pipefs_sb = rpc_get_sb_net(net);
 	if (pipefs_sb) {
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e811f390f9f6..82962f7e6e88 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -129,48 +129,52 @@ static const struct file_operations tasks_fops = {
 	.release	= tasks_release,
 };
 
-int
+void
 rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
 {
-	int len, err;
+	int len;
 	char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */
+	struct rpc_xprt *xprt;
 
 	/* Already registered? */
-	if (clnt->cl_debugfs)
-		return 0;
+	if (clnt->cl_debugfs || !rpc_clnt_dir)
+		return;
 
 	len = snprintf(name, sizeof(name), "%x", clnt->cl_clid);
 	if (len >= sizeof(name))
-		return -EINVAL;
+		return;
 
 	/* make the per-client dir */
 	clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir);
 	if (!clnt->cl_debugfs)
-		return -ENOMEM;
+		return;
 
 	/* make tasks file */
-	err = -ENOMEM;
 	if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs,
 				 clnt, &tasks_fops))
 		goto out_err;
 
-	err = -EINVAL;
 	rcu_read_lock();
+	xprt = rcu_dereference(clnt->cl_xprt);
+	/* no "debugfs" dentry? Don't bother with the symlink. */
+	if (!xprt->debugfs) {
+		rcu_read_unlock();
+		return;
+	}
 	len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
-			rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name);
+			xprt->debugfs->d_name.name);
 	rcu_read_unlock();
+
 	if (len >= sizeof(name))
 		goto out_err;
 
-	err = -ENOMEM;
 	if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name))
 		goto out_err;
 
-	return 0;
+	return;
 out_err:
 	debugfs_remove_recursive(clnt->cl_debugfs);
 	clnt->cl_debugfs = NULL;
-	return err;
 }
 
 void
@@ -226,33 +230,33 @@ static const struct file_operations xprt_info_fops = {
 	.release	= xprt_info_release,
 };
 
-int
+void
 rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
 {
 	int len, id;
 	static atomic_t	cur_id;
 	char		name[9]; /* 8 hex digits + NULL term */
 
+	if (!rpc_xprt_dir)
+		return;
+
 	id = (unsigned int)atomic_inc_return(&cur_id);
 
 	len = snprintf(name, sizeof(name), "%x", id);
 	if (len >= sizeof(name))
-		return -EINVAL;
+		return;
 
 	/* make the per-client dir */
 	xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir);
 	if (!xprt->debugfs)
-		return -ENOMEM;
+		return;
 
 	/* make tasks file */
 	if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs,
 				 xprt, &xprt_info_fops)) {
 		debugfs_remove_recursive(xprt->debugfs);
 		xprt->debugfs = NULL;
-		return -ENOMEM;
 	}
-
-	return 0;
 }
 
 void
@@ -266,14 +270,17 @@ void __exit
 sunrpc_debugfs_exit(void)
 {
 	debugfs_remove_recursive(topdir);
+	topdir = NULL;
+	rpc_clnt_dir = NULL;
+	rpc_xprt_dir = NULL;
 }
 
-int __init
+void __init
 sunrpc_debugfs_init(void)
 {
 	topdir = debugfs_create_dir("sunrpc", NULL);
 	if (!topdir)
-		goto out;
+		return;
 
 	rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
 	if (!rpc_clnt_dir)
@@ -283,10 +290,9 @@ sunrpc_debugfs_init(void)
 	if (!rpc_xprt_dir)
 		goto out_remove;
 
-	return 0;
+	return;
 out_remove:
 	debugfs_remove_recursive(topdir);
 	topdir = NULL;
-out:
-	return -ENOMEM;
+	rpc_clnt_dir = NULL;
 }
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index e37fbed87956..ee5d3d253102 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -98,10 +98,7 @@ init_sunrpc(void)
 	if (err)
 		goto out4;
 
-	err = sunrpc_debugfs_init();
-	if (err)
-		goto out5;
-
+	sunrpc_debugfs_init();
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	rpc_register_sysctl();
 #endif
@@ -109,8 +106,6 @@ init_sunrpc(void)
 	init_socket_xprt();	/* clnt sock transport */
 	return 0;
 
-out5:
-	unregister_rpc_pipefs();
 out4:
 	unregister_pernet_subsys(&sunrpc_net_ops);
 out3:
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e3015aede0d9..9949722d99ce 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1331,7 +1331,6 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
  */
 struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 {
-	int err;
 	struct rpc_xprt	*xprt;
 	struct xprt_class *t;
 
@@ -1372,11 +1371,7 @@ found:
 		return ERR_PTR(-ENOMEM);
 	}
 
-	err = rpc_xprt_debugfs_register(xprt);
-	if (err) {
-		xprt_destroy(xprt);
-		return ERR_PTR(err);
-	}
+	rpc_xprt_debugfs_register(xprt);
 
 	dprintk("RPC:       created transport %p with %u slots\n", xprt,
 			xprt->max_reqs);
-- 
cgit v1.2.3


From ffa88f37ffeaac398be68f9678b0e6046a5ba7f6 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 30 Mar 2015 17:45:22 +0300
Subject: net/mlx4_en: Move statistics bitmap setting to the Ethernet driver

The statistics bitmap belongs to the Ethernet driver, move it there.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 22 +++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  2 ++
 drivers/net/ethernet/mellanox/mlx4/port.c      | 21 ---------------------
 include/linux/mlx4/device.h                    |  1 -
 4 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index ebc93a101c93..9556230465f0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -49,6 +49,11 @@
 #include "mlx4_en.h"
 #include "en_port.h"
 
+#define MLX4_STATS_TRAFFIC_COUNTERS_MASK	0xfULL
+#define MLX4_STATS_TRAFFIC_DROPS_MASK		0xc0ULL
+#define MLX4_STATS_ERROR_COUNTERS_MASK		0x1ffc30ULL
+#define MLX4_STATS_PORT_COUNTERS_MASK		0x7fe00000ULL
+
 int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -2648,6 +2653,21 @@ int mlx4_en_netdev_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
+void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
+{
+	if (!mlx4_is_mfunc(dev)) {
+		*stats_bitmap = 0;
+		return;
+	}
+
+	*stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK |
+			 MLX4_STATS_TRAFFIC_DROPS_MASK |
+			 MLX4_STATS_PORT_COUNTERS_MASK);
+
+	if (mlx4_is_master(dev))
+		*stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
+}
+
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 			struct mlx4_en_port_profile *prof)
 {
@@ -2881,7 +2901,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		queue_delayed_work(mdev->workqueue, &priv->service_task,
 				   SERVICE_TASK_DELAY);
 
-	mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap);
+	mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 6b379a2df4a0..af3d9b79277c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -730,6 +730,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 int mlx4_en_start_port(struct net_device *dev);
 void mlx4_en_stop_port(struct net_device *dev, int detach);
 
+void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap);
+
 void mlx4_en_free_resources(struct mlx4_en_priv *priv);
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 3f6f77112274..b97f173ab062 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -45,11 +45,6 @@
 #define MLX4_VLAN_VALID		(1u << 31)
 #define MLX4_VLAN_MASK		0xfff
 
-#define MLX4_STATS_TRAFFIC_COUNTERS_MASK	0xfULL
-#define MLX4_STATS_TRAFFIC_DROPS_MASK		0xc0ULL
-#define MLX4_STATS_ERROR_COUNTERS_MASK		0x1ffc30ULL
-#define MLX4_STATS_PORT_COUNTERS_MASK		0x7fe00000ULL
-
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
 {
 	int i;
@@ -1185,22 +1180,6 @@ int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
 					  vhcr->in_modifier, outbox);
 }
 
-void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
-{
-	if (!mlx4_is_mfunc(dev)) {
-		*stats_bitmap = 0;
-		return;
-	}
-
-	*stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK |
-			 MLX4_STATS_TRAFFIC_DROPS_MASK |
-			 MLX4_STATS_PORT_COUNTERS_MASK);
-
-	if (mlx4_is_master(dev))
-		*stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
-}
-EXPORT_SYMBOL(mlx4_set_stats_bitmap);
-
 int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
 				 int *slave_id)
 {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 4550c67b92e4..49abbe28e230 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1300,7 +1300,6 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port);
 int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
-void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap);
 int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
 int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
-- 
cgit v1.2.3


From 0b131561a7d639abb0a194d2d8fae839ce3b99e9 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Mon, 30 Mar 2015 17:45:25 +0300
Subject: net/mlx4_en: Add Flow control statistics display via ethtool

Flow control per priority and Global pause counters are now visible via
ethtool.  The counters shows statistics regarding pauses in the device.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c  |  4 ++
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 74 +++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c  | 57 ++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/en_port.c    | 50 +++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/fw.c         | 10 +++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    | 13 ++++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h | 52 ++++++++++++++++-
 include/linux/mlx4/device.h                     |  3 +-
 8 files changed, 255 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
index cde14fa2f742..8e3260c0eaa5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -226,6 +226,10 @@ static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev,
 				    prof->rx_ppp);
 	if (err)
 		en_err(priv, "Failed setting pause params\n");
+	else
+		mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+						prof->rx_ppp, prof->rx_pause,
+						prof->tx_ppp, prof->tx_pause);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 3e7ed39e8e76..4fc767461b4f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -119,6 +119,48 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
 	"queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed",
 	"rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload",
 
+	/* priority flow control statistics rx */
+	"rx_pause_prio_0", "rx_pause_duration_prio_0",
+	"rx_pause_transition_prio_0",
+	"rx_pause_prio_1", "rx_pause_duration_prio_1",
+	"rx_pause_transition_prio_1",
+	"rx_pause_prio_2", "rx_pause_duration_prio_2",
+	"rx_pause_transition_prio_2",
+	"rx_pause_prio_3", "rx_pause_duration_prio_3",
+	"rx_pause_transition_prio_3",
+	"rx_pause_prio_4", "rx_pause_duration_prio_4",
+	"rx_pause_transition_prio_4",
+	"rx_pause_prio_5", "rx_pause_duration_prio_5",
+	"rx_pause_transition_prio_5",
+	"rx_pause_prio_6", "rx_pause_duration_prio_6",
+	"rx_pause_transition_prio_6",
+	"rx_pause_prio_7", "rx_pause_duration_prio_7",
+	"rx_pause_transition_prio_7",
+
+	/* flow control statistics rx */
+	"rx_pause", "rx_pause_duration", "rx_pause_transition",
+
+	/* priority flow control statistics tx */
+	"tx_pause_prio_0", "tx_pause_duration_prio_0",
+	"tx_pause_transition_prio_0",
+	"tx_pause_prio_1", "tx_pause_duration_prio_1",
+	"tx_pause_transition_prio_1",
+	"tx_pause_prio_2", "tx_pause_duration_prio_2",
+	"tx_pause_transition_prio_2",
+	"tx_pause_prio_3", "tx_pause_duration_prio_3",
+	"tx_pause_transition_prio_3",
+	"tx_pause_prio_4", "tx_pause_duration_prio_4",
+	"tx_pause_transition_prio_4",
+	"tx_pause_prio_5", "tx_pause_duration_prio_5",
+	"tx_pause_transition_prio_5",
+	"tx_pause_prio_6", "tx_pause_duration_prio_6",
+	"tx_pause_transition_prio_6",
+	"tx_pause_prio_7", "tx_pause_duration_prio_7",
+	"tx_pause_transition_prio_7",
+
+	/* flow control statistics tx */
+	"tx_pause", "tx_pause_duration", "tx_pause_transition",
+
 	/* packet statistics */
 	"broadcast", "rx_prio_0", "rx_prio_1", "rx_prio_2", "rx_prio_3",
 	"rx_prio_4", "rx_prio_5", "rx_prio_6", "rx_prio_7", "tx_prio_0",
@@ -304,6 +346,26 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 		if (bitmap_iterator_test(&it))
 			data[index++] = ((unsigned long *)&priv->port_stats)[i];
 
+	for (i = 0; i < NUM_FLOW_PRIORITY_STATS_RX;
+	     i++, bitmap_iterator_inc(&it))
+		if (bitmap_iterator_test(&it))
+			data[index++] =
+				((u64 *)&priv->rx_priority_flowstats)[i];
+
+	for (i = 0; i < NUM_FLOW_STATS_RX; i++, bitmap_iterator_inc(&it))
+		if (bitmap_iterator_test(&it))
+			data[index++] = ((u64 *)&priv->rx_flowstats)[i];
+
+	for (i = 0; i < NUM_FLOW_PRIORITY_STATS_TX;
+	     i++, bitmap_iterator_inc(&it))
+		if (bitmap_iterator_test(&it))
+			data[index++] =
+				((u64 *)&priv->tx_priority_flowstats)[i];
+
+	for (i = 0; i < NUM_FLOW_STATS_TX; i++, bitmap_iterator_inc(&it))
+		if (bitmap_iterator_test(&it))
+			data[index++] = ((u64 *)&priv->tx_flowstats)[i];
+
 	for (i = 0; i < NUM_PKT_STATS; i++, bitmap_iterator_inc(&it))
 		if (bitmap_iterator_test(&it))
 			data[index++] = ((unsigned long *)&priv->pkstats)[i];
@@ -364,6 +426,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
 				strcpy(data + (index++) * ETH_GSTRING_LEN,
 				       main_strings[strings]);
 
+		for (i = 0; i < NUM_FLOW_STATS; i++, strings++,
+		     bitmap_iterator_inc(&it))
+			if (bitmap_iterator_test(&it))
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+				       main_strings[strings]);
+
 		for (i = 0; i < NUM_PKT_STATS; i++, strings++,
 		     bitmap_iterator_inc(&it))
 			if (bitmap_iterator_test(&it))
@@ -910,6 +978,12 @@ static int mlx4_en_set_pauseparam(struct net_device *dev,
 				    priv->prof->rx_ppp);
 	if (err)
 		en_err(priv, "Failed setting pause params\n");
+	else
+		mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+						priv->prof->rx_ppp,
+						priv->prof->rx_pause,
+						priv->prof->tx_ppp,
+						priv->prof->tx_pause);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 4542bab9494b..354e254b53cf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1888,6 +1888,12 @@ static void mlx4_en_clear_stats(struct net_device *dev)
 	memset(&priv->pstats, 0, sizeof(priv->pstats));
 	memset(&priv->pkstats, 0, sizeof(priv->pkstats));
 	memset(&priv->port_stats, 0, sizeof(priv->port_stats));
+	memset(&priv->rx_flowstats, 0, sizeof(priv->rx_flowstats));
+	memset(&priv->tx_flowstats, 0, sizeof(priv->tx_flowstats));
+	memset(&priv->rx_priority_flowstats, 0,
+	       sizeof(priv->rx_priority_flowstats));
+	memset(&priv->tx_priority_flowstats, 0,
+	       sizeof(priv->tx_priority_flowstats));
 
 	for (i = 0; i < priv->tx_ring_num; i++) {
 		priv->tx_ring[i]->bytes = 0;
@@ -2648,8 +2654,46 @@ int mlx4_en_netdev_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
+void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev,
+				     struct mlx4_en_stats_bitmap *stats_bitmap,
+				     u8 rx_ppp, u8 rx_pause,
+				     u8 tx_ppp, u8 tx_pause)
+{
+	int last_i = NUM_MAIN_STATS + NUM_PORT_STATS;
+
+	if (!mlx4_is_slave(dev) &&
+	    (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN)) {
+		mutex_lock(&stats_bitmap->mutex);
+		bitmap_clear(stats_bitmap->bitmap, last_i, NUM_FLOW_STATS);
+
+		if (rx_ppp)
+			bitmap_set(stats_bitmap->bitmap, last_i,
+				   NUM_FLOW_PRIORITY_STATS_RX);
+		last_i += NUM_FLOW_PRIORITY_STATS_RX;
+
+		if (rx_pause && !(rx_ppp))
+			bitmap_set(stats_bitmap->bitmap, last_i,
+				   NUM_FLOW_STATS_RX);
+		last_i += NUM_FLOW_STATS_RX;
+
+		if (tx_ppp)
+			bitmap_set(stats_bitmap->bitmap, last_i,
+				   NUM_FLOW_PRIORITY_STATS_TX);
+		last_i += NUM_FLOW_PRIORITY_STATS_TX;
+
+		if (tx_pause && !(tx_ppp))
+			bitmap_set(stats_bitmap->bitmap, last_i,
+				   NUM_FLOW_STATS_TX);
+		last_i += NUM_FLOW_STATS_TX;
+
+		mutex_unlock(&stats_bitmap->mutex);
+	}
+}
+
 void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
-			      struct mlx4_en_stats_bitmap *stats_bitmap)
+			      struct mlx4_en_stats_bitmap *stats_bitmap,
+			      u8 rx_ppp, u8 rx_pause,
+			      u8 tx_ppp, u8 tx_pause)
 {
 	int last_i = 0;
 
@@ -2677,6 +2721,11 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 	bitmap_set(stats_bitmap->bitmap, last_i, NUM_PORT_STATS);
 	last_i += NUM_PORT_STATS;
 
+	mlx4_en_update_pfc_stats_bitmap(dev, stats_bitmap,
+					rx_ppp, rx_pause,
+					tx_ppp, tx_pause);
+	last_i += NUM_FLOW_STATS;
+
 	if (!mlx4_is_slave(dev))
 		bitmap_set(stats_bitmap->bitmap, last_i, NUM_PKT_STATS);
 }
@@ -2914,7 +2963,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		queue_delayed_work(mdev->workqueue, &priv->service_task,
 				   SERVICE_TASK_DELAY);
 
-	mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap);
+	mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+				 mdev->profile.prof[priv->port].rx_ppp,
+				 mdev->profile.prof[priv->port].rx_pause,
+				 mdev->profile.prof[priv->port].tx_ppp,
+				 mdev->profile.prof[priv->port].tx_pause);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 6cb80072af6c..821ae1278dc9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -131,6 +131,7 @@ out:
 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 {
 	struct mlx4_en_stat_out_mbox *mlx4_en_stats;
+	struct mlx4_en_stat_out_flow_control_mbox *flowstats;
 	struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]);
 	struct net_device_stats *stats = &priv->stats;
 	struct mlx4_cmd_mailbox *mailbox;
@@ -239,6 +240,55 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 	priv->pkstats.tx_prio[7] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7);
 	spin_unlock_bh(&priv->stats_lock);
 
+	/* 0xffs indicates invalid value */
+	memset(mailbox->buf, 0xff, sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
+
+	if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
+		memset(mailbox->buf, 0,
+		       sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
+		err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma,
+				   in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL,
+				   0, MLX4_CMD_DUMP_ETH_STATS,
+				   MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+		if (err)
+			goto out;
+	}
+
+	flowstats = mailbox->buf;
+
+	spin_lock_bh(&priv->stats_lock);
+
+	for (i = 0; i < MLX4_NUM_PRIORITIES; i++)	{
+		priv->rx_priority_flowstats[i].rx_pause =
+			be64_to_cpu(flowstats[i].rx_pause);
+		priv->rx_priority_flowstats[i].rx_pause_duration =
+			be64_to_cpu(flowstats[i].rx_pause_duration);
+		priv->rx_priority_flowstats[i].rx_pause_transition =
+			be64_to_cpu(flowstats[i].rx_pause_transition);
+		priv->tx_priority_flowstats[i].tx_pause =
+			be64_to_cpu(flowstats[i].tx_pause);
+		priv->tx_priority_flowstats[i].tx_pause_duration =
+			be64_to_cpu(flowstats[i].tx_pause_duration);
+		priv->tx_priority_flowstats[i].tx_pause_transition =
+			be64_to_cpu(flowstats[i].tx_pause_transition);
+	}
+
+	/* if pfc is not in use, all priorities counters have the same value */
+	priv->rx_flowstats.rx_pause =
+		be64_to_cpu(flowstats[0].rx_pause);
+	priv->rx_flowstats.rx_pause_duration =
+		be64_to_cpu(flowstats[0].rx_pause_duration);
+	priv->rx_flowstats.rx_pause_transition =
+		be64_to_cpu(flowstats[0].rx_pause_transition);
+	priv->tx_flowstats.tx_pause =
+		be64_to_cpu(flowstats[0].tx_pause);
+	priv->tx_flowstats.tx_pause_duration =
+		be64_to_cpu(flowstats[0].tx_pause_duration);
+	priv->tx_flowstats.tx_pause_transition =
+		be64_to_cpu(flowstats[0].tx_pause_transition);
+
+	spin_unlock_bh(&priv->stats_lock);
+
 out:
 	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 4a471f5d1b56..209a6171e59b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -145,7 +145,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[20] = "Recoverable error events support",
 		[21] = "Port Remap support",
 		[22] = "QCN support",
-		[23] = "QP rate limiting support"
+		[23] = "QP rate limiting support",
+		[24] = "Ethernet Flow control statistics support"
 	};
 	int i;
 
@@ -672,6 +673,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_RSVD_XRC_OFFSET		0x66
 #define QUERY_DEV_CAP_MAX_XRC_OFFSET		0x67
 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET	0x68
+#define QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET	0x70
 #define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET	0x70
 #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET	0x74
 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET	0x76
@@ -773,6 +775,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
 	dev_cap->num_ports = field & 0xf;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET);
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET);
+	if (field & 0x10)
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN;
 	dev_cap->max_msg_sz = 1 << (field & 0x1f);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
 	if (field & 0x80)
@@ -1088,6 +1093,7 @@ out:
 	return err;
 }
 
+#define DEV_CAP_EXT_2_FLAG_PFC_COUNTERS	(1 << 28)
 #define DEV_CAP_EXT_2_FLAG_VLAN_CONTROL (1 << 26)
 #define DEV_CAP_EXT_2_FLAG_80_VFS	(1 << 21)
 #define DEV_CAP_EXT_2_FLAG_FSM		(1 << 20)
@@ -1177,7 +1183,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	/* turn off host side virt features (VST, FSM, etc) for guests */
 	MLX4_GET(field32, outbox->buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 	field32 &= ~(DEV_CAP_EXT_2_FLAG_VLAN_CONTROL | DEV_CAP_EXT_2_FLAG_80_VFS |
-		     DEV_CAP_EXT_2_FLAG_FSM);
+		     DEV_CAP_EXT_2_FLAG_FSM | DEV_CAP_EXT_2_FLAG_PFC_COUNTERS);
 	MLX4_PUT(outbox->buf, field32, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 
 	/* turn off QCN for guests */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d5d971a408f2..67eeea244eff 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -565,6 +565,10 @@ struct mlx4_en_priv {
 #endif
 	struct mlx4_en_perf_stats pstats;
 	struct mlx4_en_pkt_stats pkstats;
+	struct mlx4_en_flow_stats_rx rx_priority_flowstats[MLX4_NUM_PRIORITIES];
+	struct mlx4_en_flow_stats_tx tx_priority_flowstats[MLX4_NUM_PRIORITIES];
+	struct mlx4_en_flow_stats_rx rx_flowstats;
+	struct mlx4_en_flow_stats_tx tx_flowstats;
 	struct mlx4_en_port_stats port_stats;
 	struct mlx4_en_stats_bitmap stats_bitmap;
 	struct list_head mc_list;
@@ -736,7 +740,9 @@ int mlx4_en_start_port(struct net_device *dev);
 void mlx4_en_stop_port(struct net_device *dev, int detach);
 
 void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
-			      struct mlx4_en_stats_bitmap *stats_bitmap);
+			      struct mlx4_en_stats_bitmap *stats_bitmap,
+			      u8 rx_ppp, u8 rx_pause,
+			      u8 tx_ppp, u8 tx_pause);
 
 void mlx4_en_free_resources(struct mlx4_en_priv *priv);
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
@@ -823,7 +829,10 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev);
 int mlx4_en_reset_config(struct net_device *dev,
 			 struct hwtstamp_config ts_config,
 			 netdev_features_t new_features);
-
+void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev,
+				     struct mlx4_en_stats_bitmap *stats_bitmap,
+				     u8 rx_ppp, u8 rx_pause,
+				     u8 tx_ppp, u8 tx_pause);
 int mlx4_en_netdev_event(struct notifier_block *this,
 			 unsigned long event, void *ptr);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
index d7183e30b40b..e193680fb527 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -42,8 +42,58 @@ struct mlx4_en_perf_stats {
 };
 
 #define NUM_MAIN_STATS	21
+
+#define MLX4_NUM_PRIORITIES	8
+
+struct mlx4_en_flow_stats_rx {
+	u64 rx_pause;
+	u64 rx_pause_duration;
+	u64 rx_pause_transition;
+#define NUM_FLOW_STATS_RX	3
+#define NUM_FLOW_PRIORITY_STATS_RX	(NUM_FLOW_STATS_RX * \
+					 MLX4_NUM_PRIORITIES)
+};
+
+struct mlx4_en_flow_stats_tx {
+	u64 tx_pause;
+	u64 tx_pause_duration;
+	u64 tx_pause_transition;
+#define NUM_FLOW_STATS_TX	3
+#define NUM_FLOW_PRIORITY_STATS_TX	(NUM_FLOW_STATS_TX * \
+					 MLX4_NUM_PRIORITIES)
+};
+
+#define NUM_FLOW_STATS (NUM_FLOW_STATS_RX + NUM_FLOW_STATS_TX + \
+			NUM_FLOW_PRIORITY_STATS_TX + \
+			NUM_FLOW_PRIORITY_STATS_RX)
+
+struct mlx4_en_stat_out_flow_control_mbox {
+	/* Total number of PAUSE frames received from the far-end port */
+	__be64 rx_pause;
+	/* Total number of microseconds that far-end port requested to pause
+	* transmission of packets
+	*/
+	__be64 rx_pause_duration;
+	/* Number of received transmission from XOFF state to XON state */
+	__be64 rx_pause_transition;
+	/* Total number of PAUSE frames sent from the far-end port */
+	__be64 tx_pause;
+	/* Total time in microseconds that transmission of packets has been
+	* paused
+	*/
+	__be64 tx_pause_duration;
+	/* Number of transmitter transitions from XOFF state to XON state */
+	__be64 tx_pause_transition;
+	/* Reserverd */
+	__be64 reserved[2];
+};
+
+enum {
+	MLX4_DUMP_ETH_STATS_FLOW_CONTROL = 1 << 12
+};
+
 #define NUM_ALL_STATS	(NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
-			 NUM_PERF_STATS)
+			 NUM_FLOW_STATS + NUM_PERF_STATS)
 
 #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
 				  sizeof(((struct net_device_stats *)0)->n))
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 49abbe28e230..ab7ebec943b8 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -205,7 +205,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20,
 	MLX4_DEV_CAP_FLAG2_PORT_REMAP		= 1LL <<  21,
 	MLX4_DEV_CAP_FLAG2_QCN			= 1LL <<  22,
-	MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT	= 1LL <<  23
+	MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT	= 1LL <<  23,
+	MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN         = 1LL <<  24
 };
 
 enum {
-- 
cgit v1.2.3


From a5581ef4c2eac6449188862e903eb46c7233582a Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Wed, 1 Apr 2015 07:50:29 +0200
Subject: can: introduce new raw socket option to join the given CAN filters

The CAN_RAW socket can set multiple CAN identifier specific filters that lead
to multiple filters in the af_can.c filter processing. These filters are
indenpendent from each other which leads to logical OR'ed filters when applied.

This socket option joines the given CAN filters in the way that only CAN frames
are passed to user space that matched *all* given CAN filters. The semantic for
the applied filters is therefore changed to a logical AND.

This is useful especially when the filterset is a combination of filters where
the CAN_INV_FILTER flag is set in order to notch single CAN IDs or CAN ID
ranges from the incoming traffic.

As the raw_rcv() function is executed from NET_RX softirq the introduced
variables are implemented as per-CPU variables to avoid extensive locking at
CAN frame reception time.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/networking/can.txt | 20 ++++++++++++++++++--
 include/uapi/linux/can/raw.h     |  1 +
 net/can/raw.c                    | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 49 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index 0a2859a8ee7e..5abad1e921ca 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt
@@ -22,7 +22,8 @@ This file contains
       4.1.3 RAW socket option CAN_RAW_LOOPBACK
       4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
       4.1.5 RAW socket option CAN_RAW_FD_FRAMES
-      4.1.6 RAW socket returned message flags
+      4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
+      4.1.7 RAW socket returned message flags
     4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
       4.2.1 Broadcast Manager operations
       4.2.2 Broadcast Manager message flags
@@ -601,7 +602,22 @@ solution for a couple of reasons:
   CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
   The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
 
-  4.1.6 RAW socket returned message flags
+  4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
+
+  The CAN_RAW socket can set multiple CAN identifier specific filters that
+  lead to multiple filters in the af_can.c filter processing. These filters
+  are indenpendent from each other which leads to logical OR'ed filters when
+  applied (see 4.1.1).
+
+  This socket option joines the given CAN filters in the way that only CAN
+  frames are passed to user space that matched *all* given CAN filters. The
+  semantic for the applied filters is therefore changed to a logical AND.
+
+  This is useful especially when the filterset is a combination of filters
+  where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
+  CAN ID ranges from the incoming traffic.
+
+  4.1.7 RAW socket returned message flags
 
   When using recvmsg() call, the msg->msg_flags may contain following flags:
 
diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index 78ec76fd89a6..8735f1080385 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h
@@ -57,6 +57,7 @@ enum {
 	CAN_RAW_LOOPBACK,	/* local loopback (default:on)       */
 	CAN_RAW_RECV_OWN_MSGS,	/* receive my own msgs (default:off) */
 	CAN_RAW_FD_FRAMES,	/* allow CAN FD frames (default:off) */
+	CAN_RAW_JOIN_FILTERS,	/* all filters must match to trigger */
 };
 
 #endif /* !_UAPI_CAN_RAW_H */
diff --git a/net/can/raw.c b/net/can/raw.c
index 0c8d537b59b8..31b9748cbb4e 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -77,6 +77,7 @@ MODULE_ALIAS("can-proto-1");
 struct uniqframe {
 	ktime_t tstamp;
 	const struct sk_buff *skb;
+	unsigned int join_rx_count;
 };
 
 struct raw_sock {
@@ -87,6 +88,7 @@ struct raw_sock {
 	int loopback;
 	int recv_own_msgs;
 	int fd_frames;
+	int join_filters;
 	int count;                 /* number of active filters */
 	struct can_filter dfilter; /* default/single filter */
 	struct can_filter *filter; /* pointer to filter(s) */
@@ -132,10 +134,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
 	/* eliminate multiple filter matches for the same skb */
 	if (this_cpu_ptr(ro->uniq)->skb == oskb &&
 	    ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) {
-		return;
+		if (ro->join_filters) {
+			this_cpu_inc(ro->uniq->join_rx_count);
+			/* drop frame until all enabled filters matched */
+			if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count)
+				return;
+		} else {
+			return;
+		}
 	} else {
 		this_cpu_ptr(ro->uniq)->skb = oskb;
 		this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp;
+		this_cpu_ptr(ro->uniq)->join_rx_count = 1;
+		/* drop first frame to check all enabled filters? */
+		if (ro->join_filters && ro->count > 1)
+			return;
 	}
 
 	/* clone the given skb to be able to enqueue it into the rcv queue */
@@ -311,6 +324,7 @@ static int raw_init(struct sock *sk)
 	ro->loopback         = 1;
 	ro->recv_own_msgs    = 0;
 	ro->fd_frames        = 0;
+	ro->join_filters     = 0;
 
 	/* alloc_percpu provides zero'ed memory */
 	ro->uniq = alloc_percpu(struct uniqframe);
@@ -604,6 +618,15 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 
 		break;
 
+	case CAN_RAW_JOIN_FILTERS:
+		if (optlen != sizeof(ro->join_filters))
+			return -EINVAL;
+
+		if (copy_from_user(&ro->join_filters, optval, optlen))
+			return -EFAULT;
+
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -668,6 +691,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
 		val = &ro->fd_frames;
 		break;
 
+	case CAN_RAW_JOIN_FILTERS:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = &ro->join_filters;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3


From 44ba06987c0b10faa998b9324850e8a6564c714d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 1 Apr 2015 16:31:26 +0100
Subject: RxRPC: Handle VERSION Rx protocol packets

Handle VERSION Rx protocol packets.  We should respond to a VERSION packet
with a string indicating the Rx version.  This is a maximum of 64 characters
and is padded out to 65 chars with NUL bytes.

Note that other AFS clients use the version request as a NAT keepalive so we
need to handle it rather than returning an abort.

The standard formulation seems to be:

	<project> <version> built <yyyy>-<mm>-<dd>

for example:

	" OpenAFS 1.6.2 built  2013-05-07 "

(note the three extra spaces) as obtained with:

	rxdebug grand.mit.edu -version

from the openafs package.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/rxrpc/packet.h  |  3 +-
 net/rxrpc/ar-input.c    | 23 +++++++++++-
 net/rxrpc/ar-internal.h |  2 +
 net/rxrpc/ar-local.c    | 98 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 124 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index f2902ef7ab75..4dce116bfd80 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -47,7 +47,8 @@ struct rxrpc_header {
 #define RXRPC_PACKET_TYPE_CHALLENGE	6	/* connection security challenge (SRVR->CLNT) */
 #define RXRPC_PACKET_TYPE_RESPONSE	7	/* connection secutity response (CLNT->SRVR) */
 #define RXRPC_PACKET_TYPE_DEBUG		8	/* debug info request */
-#define RXRPC_N_PACKET_TYPES		9	/* number of packet types (incl type 0) */
+#define RXRPC_PACKET_TYPE_VERSION	13	/* version string request */
+#define RXRPC_N_PACKET_TYPES		14	/* number of packet types (incl type 0) */
 
 	uint8_t		flags;		/* packet flags */
 #define RXRPC_CLIENT_INITIATED	0x01		/* signifies a packet generated by a client */
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 481f89f93789..4505a691d88c 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -28,7 +28,7 @@
 const char *rxrpc_pkts[] = {
 	"?00",
 	"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
-	"?09", "?10", "?11", "?12", "?13", "?14", "?15"
+	"?09", "?10", "?11", "?12", "VERSION", "?14", "?15"
 };
 
 /*
@@ -593,6 +593,20 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
 	rxrpc_queue_conn(conn);
 }
 
+/*
+ * post endpoint-level events to the local endpoint
+ * - this includes debug and version messages
+ */
+static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
+				       struct sk_buff *skb)
+{
+	_enter("%p,%p", local, skb);
+
+	atomic_inc(&local->usage);
+	skb_queue_tail(&local->event_queue, skb);
+	rxrpc_queue_work(&local->event_processor);
+}
+
 static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
 					       struct sk_buff *skb,
 					       struct rxrpc_skb_priv *sp)
@@ -699,6 +713,11 @@ void rxrpc_data_ready(struct sock *sk)
 		goto bad_message;
 	}
 
+	if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) {
+		rxrpc_post_packet_to_local(local, skb);
+		goto out;
+	}
+	
 	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
 	    (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
 		goto bad_message;
@@ -731,6 +750,8 @@ void rxrpc_data_ready(struct sock *sk)
 		else
 			goto cant_route_call;
 	}
+
+out:
 	rxrpc_put_local(local);
 	return;
 
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ba9fd36d3f15..9a4f7a26adc6 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -152,11 +152,13 @@ struct rxrpc_local {
 	struct work_struct	destroyer;	/* endpoint destroyer */
 	struct work_struct	acceptor;	/* incoming call processor */
 	struct work_struct	rejecter;	/* packet reject writer */
+	struct work_struct	event_processor; /* endpoint event processor */
 	struct list_head	services;	/* services listening on this endpoint */
 	struct list_head	link;		/* link in endpoint list */
 	struct rw_semaphore	defrag_sem;	/* control re-enablement of IP DF bit */
 	struct sk_buff_head	accept_queue;	/* incoming calls awaiting acceptance */
 	struct sk_buff_head	reject_queue;	/* packets awaiting rejection */
+	struct sk_buff_head	event_queue;	/* endpoint event packets awaiting processing */
 	spinlock_t		lock;		/* access lock */
 	rwlock_t		services_lock;	/* lock for services list */
 	atomic_t		usage;
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index 87f7135d238b..ca904ed5400a 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -13,16 +13,22 @@
 #include <linux/net.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
+#include <generated/utsrelease.h>
 #include "ar-internal.h"
 
+static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC";
+
 static LIST_HEAD(rxrpc_locals);
 DEFINE_RWLOCK(rxrpc_local_lock);
 static DECLARE_RWSEM(rxrpc_local_sem);
 static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq);
 
 static void rxrpc_destroy_local(struct work_struct *work);
+static void rxrpc_process_local_events(struct work_struct *work);
 
 /*
  * allocate a new local
@@ -37,11 +43,13 @@ struct rxrpc_local *rxrpc_alloc_local(struct sockaddr_rxrpc *srx)
 		INIT_WORK(&local->destroyer, &rxrpc_destroy_local);
 		INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls);
 		INIT_WORK(&local->rejecter, &rxrpc_reject_packets);
+		INIT_WORK(&local->event_processor, &rxrpc_process_local_events);
 		INIT_LIST_HEAD(&local->services);
 		INIT_LIST_HEAD(&local->link);
 		init_rwsem(&local->defrag_sem);
 		skb_queue_head_init(&local->accept_queue);
 		skb_queue_head_init(&local->reject_queue);
+		skb_queue_head_init(&local->event_queue);
 		spin_lock_init(&local->lock);
 		rwlock_init(&local->services_lock);
 		atomic_set(&local->usage, 1);
@@ -264,10 +272,12 @@ static void rxrpc_destroy_local(struct work_struct *work)
 	ASSERT(list_empty(&local->services));
 	ASSERT(!work_pending(&local->acceptor));
 	ASSERT(!work_pending(&local->rejecter));
+	ASSERT(!work_pending(&local->event_processor));
 
 	/* finish cleaning up the local descriptor */
 	rxrpc_purge_queue(&local->accept_queue);
 	rxrpc_purge_queue(&local->reject_queue);
+	rxrpc_purge_queue(&local->event_queue);
 	kernel_sock_shutdown(local->socket, SHUT_RDWR);
 	sock_release(local->socket);
 
@@ -308,3 +318,91 @@ void __exit rxrpc_destroy_all_locals(void)
 
 	_leave("");
 }
+
+/*
+ * Reply to a version request
+ */
+static void rxrpc_send_version_request(struct rxrpc_local *local,
+				       struct rxrpc_header *hdr,
+				       struct sk_buff *skb)
+{
+	struct sockaddr_in sin;
+	struct msghdr msg;
+	struct kvec iov[2];
+	size_t len;
+	int ret;
+
+	_enter("");
+
+	sin.sin_family = AF_INET;
+	sin.sin_port = udp_hdr(skb)->source;
+	sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+
+	msg.msg_name	= &sin;
+	msg.msg_namelen	= sizeof(sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	hdr->seq	= 0;
+	hdr->serial	= 0;
+	hdr->type	= RXRPC_PACKET_TYPE_VERSION;
+	hdr->flags	= RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED);
+	hdr->userStatus	= 0;
+	hdr->_rsvd	= 0;
+
+	iov[0].iov_base	= hdr;
+	iov[0].iov_len	= sizeof(*hdr);
+	iov[1].iov_base	= (char *)rxrpc_version_string;
+	iov[1].iov_len	= sizeof(rxrpc_version_string);
+
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	_proto("Tx VERSION (reply)");
+
+	ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
+	if (ret < 0)
+		_debug("sendmsg failed: %d", ret);
+
+	_leave("");
+}
+
+/*
+ * Process event packets targetted at a local endpoint.
+ */
+static void rxrpc_process_local_events(struct work_struct *work)
+{
+	struct rxrpc_local *local = container_of(work, struct rxrpc_local, event_processor);
+	struct sk_buff *skb;
+	char v;
+
+	_enter("");
+
+	atomic_inc(&local->usage);
+	
+	while ((skb = skb_dequeue(&local->event_queue))) {
+		struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+		kdebug("{%d},{%u}", local->debug_id, sp->hdr.type);
+
+		switch (sp->hdr.type) {
+		case RXRPC_PACKET_TYPE_VERSION:
+			if (skb_copy_bits(skb, 0, &v, 1) < 0)
+				return;
+			_proto("Rx VERSION { %02x }", v);
+			if (v == 0)
+				rxrpc_send_version_request(local, &sp->hdr, skb);
+			break;
+
+		default:
+			/* Just ignore anything we don't understand */
+			break;
+		}
+
+		rxrpc_put_local(local);
+		rxrpc_free_skb(skb);
+	}
+
+	rxrpc_put_local(local);
+	_leave("");
+}
-- 
cgit v1.2.3


From e6214487492566b15ff24e97c6747bb2e5d9e040 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Thu, 2 Apr 2015 13:41:08 +0300
Subject: Bluetooth: Add second hci_request callback option for full skb

This patch adds a second possible callback for HCI requests where the
callback will receive the full skb of the last successfully completed
HCI command. This API is useful for cases where we want to use a request
to read some data and the existing hci_event.c handlers do not store it
e.g. in the hci_dev struct.

The reason the patch is a bit bigger than just adding the new API is
because the hci_req_cmd_complete() functions required some refactoring
to enable it: now hci_req_cmd_complete() is simply used to request the
callback pointers if any, and the actual calling of them happens from a
single place at the end of hci_event_packet(). The reason for this is
that we need to pass the original skb (without any skb_pull, etc
modifications done to it) and it's simplest to keep track of it within
the hci_event_packet() function.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |  3 ++
 net/bluetooth/hci_core.c          | 30 +++++++---------
 net/bluetooth/hci_event.c         | 76 ++++++++++++++++++++++++++-------------
 net/bluetooth/hci_request.c       | 14 +++++++-
 net/bluetooth/hci_request.h       |  5 ++-
 5 files changed, 84 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index eeaff4b5cb62..7dba80546f16 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -277,11 +277,14 @@ struct l2cap_ctrl {
 struct hci_dev;
 
 typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
+typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status,
+				       u16 opcode, struct sk_buff *skb);
 
 struct req_ctrl {
 	bool start;
 	u8 event;
 	hci_req_complete_t complete;
+	hci_req_complete_skb_t complete_skb;
 };
 
 struct bt_skb_cb {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 246d7eca5d29..8af3af324eee 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -4288,9 +4288,10 @@ static void hci_resend_last(struct hci_dev *hdev)
 	queue_work(hdev->workqueue, &hdev->cmd_work);
 }
 
-void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
+void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
+			  hci_req_complete_t *req_complete,
+			  hci_req_complete_skb_t *req_complete_skb)
 {
-	hci_req_complete_t req_complete = NULL;
 	struct sk_buff *skb;
 	unsigned long flags;
 
@@ -4322,18 +4323,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
 	 * callback would be found in hdev->sent_cmd instead of the
 	 * command queue (hdev->cmd_q).
 	 */
-	if (hdev->sent_cmd) {
-		req_complete = bt_cb(hdev->sent_cmd)->req.complete;
-
-		if (req_complete) {
-			/* We must set the complete callback to NULL to
-			 * avoid calling the callback more than once if
-			 * this function gets called again.
-			 */
-			bt_cb(hdev->sent_cmd)->req.complete = NULL;
+	if (bt_cb(hdev->sent_cmd)->req.complete) {
+		*req_complete = bt_cb(hdev->sent_cmd)->req.complete;
+		return;
+	}
 
-			goto call_complete;
-		}
+	if (bt_cb(hdev->sent_cmd)->req.complete_skb) {
+		*req_complete_skb = bt_cb(hdev->sent_cmd)->req.complete_skb;
+		return;
 	}
 
 	/* Remove all pending commands belonging to this request */
@@ -4344,14 +4341,11 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
 			break;
 		}
 
-		req_complete = bt_cb(skb)->req.complete;
+		*req_complete = bt_cb(skb)->req.complete;
+		*req_complete_skb = bt_cb(skb)->req.complete_skb;
 		kfree_skb(skb);
 	}
 	spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
-
-call_complete:
-	if (req_complete)
-		req_complete(hdev, status, status ? opcode : HCI_OP_NOP);
 }
 
 static void hci_rx_work(struct work_struct *work)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 87e5bee36408..7c69eb3629b7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2731,17 +2731,19 @@ unlock:
 	hci_dev_unlock(hdev);
 }
 
-static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
+static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
+				 u16 *opcode, u8 *status,
+				 hci_req_complete_t *req_complete,
+				 hci_req_complete_skb_t *req_complete_skb)
 {
 	struct hci_ev_cmd_complete *ev = (void *) skb->data;
-	u8 status = skb->data[sizeof(*ev)];
-	__u16 opcode;
 
-	skb_pull(skb, sizeof(*ev));
+	*opcode = __le16_to_cpu(ev->opcode);
+	*status = skb->data[sizeof(*ev)];
 
-	opcode = __le16_to_cpu(ev->opcode);
+	skb_pull(skb, sizeof(*ev));
 
-	switch (opcode) {
+	switch (*opcode) {
 	case HCI_OP_INQUIRY_CANCEL:
 		hci_cc_inquiry_cancel(hdev, skb);
 		break;
@@ -3019,32 +3021,36 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		break;
 
 	default:
-		BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
+		BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode);
 		break;
 	}
 
-	if (opcode != HCI_OP_NOP)
+	if (*opcode != HCI_OP_NOP)
 		cancel_delayed_work(&hdev->cmd_timer);
 
 	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
 		atomic_set(&hdev->cmd_cnt, 1);
 
-	hci_req_cmd_complete(hdev, opcode, status);
+	hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
+			     req_complete_skb);
 
 	if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q))
 		queue_work(hdev->workqueue, &hdev->cmd_work);
 }
 
-static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
+static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
+			       u16 *opcode, u8 *status,
+			       hci_req_complete_t *req_complete,
+			       hci_req_complete_skb_t *req_complete_skb)
 {
 	struct hci_ev_cmd_status *ev = (void *) skb->data;
-	__u16 opcode;
 
 	skb_pull(skb, sizeof(*ev));
 
-	opcode = __le16_to_cpu(ev->opcode);
+	*opcode = __le16_to_cpu(ev->opcode);
+	*status = ev->status;
 
-	switch (opcode) {
+	switch (*opcode) {
 	case HCI_OP_INQUIRY:
 		hci_cs_inquiry(hdev, ev->status);
 		break;
@@ -3114,11 +3120,11 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		break;
 
 	default:
-		BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
+		BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode);
 		break;
 	}
 
-	if (opcode != HCI_OP_NOP)
+	if (*opcode != HCI_OP_NOP)
 		cancel_delayed_work(&hdev->cmd_timer);
 
 	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
@@ -3132,7 +3138,8 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	 */
 	if (ev->status ||
 	    (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
-		hci_req_cmd_complete(hdev, opcode, ev->status);
+		hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete,
+				     req_complete_skb);
 
 	if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q))
 		queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -5039,7 +5046,11 @@ static void hci_chan_selected_evt(struct hci_dev *hdev, struct sk_buff *skb)
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_event_hdr *hdr = (void *) skb->data;
-	__u8 event = hdr->evt;
+	hci_req_complete_t req_complete = NULL;
+	hci_req_complete_skb_t req_complete_skb = NULL;
+	struct sk_buff *orig_skb = NULL;
+	u8 status = 0, event = hdr->evt;
+	u16 opcode = HCI_OP_NOP;
 
 	hci_dev_lock(hdev);
 
@@ -5053,15 +5064,24 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hci_dev_unlock(hdev);
 
-	skb_pull(skb, HCI_EVENT_HDR_SIZE);
-
 	if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
 		struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
-		u16 opcode = __le16_to_cpu(cmd_hdr->opcode);
-
-		hci_req_cmd_complete(hdev, opcode, 0);
+		opcode = __le16_to_cpu(cmd_hdr->opcode);
+		hci_req_cmd_complete(hdev, opcode, status, &req_complete,
+				     &req_complete_skb);
 	}
 
+	/* If it looks like we might end up having to call
+	 * req_complete_skb, store a pristine copy of the skb since the
+	 * various handlers may modify the original one through
+	 * skb_pull() calls, etc.
+	 */
+	if (req_complete_skb || event == HCI_EV_CMD_STATUS ||
+	    event == HCI_EV_CMD_COMPLETE)
+		orig_skb = skb_clone(skb, GFP_KERNEL);
+
+	skb_pull(skb, HCI_EVENT_HDR_SIZE);
+
 	switch (event) {
 	case HCI_EV_INQUIRY_COMPLETE:
 		hci_inquiry_complete_evt(hdev, skb);
@@ -5104,11 +5124,13 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		break;
 
 	case HCI_EV_CMD_COMPLETE:
-		hci_cmd_complete_evt(hdev, skb);
+		hci_cmd_complete_evt(hdev, skb, &opcode, &status,
+				     &req_complete, &req_complete_skb);
 		break;
 
 	case HCI_EV_CMD_STATUS:
-		hci_cmd_status_evt(hdev, skb);
+		hci_cmd_status_evt(hdev, skb, &opcode, &status, &req_complete,
+				   &req_complete_skb);
 		break;
 
 	case HCI_EV_HARDWARE_ERROR:
@@ -5240,6 +5262,12 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		break;
 	}
 
+	if (req_complete)
+		req_complete(hdev, status, opcode);
+	else if (req_complete_skb)
+		req_complete_skb(hdev, status, opcode, orig_skb);
+
+	kfree_skb(orig_skb);
 	kfree_skb(skb);
 	hdev->stat.evt_rx++;
 }
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 7e17907effb3..d6025d6e6d59 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -34,7 +34,8 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
 	req->err = 0;
 }
 
-int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
+static int req_run(struct hci_request *req, hci_req_complete_t complete,
+		   hci_req_complete_skb_t complete_skb)
 {
 	struct hci_dev *hdev = req->hdev;
 	struct sk_buff *skb;
@@ -56,6 +57,7 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
 
 	skb = skb_peek_tail(&req->cmd_q);
 	bt_cb(skb)->req.complete = complete;
+	bt_cb(skb)->req.complete_skb = complete_skb;
 
 	spin_lock_irqsave(&hdev->cmd_q.lock, flags);
 	skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
@@ -66,6 +68,16 @@ int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
 	return 0;
 }
 
+int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
+{
+	return req_run(req, complete, NULL);
+}
+
+int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete)
+{
+	return req_run(req, NULL, complete);
+}
+
 struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
 				const void *param)
 {
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index adf074d33544..bf6df92f42db 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -32,11 +32,14 @@ struct hci_request {
 
 void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
 int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
+int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
 void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
 		 const void *param);
 void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
 		    const void *param, u8 event);
-void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status);
+void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
+			  hci_req_complete_t *req_complete,
+			  hci_req_complete_skb_t *req_complete_skb);
 
 struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
 				const void *param);
-- 
cgit v1.2.3


From f60cb30579d3401cab1ed36b42df5c0568ae0ba7 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Thu, 2 Apr 2015 13:41:09 +0300
Subject: Bluetooth: Convert hci_req_sync family of function to new request API

Now that there's an API in place that allows passing the resulting skb
to the request callback we can conveniently convert the hci_req_sync and
related functions to use it. Since we still need to get the skb from the
async callback into the sleeping _sync() function the patch adds another
req_skb variable to hci_dev where the sync request state is tracked.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 540c07feece7..257ac04c00e1 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -334,6 +334,7 @@ struct hci_dev {
 	wait_queue_head_t	req_wait_q;
 	__u32			req_status;
 	__u32			req_result;
+	struct sk_buff		*req_skb;
 
 	void			*smp_data;
 	void			*smp_bredr_data;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 8af3af324eee..5cbb0957edc7 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -141,13 +141,16 @@ static const struct file_operations dut_mode_fops = {
 
 /* ---- HCI requests ---- */
 
-static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode)
+static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
+				  struct sk_buff *skb)
 {
 	BT_DBG("%s result 0x%2.2x", hdev->name, result);
 
 	if (hdev->req_status == HCI_REQ_PEND) {
 		hdev->req_result = result;
 		hdev->req_status = HCI_REQ_DONE;
+		if (skb)
+			hdev->req_skb = skb_get(skb);
 		wake_up_interruptible(&hdev->req_wait_q);
 	}
 }
@@ -164,18 +167,10 @@ static void hci_req_cancel(struct hci_dev *hdev, int err)
 }
 
 static struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
-					    u8 event)
+					    u8 event, struct sk_buff *skb)
 {
 	struct hci_ev_cmd_complete *ev;
 	struct hci_event_hdr *hdr;
-	struct sk_buff *skb;
-
-	hci_dev_lock(hdev);
-
-	skb = hdev->recv_evt;
-	hdev->recv_evt = NULL;
-
-	hci_dev_unlock(hdev);
 
 	if (!skb)
 		return ERR_PTR(-ENODATA);
@@ -223,6 +218,7 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct hci_request req;
+	struct sk_buff *skb;
 	int err = 0;
 
 	BT_DBG("%s", hdev->name);
@@ -236,7 +232,7 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
 	add_wait_queue(&hdev->req_wait_q, &wait);
 	set_current_state(TASK_INTERRUPTIBLE);
 
-	err = hci_req_run(&req, hci_req_sync_complete);
+	err = hci_req_run_skb(&req, hci_req_sync_complete);
 	if (err < 0) {
 		remove_wait_queue(&hdev->req_wait_q, &wait);
 		set_current_state(TASK_RUNNING);
@@ -265,13 +261,17 @@ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
 	}
 
 	hdev->req_status = hdev->req_result = 0;
+	skb = hdev->req_skb;
+	hdev->req_skb = NULL;
 
 	BT_DBG("%s end: err %d", hdev->name, err);
 
-	if (err < 0)
+	if (err < 0) {
+		kfree_skb(skb);
 		return ERR_PTR(err);
+	}
 
-	return hci_get_cmd_complete(hdev, opcode, event);
+	return hci_get_cmd_complete(hdev, opcode, event, skb);
 }
 EXPORT_SYMBOL(__hci_cmd_sync_ev);
 
@@ -303,7 +303,7 @@ static int __hci_req_sync(struct hci_dev *hdev,
 	add_wait_queue(&hdev->req_wait_q, &wait);
 	set_current_state(TASK_INTERRUPTIBLE);
 
-	err = hci_req_run(&req, hci_req_sync_complete);
+	err = hci_req_run_skb(&req, hci_req_sync_complete);
 	if (err < 0) {
 		hdev->req_status = 0;
 
-- 
cgit v1.2.3


From f7d9e97592aeb7742084814c5f37e25571b2d51d Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Thu, 2 Apr 2015 13:41:10 +0300
Subject: Bluetooth: Remove unneeded recv_event variable

Now that the synchronous HCI requests use the new API and a new private
variable the recv_evt member of hci_dev is no-longer needed. This patch
removes it.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  1 -
 net/bluetooth/hci_core.c         |  3 ---
 net/bluetooth/hci_event.c        | 12 ------------
 3 files changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 257ac04c00e1..4cefee0b6330 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -326,7 +326,6 @@ struct hci_dev {
 	struct sk_buff_head	raw_q;
 	struct sk_buff_head	cmd_q;
 
-	struct sk_buff		*recv_evt;
 	struct sk_buff		*sent_cmd;
 	struct sk_buff		*reassembly[NUM_REASSEMBLY];
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5cbb0957edc7..6192f6e3242f 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1690,9 +1690,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 		hdev->sent_cmd = NULL;
 	}
 
-	kfree_skb(hdev->recv_evt);
-	hdev->recv_evt = NULL;
-
 	/* After this point our queues are empty
 	 * and no tasks are scheduled. */
 	hdev->close(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 7c69eb3629b7..dc9547c11c45 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5052,18 +5052,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 	u8 status = 0, event = hdr->evt;
 	u16 opcode = HCI_OP_NOP;
 
-	hci_dev_lock(hdev);
-
-	/* Received events are (currently) only needed when a request is
-	 * ongoing so avoid unnecessary memory allocation.
-	 */
-	if (hci_req_pending(hdev)) {
-		kfree_skb(hdev->recv_evt);
-		hdev->recv_evt = skb_clone(skb, GFP_KERNEL);
-	}
-
-	hci_dev_unlock(hdev);
-
 	if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
 		struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
 		opcode = __le16_to_cpu(cmd_hdr->opcode);
-- 
cgit v1.2.3


From abe66a4d036933c7376b40b0d7bb5de0458331aa Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Thu, 2 Apr 2015 13:41:11 +0300
Subject: Bluetooth: Remove unused hci_req_pending() function

The hci_req_pending() function has no users anymore, so simply remove
it.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 --
 net/bluetooth/hci_core.c         | 5 -----
 2 files changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 4cefee0b6330..12686e8e9343 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1284,8 +1284,6 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency,
 int hci_register_cb(struct hci_cb *hcb);
 int hci_unregister_cb(struct hci_cb *hcb);
 
-bool hci_req_pending(struct hci_dev *hdev);
-
 struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
 			       const void *param, u32 timeout);
 struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 6192f6e3242f..fda23720e7b8 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3560,11 +3560,6 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
 	}
 }
 
-bool hci_req_pending(struct hci_dev *hdev)
-{
-	return (hdev->req_status == HCI_REQ_PEND);
-}
-
 /* Send HCI command */
 int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
 		 const void *param)
-- 
cgit v1.2.3


From 1b9441f8ec426223f6f54f2af10ee01c8b743e5b Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Thu, 2 Apr 2015 13:41:13 +0300
Subject: Bluetooth: Convert local OOB data reading to use HCI request

Now that there's a HCI request API available where the callback receives
the resulting skb, we can convert the local OOB data reading to use this
new API. This patch does the necessary update in mgmt.c (which also
requires moving the callback higher up since it's now a static function)
and removes the custom calls from hci_event.c that are no-longer
necessary.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |   3 --
 net/bluetooth/hci_event.c        |  11 ----
 net/bluetooth/mgmt.c             | 105 ++++++++++++++++++++++++---------------
 3 files changed, 65 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 12686e8e9343..93fd3e756b8a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1391,9 +1391,6 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status);
 void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
 				    u8 status);
 void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status);
-void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192,
-				       u8 *rand192, u8 *hash256, u8 *rand256,
-				       u8 status);
 void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		       u8 addr_type, u8 *dev_class, s8 rssi, u32 flags,
 		       u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index c2483cb6ffbd..01031038eb0e 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1045,11 +1045,6 @@ static void hci_cc_read_local_oob_data(struct hci_dev *hdev,
 	struct hci_rp_read_local_oob_data *rp = (void *) skb->data;
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
-
-	hci_dev_lock(hdev);
-	mgmt_read_local_oob_data_complete(hdev, rp->hash, rp->rand, NULL, NULL,
-					  rp->status);
-	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_read_local_oob_ext_data(struct hci_dev *hdev,
@@ -1058,12 +1053,6 @@ static void hci_cc_read_local_oob_ext_data(struct hci_dev *hdev,
 	struct hci_rp_read_local_oob_ext_data *rp = (void *) skb->data;
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
-
-	hci_dev_lock(hdev);
-	mgmt_read_local_oob_data_complete(hdev, rp->hash192, rp->rand192,
-					  rp->hash256, rp->rand256,
-					  rp->status);
-	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 3048092b1264..bb0c53ac4c66 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3783,10 +3783,70 @@ failed:
 	return err;
 }
 
+static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
+				         u16 opcode, struct sk_buff *skb)
+{
+	struct mgmt_rp_read_local_oob_data mgmt_rp;
+	size_t rp_size = sizeof(mgmt_rp);
+	struct mgmt_pending_cmd *cmd;
+
+	BT_DBG("%s status %u", hdev->name, status);
+
+	cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev);
+	if (!cmd)
+		return;
+
+	if (status || !skb) {
+		mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
+				status ? mgmt_status(status) : MGMT_STATUS_FAILED);
+		goto remove;
+	}
+
+	memset(&mgmt_rp, 0, sizeof(mgmt_rp));
+
+	if (opcode == HCI_OP_READ_LOCAL_OOB_DATA) {
+		struct hci_rp_read_local_oob_data *rp = (void *) skb->data;
+
+		if (skb->len < sizeof(*rp)) {
+			mgmt_cmd_status(cmd->sk, hdev->id,
+					MGMT_OP_READ_LOCAL_OOB_DATA,
+					MGMT_STATUS_FAILED);
+			goto remove;
+		}
+
+		memcpy(mgmt_rp.hash192, rp->hash, sizeof(rp->hash));
+		memcpy(mgmt_rp.rand192, rp->rand, sizeof(rp->rand));
+
+		rp_size -= sizeof(mgmt_rp.hash256) + sizeof(mgmt_rp.rand256);
+	} else {
+		struct hci_rp_read_local_oob_ext_data *rp = (void *) skb->data;
+
+		if (skb->len < sizeof(*rp)) {
+			mgmt_cmd_status(cmd->sk, hdev->id,
+					MGMT_OP_READ_LOCAL_OOB_DATA,
+					MGMT_STATUS_FAILED);
+			goto remove;
+		}
+
+		memcpy(mgmt_rp.hash192, rp->hash192, sizeof(rp->hash192));
+		memcpy(mgmt_rp.rand192, rp->rand192, sizeof(rp->rand192));
+
+		memcpy(mgmt_rp.hash256, rp->hash256, sizeof(rp->hash256));
+		memcpy(mgmt_rp.rand256, rp->rand256, sizeof(rp->rand256));
+	}
+
+	mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
+			  MGMT_STATUS_SUCCESS, &mgmt_rp, rp_size);
+
+remove:
+	mgmt_pending_remove(cmd);
+}
+
 static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
 			       void *data, u16 data_len)
 {
 	struct mgmt_pending_cmd *cmd;
+	struct hci_request req;
 	int err;
 
 	BT_DBG("%s", hdev->name);
@@ -3817,12 +3877,14 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
 		goto unlock;
 	}
 
+	hci_req_init(&req, hdev);
+
 	if (bredr_sc_enabled(hdev))
-		err = hci_send_cmd(hdev, HCI_OP_READ_LOCAL_OOB_EXT_DATA,
-				   0, NULL);
+		hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_EXT_DATA, 0, NULL);
 	else
-		err = hci_send_cmd(hdev, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL);
+		hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL);
 
+	err = hci_req_run_skb(&req, read_local_oob_data_complete);
 	if (err < 0)
 		mgmt_pending_remove(cmd);
 
@@ -7920,43 +7982,6 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)
 			   cmd ? cmd->sk : NULL);
 }
 
-void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192,
-				       u8 *rand192, u8 *hash256, u8 *rand256,
-				       u8 status)
-{
-	struct mgmt_pending_cmd *cmd;
-
-	BT_DBG("%s status %u", hdev->name, status);
-
-	cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev);
-	if (!cmd)
-		return;
-
-	if (status) {
-		mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
-			        mgmt_status(status));
-	} else {
-		struct mgmt_rp_read_local_oob_data rp;
-		size_t rp_size = sizeof(rp);
-
-		memcpy(rp.hash192, hash192, sizeof(rp.hash192));
-		memcpy(rp.rand192, rand192, sizeof(rp.rand192));
-
-		if (bredr_sc_enabled(hdev) && hash256 && rand256) {
-			memcpy(rp.hash256, hash256, sizeof(rp.hash256));
-			memcpy(rp.rand256, rand256, sizeof(rp.rand256));
-		} else {
-			rp_size -= sizeof(rp.hash256) + sizeof(rp.rand256);
-		}
-
-		mgmt_cmd_complete(cmd->sk, hdev->id,
-				  MGMT_OP_READ_LOCAL_OOB_DATA, 0,
-				  &rp, rp_size);
-	}
-
-	mgmt_pending_remove(cmd);
-}
-
 static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16])
 {
 	int i;
-- 
cgit v1.2.3


From a54acb3a6f853e8394c4cb7b6a4d93c88f13eefd Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 2 Apr 2015 17:07:00 +0200
Subject: dev: introduce dev_get_iflink()

The goal of this patch is to prepare the removal of the iflink field. It
introduces a new ndo function, which will be implemented by virtual interfaces.

There is no functional change into this patch. All readers of iflink field
now call dev_get_iflink().

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_core.c |  2 +-
 include/linux/netdevice.h        |  4 ++++
 net/batman-adv/hard-interface.c  |  5 +++--
 net/bridge/br_netlink.c          |  4 ++--
 net/core/dev.c                   | 21 +++++++++++++++++++--
 net/core/link_watch.c            |  4 ++--
 net/core/net-sysfs.c             | 10 +++++++++-
 net/core/rtnetlink.c             |  8 ++++----
 net/ipv4/ipmr.c                  |  2 +-
 net/ipv6/addrconf.c              |  4 ++--
 net/ipv6/ip6mr.c                 |  2 +-
 11 files changed, 48 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 2a175006028b..131bde98188d 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -330,7 +330,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
 	struct rtable *rt;
 	int err, ret = NET_XMIT_DROP;
 	struct flowi4 fl4 = {
-		.flowi4_oif = dev->iflink,
+		.flowi4_oif = dev_get_iflink(dev),
 		.flowi4_tos = RT_TOS(ip4h->tos),
 		.flowi4_flags = FLOWI_FLAG_ANYSRC,
 		.daddr = ip4h->daddr,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 967bb4c8caf1..788eb7a622ad 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1030,6 +1030,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *			     int queue_index, u32 maxrate);
  *	Called when a user wants to set a max-rate limitation of specific
  *	TX queue.
+ * int (*ndo_get_iflink)(const struct net_device *dev);
+ *	Called to get the iflink value of this device.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1191,6 +1193,7 @@ struct net_device_ops {
 	int			(*ndo_set_tx_maxrate)(struct net_device *dev,
 						      int queue_index,
 						      u32 maxrate);
+	int			(*ndo_get_iflink)(const struct net_device *dev);
 };
 
 /**
@@ -2149,6 +2152,7 @@ void __dev_remove_pack(struct packet_type *pt);
 void dev_add_offload(struct packet_offload *po);
 void dev_remove_offload(struct packet_offload *po);
 
+int dev_get_iflink(const struct net_device *dev);
 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
 				      unsigned short mask);
 struct net_device *dev_get_by_name(struct net *net, const char *name);
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index fbda6b54baff..baf1f9843f2c 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -83,11 +83,12 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
 		return true;
 
 	/* no more parents..stop recursion */
-	if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex)
+	if (dev_get_iflink(net_dev) == 0 ||
+	    dev_get_iflink(net_dev) == net_dev->ifindex)
 		return false;
 
 	/* recurse over the parent device */
-	parent_dev = __dev_get_by_index(&init_net, net_dev->iflink);
+	parent_dev = __dev_get_by_index(&init_net, dev_get_iflink(net_dev));
 	/* if we got a NULL parent_dev there is something broken.. */
 	if (WARN(!parent_dev, "Cannot find parent device"))
 		return false;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e1115a224a95..0e4ddb81610d 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -305,8 +305,8 @@ static int br_fill_ifinfo(struct sk_buff *skb,
 	    nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
 	    (dev->addr_len &&
 	     nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
-	    (dev->ifindex != dev->iflink &&
-	     nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+	    (dev->ifindex != dev_get_iflink(dev) &&
+	     nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
 		goto nla_put_failure;
 
 	if (event == RTM_NEWLINK && port) {
diff --git a/net/core/dev.c b/net/core/dev.c
index 65492b0354c0..77172d085760 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -659,6 +659,23 @@ __setup("netdev=", netdev_boot_setup);
 
 *******************************************************************************/
 
+/**
+ *	dev_get_iflink	- get 'iflink' value of a interface
+ *	@dev: targeted interface
+ *
+ *	Indicates the ifindex the interface is linked to.
+ *	Physical interfaces have the same 'ifindex' and 'iflink' values.
+ */
+
+int dev_get_iflink(const struct net_device *dev)
+{
+	if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
+		return dev->netdev_ops->ndo_get_iflink(dev);
+
+	return dev->iflink;
+}
+EXPORT_SYMBOL(dev_get_iflink);
+
 /**
  *	__dev_get_by_name	- find a device by its name
  *	@net: the applicable net namespace
@@ -6345,7 +6362,7 @@ int register_netdevice(struct net_device *dev)
 	else if (__dev_get_by_index(net, dev->ifindex))
 		goto err_uninit;
 
-	if (dev->iflink == -1)
+	if (dev_get_iflink(dev) == -1)
 		dev->iflink = dev->ifindex;
 
 	/* Transfer changeable features to wanted_features and enable
@@ -7061,7 +7078,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	/* If there is an ifindex conflict assign a new one */
 	if (__dev_get_by_index(net, dev->ifindex)) {
-		int iflink = (dev->iflink == dev->ifindex);
+		int iflink = (dev_get_iflink(dev) == dev->ifindex);
 		dev->ifindex = dev_new_index(net);
 		if (iflink)
 			dev->iflink = dev->ifindex;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 49a9e3e06c08..982861607f88 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -40,7 +40,7 @@ static DEFINE_SPINLOCK(lweventlist_lock);
 static unsigned char default_operstate(const struct net_device *dev)
 {
 	if (!netif_carrier_ok(dev))
-		return (dev->ifindex != dev->iflink ?
+		return (dev->ifindex != dev_get_iflink(dev) ?
 			IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN);
 
 	if (netif_dormant(dev))
@@ -89,7 +89,7 @@ static bool linkwatch_urgent_event(struct net_device *dev)
 	if (!netif_running(dev))
 		return false;
 
-	if (dev->ifindex != dev->iflink)
+	if (dev->ifindex != dev_get_iflink(dev))
 		return true;
 
 	if (dev->priv_flags & IFF_TEAM_PORT)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index cc5cf689809c..4238d6da5c60 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -109,11 +109,19 @@ NETDEVICE_SHOW_RO(dev_id, fmt_hex);
 NETDEVICE_SHOW_RO(dev_port, fmt_dec);
 NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
 NETDEVICE_SHOW_RO(addr_len, fmt_dec);
-NETDEVICE_SHOW_RO(iflink, fmt_dec);
 NETDEVICE_SHOW_RO(ifindex, fmt_dec);
 NETDEVICE_SHOW_RO(type, fmt_dec);
 NETDEVICE_SHOW_RO(link_mode, fmt_dec);
 
+static ssize_t iflink_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct net_device *ndev = to_net_dev(dev);
+
+	return sprintf(buf, fmt_dec, dev_get_iflink(ndev));
+}
+static DEVICE_ATTR_RO(iflink);
+
 static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
 {
 	return sprintf(buf, fmt_dec, dev->name_assign_type);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b96ac2109c82..ee0186cdd5cf 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1055,8 +1055,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 #ifdef CONFIG_RPS
 	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
 #endif
-	    (dev->ifindex != dev->iflink &&
-	     nla_put_u32(skb, IFLA_LINK, dev->iflink)) ||
+	    (dev->ifindex != dev_get_iflink(dev) &&
+	     nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
 	    (upper_dev &&
 	     nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
 	    nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
@@ -2863,8 +2863,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 	     nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) ||
 	    (dev->addr_len &&
 	     nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
-	    (dev->ifindex != dev->iflink &&
-	     nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+	    (dev->ifindex != dev_get_iflink(dev) &&
+	     nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
 		goto nla_put_failure;
 
 	br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b4a545d24adb..eec68b0c3bc8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -801,7 +801,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 	v->pkt_out = 0;
 	v->link = dev->ifindex;
 	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
-		v->link = dev->iflink;
+		v->link = dev_get_iflink(dev);
 
 	/* And finish update writing critical data */
 	write_lock_bh(&mrt_lock);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5c9e94cb1b2c..37b70e82bff8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4858,8 +4858,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	    (dev->addr_len &&
 	     nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
 	    nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
-	    (dev->ifindex != dev->iflink &&
-	     nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+	    (dev->ifindex != dev_get_iflink(dev) &&
+	     nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
 		goto nla_put_failure;
 	protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
 	if (!protoinfo)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index caf6b99374e6..18a5ab286420 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -992,7 +992,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
 	v->pkt_out = 0;
 	v->link = dev->ifindex;
 	if (v->flags & MIFF_REGISTER)
-		v->link = dev->iflink;
+		v->link = dev_get_iflink(dev);
 
 	/* And finish update writing critical data */
 	write_lock_bh(&mrt_lock);
-- 
cgit v1.2.3


From ecf2c06a88d2ed534a87b84b8c1a467ab23352dd Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 2 Apr 2015 17:07:01 +0200
Subject: ip6tnl,gre6,vti6: implement ndo_get_iflink

Don't use dev->iflink anymore.

CC: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_tunnel.h |  1 +
 net/ipv6/ip6_gre.c       |  8 ++------
 net/ipv6/ip6_tunnel.c    | 10 ++++++++--
 net/ipv6/ip6_vti.c       |  3 +--
 4 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 76c091b53dae..1668be5937e6 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -71,6 +71,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw);
 __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr,
 			     const struct in6_addr *raddr);
 struct net *ip6_tnl_get_link_net(const struct net_device *dev);
+int ip6_tnl_get_iflink(const struct net_device *dev);
 
 static inline void ip6tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 0f4e73da14e4..f724329d7436 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1216,6 +1216,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
 	.ndo_do_ioctl		= ip6gre_tunnel_ioctl,
 	.ndo_change_mtu		= ip6gre_tunnel_change_mtu,
 	.ndo_get_stats64	= ip_tunnel_get_stats64,
+	.ndo_get_iflink		= ip6_tnl_get_iflink,
 };
 
 static void ip6gre_dev_free(struct net_device *dev)
@@ -1238,7 +1239,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
 	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		dev->mtu -= 8;
 	dev->flags |= IFF_NOARP;
-	dev->iflink = 0;
 	dev->addr_len = sizeof(struct in6_addr);
 	netif_keep_dst(dev);
 }
@@ -1270,8 +1270,6 @@ static int ip6gre_tunnel_init(struct net_device *dev)
 		u64_stats_init(&ip6gre_tunnel_stats->syncp);
 	}
 
-	dev->iflink = tunnel->parms.link;
-
 	return 0;
 }
 
@@ -1480,8 +1478,6 @@ static int ip6gre_tap_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 
-	dev->iflink = tunnel->parms.link;
-
 	return 0;
 }
 
@@ -1493,6 +1489,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_change_mtu = ip6gre_tunnel_change_mtu,
 	.ndo_get_stats64 = ip_tunnel_get_stats64,
+	.ndo_get_iflink = ip6_tnl_get_iflink,
 };
 
 static void ip6gre_tap_setup(struct net_device *dev)
@@ -1503,7 +1500,6 @@ static void ip6gre_tap_setup(struct net_device *dev)
 	dev->netdev_ops = &ip6gre_tap_netdev_ops;
 	dev->destructor = ip6gre_dev_free;
 
-	dev->iflink = 0;
 	dev->features |= NETIF_F_NETNS_LOCAL;
 }
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9bd85f0dff69..b6a211a150b2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1264,8 +1264,6 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 	else
 		dev->flags &= ~IFF_POINTOPOINT;
 
-	dev->iflink = p->link;
-
 	if (p->flags & IP6_TNL_F_CAP_XMIT) {
 		int strict = (ipv6_addr_type(&p->raddr) &
 			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
@@ -1517,6 +1515,13 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+int ip6_tnl_get_iflink(const struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+
+	return t->parms.link;
+}
+EXPORT_SYMBOL(ip6_tnl_get_iflink);
 
 static const struct net_device_ops ip6_tnl_netdev_ops = {
 	.ndo_init	= ip6_tnl_dev_init,
@@ -1525,6 +1530,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
 	.ndo_do_ioctl	= ip6_tnl_ioctl,
 	.ndo_change_mtu = ip6_tnl_change_mtu,
 	.ndo_get_stats	= ip6_get_stats,
+	.ndo_get_iflink = ip6_tnl_get_iflink,
 };
 
 
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 53d90ed68905..b53148444e15 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -601,8 +601,6 @@ static void vti6_link_config(struct ip6_tnl *t)
 		dev->flags |= IFF_POINTOPOINT;
 	else
 		dev->flags &= ~IFF_POINTOPOINT;
-
-	dev->iflink = p->link;
 }
 
 /**
@@ -808,6 +806,7 @@ static const struct net_device_ops vti6_netdev_ops = {
 	.ndo_do_ioctl	= vti6_ioctl,
 	.ndo_change_mtu = vti6_change_mtu,
 	.ndo_get_stats64 = ip_tunnel_get_stats64,
+	.ndo_get_iflink = ip6_tnl_get_iflink,
 };
 
 /**
-- 
cgit v1.2.3


From 1e99584b911cb6f3d2a681e2532d8dc3f9339c9c Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 2 Apr 2015 17:07:02 +0200
Subject: ipip,gre,vti,sit: implement ndo_get_iflink

Don't use dev->iflink anymore.

CC: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 1 +
 net/ipv4/ip_gre.c        | 2 ++
 net/ipv4/ip_tunnel.c     | 9 ++++++++-
 net/ipv4/ip_vti.c        | 2 +-
 net/ipv4/ipip.c          | 2 +-
 net/ipv6/sit.c           | 3 +--
 6 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 2c47061a6954..d8214cb88bbc 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -142,6 +142,7 @@ int ip_tunnel_init(struct net_device *dev);
 void ip_tunnel_uninit(struct net_device *dev);
 void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
 struct net *ip_tunnel_get_link_net(const struct net_device *dev);
+int ip_tunnel_get_iflink(const struct net_device *dev);
 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
 		       struct rtnl_link_ops *ops, char *devname);
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0eb2a040a830..1060ca0bc23a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -456,6 +456,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
 	.ndo_change_mtu		= ip_tunnel_change_mtu,
 	.ndo_get_stats64	= ip_tunnel_get_stats64,
+	.ndo_get_iflink		= ip_tunnel_get_iflink,
 };
 
 #define GRE_FEATURES (NETIF_F_SG |		\
@@ -686,6 +687,7 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_change_mtu		= ip_tunnel_change_mtu,
 	.ndo_get_stats64	= ip_tunnel_get_stats64,
+	.ndo_get_iflink		= ip_tunnel_get_iflink,
 };
 
 static void ipgre_tap_setup(struct net_device *dev)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2cd08280c77b..4bb7252110a6 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -389,7 +389,6 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
 		hlen = tdev->hard_header_len + tdev->needed_headroom;
 		mtu = tdev->mtu;
 	}
-	dev->iflink = tunnel->parms.link;
 
 	dev->needed_headroom = t_hlen + hlen;
 	mtu -= (dev->hard_header_len + t_hlen);
@@ -980,6 +979,14 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev)
 }
 EXPORT_SYMBOL(ip_tunnel_get_link_net);
 
+int ip_tunnel_get_iflink(const struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+
+	return tunnel->parms.link;
+}
+EXPORT_SYMBOL(ip_tunnel_get_iflink);
+
 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
 				  struct rtnl_link_ops *ops, char *devname)
 {
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5a6e27054f0a..c4f93c0d1104 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -341,6 +341,7 @@ static const struct net_device_ops vti_netdev_ops = {
 	.ndo_do_ioctl	= vti_tunnel_ioctl,
 	.ndo_change_mtu	= ip_tunnel_change_mtu,
 	.ndo_get_stats64 = ip_tunnel_get_stats64,
+	.ndo_get_iflink = ip_tunnel_get_iflink,
 };
 
 static void vti_tunnel_setup(struct net_device *dev)
@@ -361,7 +362,6 @@ static int vti_tunnel_init(struct net_device *dev)
 	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
 	dev->mtu		= ETH_DATA_LEN;
 	dev->flags		= IFF_NOARP;
-	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
 	netif_keep_dst(dev);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bfbcc85c02ee..5c81f6e40842 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -272,6 +272,7 @@ static const struct net_device_ops ipip_netdev_ops = {
 	.ndo_do_ioctl	= ipip_tunnel_ioctl,
 	.ndo_change_mtu = ip_tunnel_change_mtu,
 	.ndo_get_stats64 = ip_tunnel_get_stats64,
+	.ndo_get_iflink = ip_tunnel_get_iflink,
 };
 
 #define IPIP_FEATURES (NETIF_F_SG |		\
@@ -286,7 +287,6 @@ static void ipip_tunnel_setup(struct net_device *dev)
 
 	dev->type		= ARPHRD_TUNNEL;
 	dev->flags		= IFF_NOARP;
-	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
 	netif_keep_dst(dev);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e6b9f51b15e8..6cf2026a9cea 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1076,7 +1076,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 		if (dev->mtu < IPV6_MIN_MTU)
 			dev->mtu = IPV6_MIN_MTU;
 	}
-	dev->iflink = tunnel->parms.link;
 }
 
 static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
@@ -1336,6 +1335,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
 	.ndo_do_ioctl	= ipip6_tunnel_ioctl,
 	.ndo_change_mtu	= ipip6_tunnel_change_mtu,
 	.ndo_get_stats64 = ip_tunnel_get_stats64,
+	.ndo_get_iflink = ip_tunnel_get_iflink,
 };
 
 static void ipip6_dev_free(struct net_device *dev)
@@ -1366,7 +1366,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	dev->mtu		= ETH_DATA_LEN - t_hlen;
 	dev->flags		= IFF_NOARP;
 	netif_keep_dst(dev);
-	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
 	dev->features		|= SIT_FEATURES;
-- 
cgit v1.2.3


From 7a66bbc96ce9ad8261fa5f7f6ae65370eb6866ee Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 2 Apr 2015 17:07:09 +0200
Subject: net: remove iflink field from struct net_device

Now that all users of iflink have the ndo_get_iflink handler available, it's
possible to remove this field.

By default, dev_get_iflink() returns the ifindex of the interface.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +--
 net/core/dev.c            | 13 ++-----------
 2 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 788eb7a622ad..846a1f5bc9db 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1538,7 +1538,7 @@ struct net_device {
 	netdev_features_t	mpls_features;
 
 	int			ifindex;
-	int			iflink;
+	int			group;
 
 	struct net_device_stats	stats;
 
@@ -1741,7 +1741,6 @@ struct net_device {
 #endif
 	struct phy_device *phydev;
 	struct lock_class_key *qdisc_tx_busylock;
-	int group;
 	struct pm_qos_request	pm_qos_req;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
diff --git a/net/core/dev.c b/net/core/dev.c
index 77172d085760..3be107e0bc93 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -672,7 +672,7 @@ int dev_get_iflink(const struct net_device *dev)
 	if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
 		return dev->netdev_ops->ndo_get_iflink(dev);
 
-	return dev->iflink;
+	return dev->ifindex;
 }
 EXPORT_SYMBOL(dev_get_iflink);
 
@@ -6331,8 +6331,6 @@ int register_netdevice(struct net_device *dev)
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
 
-	dev->iflink = -1;
-
 	ret = dev_get_valid_name(net, dev, dev->name);
 	if (ret < 0)
 		goto out;
@@ -6362,9 +6360,6 @@ int register_netdevice(struct net_device *dev)
 	else if (__dev_get_by_index(net, dev->ifindex))
 		goto err_uninit;
 
-	if (dev_get_iflink(dev) == -1)
-		dev->iflink = dev->ifindex;
-
 	/* Transfer changeable features to wanted_features and enable
 	 * software offloads (GSO and GRO).
 	 */
@@ -7077,12 +7072,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	dev_net_set(dev, net);
 
 	/* If there is an ifindex conflict assign a new one */
-	if (__dev_get_by_index(net, dev->ifindex)) {
-		int iflink = (dev_get_iflink(dev) == dev->ifindex);
+	if (__dev_get_by_index(net, dev->ifindex))
 		dev->ifindex = dev_new_index(net);
-		if (iflink)
-			dev->iflink = dev->ifindex;
-	}
 
 	/* Send a netdev-add uevent to the new namespace */
 	kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
-- 
cgit v1.2.3


From 802f42a8d99254b8602bf65cc50e8333bf479f13 Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:06 +0300
Subject: net/mlx4: Add RSS support for fragmented IP datagrams

Enable RSS support for fragmented IP packets, when device supports it.
Until now, fragmented IP packets were directed only to the default_qpn.
Since IP fragments (datagram) have no upper protocols (L3 IP packets),
hash is performed on 3-tuple - dst MAC, source IP and dest IP. The HW
makes sure that this holds for the 1st fragment too, so all fragments
go to the same QP.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   | 12 ++++++++++++
 drivers/net/ethernet/mellanox/mlx4/fw.h   |  1 +
 drivers/net/ethernet/mellanox/mlx4/main.c |  2 ++
 include/linux/mlx4/device.h               |  1 +
 4 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 209a6171e59b..412e019e8b2e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -105,6 +105,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
 		[41] = "Unicast VEP steering support",
 		[42] = "Multicast VEP steering support",
 		[48] = "Counters support",
+		[52] = "RSS IP fragments support",
 		[53] = "Port ETS Scheduler support",
 		[55] = "Port link type sensing support",
 		[59] = "Port management change event support",
@@ -1138,6 +1139,9 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	}
 	for (; slave_port < dev->caps.num_ports; ++slave_port)
 		flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port);
+
+	/* Not exposing RSS IP fragments to guests */
+	flags &= ~MLX4_DEV_CAP_FLAG_RSS_IP_FRAG;
 	MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
 
 	MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VL_PORT_OFFSET);
@@ -1701,6 +1705,10 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
 		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
 
+	/* Enable RSS spread to fragmented IP packets when supported */
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_RSS_IP_FRAG)
+		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 13);
+
 	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
 		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
@@ -1889,6 +1897,10 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 		else
 			param->steering_mode = MLX4_STEERING_MODE_A0;
 	}
+
+	if (dword_field & (1 << 13))
+		param->rss_ip_frags = 1;
+
 	/* steering attributes */
 	if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
 		MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 863655bd3947..07cb7c2461ad 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -203,6 +203,7 @@ struct mlx4_init_hca_param {
 	u64 dev_cap_enabled;
 	u16 cqe_size; /* For use only when CQE stride feature enabled */
 	u16 eqe_size; /* For use only when EQE stride feature enabled */
+	u8 rss_ip_frags;
 };
 
 struct mlx4_init_ib_param {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 43aa76775b5f..ee0a67069b57 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -885,6 +885,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
 
 	slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
+	mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
+		 hca_param.rss_ip_frags ? "on" : "off");
 
 	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
 	    dev->caps.bf_reg_size)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ab7ebec943b8..41eaafdfb1cd 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -174,6 +174,7 @@ enum {
 	MLX4_DEV_CAP_FLAG_VEP_UC_STEER	= 1LL << 41,
 	MLX4_DEV_CAP_FLAG_VEP_MC_STEER	= 1LL << 42,
 	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48,
+	MLX4_DEV_CAP_FLAG_RSS_IP_FRAG   = 1LL << 52,
 	MLX4_DEV_CAP_FLAG_SET_ETH_SCHED = 1LL << 53,
 	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT	= 1LL << 55,
 	MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
-- 
cgit v1.2.3


From fccea6436a5bd16aed6f722efce13ec2c90427d7 Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:08 +0300
Subject: net/mlx4: Make mlx4_is_eth visible inline funcion

Currently implemented as static function in resource_tracker.c --
this change will allow other files in mlx4_core to use it as well.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 5 -----
 include/linux/mlx4/device.h                           | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 11bcd27e218f..fd2520aa8a8c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -221,11 +221,6 @@ struct res_fs_rule {
 	int			qpn;
 };
 
-static int mlx4_is_eth(struct mlx4_dev *dev, int port)
-{
-	return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1;
-}
-
 static void *res_tracker_lookup(struct rb_root *root, u64 res_id)
 {
 	struct rb_node *node = root->rb_node;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 41eaafdfb1cd..0f7f13d6a03e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1002,6 +1002,11 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev)
 	return dev->flags & MLX4_FLAG_SLAVE;
 }
 
+static inline int mlx4_is_eth(struct mlx4_dev *dev, int port)
+{
+	return dev->caps.port_type[port] == MLX4_PORT_TYPE_IB ? 0 : 1;
+}
+
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		   struct mlx4_buf *buf, gfp_t gfp);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
-- 
cgit v1.2.3


From 12a889c057504fbf307dd237aedb87263ef2848a Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:10 +0300
Subject: net/mlx4: New file for QoS related firmware commands

Create two new files fw_qos.h and fw_qos.c in mlx4_core module.

It gathers all relevant QoS firmware related commands etc, thus improving
encapsulation of the mlx4_core module. For now it contains the QoS existing
commands: mlx4_SET_PORT_SCHEDULER and mlx4_SET_PORT_PRIO2TC.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/Makefile    |   5 +-
 drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c |   1 +
 drivers/net/ethernet/mellanox/mlx4/fw_qos.c    | 125 +++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/fw_qos.h    |  75 +++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/mlx4.h      |  15 ---
 drivers/net/ethernet/mellanox/mlx4/port.c      |  71 --------------
 include/linux/mlx4/device.h                    |   5 -
 7 files changed, 204 insertions(+), 93 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx4/fw_qos.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx4/fw_qos.h

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile
index 3e9c70f15b42..c82217e0d22d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx4/Makefile
@@ -1,7 +1,8 @@
 obj-$(CONFIG_MLX4_CORE)		+= mlx4_core.o
 
-mlx4_core-y :=	alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \
-		mr.o pd.o port.o profile.o qp.o reset.o sense.o srq.o resource_tracker.o
+mlx4_core-y :=	alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \
+		main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \
+		srq.o resource_tracker.o
 
 obj-$(CONFIG_MLX4_EN)               += mlx4_en.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
index 8e3260c0eaa5..f01918c63f28 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -35,6 +35,7 @@
 #include <linux/math64.h>
 
 #include "mlx4_en.h"
+#include "fw_qos.h"
 
 /* Definitions for QCN
  */
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
new file mode 100644
index 000000000000..0f5af7cf4547
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies.
+ * All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/export.h>
+#include "fw_qos.h"
+
+struct mlx4_set_port_prio2tc_context {
+	u8 prio2tc[4];
+};
+
+struct mlx4_port_scheduler_tc_cfg_be {
+	__be16 pg;
+	__be16 bw_precentage;
+	__be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */
+	__be16 max_bw_value;
+};
+
+struct mlx4_set_port_scheduler_context {
+	struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC];
+};
+
+int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_prio2tc_context *context;
+	int err;
+	u32 in_mod;
+	int i;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	context = mailbox->buf;
+
+	for (i = 0; i < MLX4_NUM_UP; i += 2)
+		context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1];
+
+	in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC);
+
+int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
+			    u8 *pg, u16 *ratelimit)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_scheduler_context *context;
+	int err;
+	u32 in_mod;
+	int i;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	context = mailbox->buf;
+
+	for (i = 0; i < MLX4_NUM_TC; i++) {
+		struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i];
+		u16 r;
+
+		if (ratelimit && ratelimit[i]) {
+			if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) {
+				r = ratelimit[i];
+				tc->max_bw_units =
+					htons(MLX4_RATELIMIT_100M_UNITS);
+			} else {
+				r = ratelimit[i] / 10;
+				tc->max_bw_units =
+					htons(MLX4_RATELIMIT_1G_UNITS);
+			}
+			tc->max_bw_value = htons(r);
+		} else {
+			tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT);
+			tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS);
+		}
+
+		tc->pg = htons(pg[i]);
+		tc->bw_precentage = htons(tc_tx_bw[i]);
+	}
+
+	in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
new file mode 100644
index 000000000000..ab9be0fad9c8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies.
+ * All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_FW_QOS_H
+#define MLX4_FW_QOS_H
+
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/device.h>
+
+#define MLX4_NUM_UP 8
+#define MLX4_NUM_TC 8
+
+/**
+ * mlx4_SET_PORT_PRIO2TC - This routine maps user priorities to traffic
+ * classes of a given port and device.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @prio2tc: Array of TC associated with each priorities.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
+
+/**
+ * mlx4_SET_PORT_SCHEDULER - This routine configures the arbitration between
+ * traffic classes (ETS) and configured rate limit for traffic classes.
+ * tc_tx_bw, pg and ratelimit are arrays where each index represents a TC.
+ * The description for those parameters below refers to a single TC.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @tc_tx_bw: The percentage of the bandwidth allocated for traffic class
+ *  within a TC group. The sum of the bw_percentage of all the traffic
+ *  classes within a TC group must equal 100% for correct operation.
+ * @pg: The TC group the traffic class is associated with.
+ * @ratelimit: The maximal bandwidth allowed for the use by this traffic class.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
+			    u8 *pg, u16 *ratelimit);
+
+#endif /* MLX4_FW_QOS_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 0b16db015745..9e5236b1b5f8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -64,21 +64,6 @@
 
 #define INIT_HCA_TPT_MW_ENABLE          (1 << 7)
 
-struct mlx4_set_port_prio2tc_context {
-	u8 prio2tc[4];
-};
-
-struct mlx4_port_scheduler_tc_cfg_be {
-	__be16 pg;
-	__be16 bw_precentage;
-	__be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */
-	__be16 max_bw_value;
-};
-
-struct mlx4_set_port_scheduler_context {
-	struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC];
-};
-
 enum {
 	MLX4_HCR_BASE		= 0x80680,
 	MLX4_HCR_SIZE		= 0x0001c,
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index b97f173ab062..6a53d42db52f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -1016,77 +1016,6 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 }
 EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc);
 
-int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc)
-{
-	struct mlx4_cmd_mailbox *mailbox;
-	struct mlx4_set_port_prio2tc_context *context;
-	int err;
-	u32 in_mod;
-	int i;
-
-	mailbox = mlx4_alloc_cmd_mailbox(dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
-	context = mailbox->buf;
-	for (i = 0; i < MLX4_NUM_UP; i += 2)
-		context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1];
-
-	in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
-
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
-}
-EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC);
-
-int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
-		u8 *pg, u16 *ratelimit)
-{
-	struct mlx4_cmd_mailbox *mailbox;
-	struct mlx4_set_port_scheduler_context *context;
-	int err;
-	u32 in_mod;
-	int i;
-
-	mailbox = mlx4_alloc_cmd_mailbox(dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
-	context = mailbox->buf;
-
-	for (i = 0; i < MLX4_NUM_TC; i++) {
-		struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i];
-		u16 r;
-
-		if (ratelimit && ratelimit[i]) {
-			if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) {
-				r = ratelimit[i];
-				tc->max_bw_units =
-					htons(MLX4_RATELIMIT_100M_UNITS);
-			} else {
-				r = ratelimit[i]/10;
-				tc->max_bw_units =
-					htons(MLX4_RATELIMIT_1G_UNITS);
-			}
-			tc->max_bw_value = htons(r);
-		} else {
-			tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT);
-			tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS);
-		}
-
-		tc->pg = htons(pg[i]);
-		tc->bw_precentage = htons(tc_tx_bw[i]);
-	}
-
-	in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
-
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
-}
-EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
-
 enum {
 	VXLAN_ENABLE_MODIFY	= 1 << 7,
 	VXLAN_STEERING_MODIFY	= 1 << 6,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 0f7f13d6a03e..b676d0c0111a 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -49,8 +49,6 @@
 #define MSIX_LEGACY_SZ		4
 #define MIN_MSIX_P_PORT		5
 
-#define MLX4_NUM_UP			8
-#define MLX4_NUM_TC			8
 #define MLX4_MAX_100M_UNITS_VAL		255	/*
 						 * work around: can't set values
 						 * greater then this value when
@@ -1311,9 +1309,6 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
 int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 			   u8 promisc);
-int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
-int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
-		u8 *pg, u16 *ratelimit);
 int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable);
 int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
-- 
cgit v1.2.3


From 7e95bb99a8f969d4534867452793e44cc4731ca9 Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:11 +0300
Subject: net/mlx4: Add mlx4_ALLOCATE_VPP implementation

Implements device ALLOCATE_VPP command, to be used for granular QoS
configuration of VFs by the PF device. Defines and queries the amount
of VPPs assigned to each port, and the amount of VPPs assigned to each
priority of each port. Once the total VPPs are split between the priorities
of a port, they may be assigned with a share of the BW or a rate limit.

Split into two functions (get/set) whoch are supplied with
mlx4_alloc_vpp_context and physical port number.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c    |  9 ++++
 drivers/net/ethernet/mellanox/mlx4/fw_qos.c | 75 +++++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/fw_qos.h | 28 +++++++++++
 include/linux/mlx4/cmd.h                    |  1 +
 4 files changed, 113 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 179ca2889434..79cc5ec57dca 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1454,6 +1454,15 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.verify = NULL,
 		.wrapper = mlx4_CMD_EPERM_wrapper,
 	},
+	{
+		.opcode = MLX4_CMD_ALLOCATE_VPP,
+		.has_inbox = false,
+		.has_outbox = true,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_CMD_EPERM_wrapper,
+	},
 	{
 		.opcode = MLX4_CMD_CONF_SPECIAL_QP,
 		.has_inbox = false,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
index 0f5af7cf4547..5ce3440b99c7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
@@ -34,6 +34,13 @@
 
 #include <linux/export.h>
 #include "fw_qos.h"
+#include "fw.h"
+
+enum {
+	/* allocate vpp opcode modifiers */
+	MLX4_ALLOCATE_VPP_ALLOCATE	= 0x0,
+	MLX4_ALLOCATE_VPP_QUERY		= 0x1
+};
 
 struct mlx4_set_port_prio2tc_context {
 	u8 prio2tc[4];
@@ -50,6 +57,12 @@ struct mlx4_set_port_scheduler_context {
 	struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC];
 };
 
+/* Granular Qos (per VF) section */
+struct mlx4_alloc_vpp_param {
+	__be32 availible_vpp;
+	__be32 vpp_p_up[MLX4_NUM_UP];
+};
+
 int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc)
 {
 	struct mlx4_cmd_mailbox *mailbox;
@@ -123,3 +136,65 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
 	return err;
 }
 EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
+
+int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
+			  u16 *availible_vpp, u8 *vpp_p_up)
+{
+	int i;
+	int err;
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_alloc_vpp_param *out_param;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	out_param = mailbox->buf;
+
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, port,
+			   MLX4_ALLOCATE_VPP_QUERY,
+			   MLX4_CMD_ALLOCATE_VPP,
+			   MLX4_CMD_TIME_CLASS_A,
+			   MLX4_CMD_NATIVE);
+	if (err)
+		goto out;
+
+	/* Total number of supported VPPs */
+	*availible_vpp = (u16)be32_to_cpu(out_param->availible_vpp);
+
+	for (i = 0; i < MLX4_NUM_UP; i++)
+		vpp_p_up[i] = (u8)be32_to_cpu(out_param->vpp_p_up[i]);
+
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_get);
+
+int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up)
+{
+	int i;
+	int err;
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_alloc_vpp_param *in_param;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	in_param = mailbox->buf;
+
+	for (i = 0; i < MLX4_NUM_UP; i++)
+		in_param->vpp_p_up[i] = cpu_to_be32(vpp_p_up[i]);
+
+	err = mlx4_cmd(dev, mailbox->dma, port,
+		       MLX4_ALLOCATE_VPP_ALLOCATE,
+		       MLX4_CMD_ALLOCATE_VPP,
+		       MLX4_CMD_TIME_CLASS_A,
+		       MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_set);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
index ab9be0fad9c8..be79951c44ef 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
@@ -71,5 +71,33 @@ int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
  **/
 int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
 			    u8 *pg, u16 *ratelimit);
+/**
+ * mlx4_ALLOCATE_VPP_get - Query port VPP availible resources and allocation.
+ * Before distribution of VPPs to priorities, only availible_vpp is returned.
+ * After initialization it returns the distribution of VPPs among priorities.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @availible_vpp: Pointer to variable where number of availible VPPs is stored
+ * @vpp_p_up: Distribution of VPPs to priorities is stored in this array
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
+			  u16 *availible_vpp, u8 *vpp_p_up);
+/**
+ * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among differnt priorities.
+ * The total number of VPPs assigned to all for a port must not exceed
+ * the value reported by availible_vpp in mlx4_ALLOCATE_VPP_get.
+ * VPP allocation is allowed only after the port type has been set,
+ * and while no QPs are open for this port.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @vpp_p_up: Allocation of VPPs to different priorities.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up);
 
 #endif /* MLX4_FW_QOS_H */
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 7299e9548906..e0f88a098fb8 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -68,6 +68,7 @@ enum {
 	MLX4_CMD_UNMAP_ICM_AUX	 = 0xffb,
 	MLX4_CMD_SET_ICM_SIZE	 = 0xffd,
 	MLX4_CMD_ACCESS_REG	 = 0x3b,
+	MLX4_CMD_ALLOCATE_VPP	 = 0x80,
 
 	/*master notify fw on finish for slave's flr*/
 	MLX4_CMD_INFORM_FLR_DONE = 0x5b,
-- 
cgit v1.2.3


From 1c29146d3821be984030b49cf9509a269edc3b8b Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:12 +0300
Subject: net/mlx4: Add mlx4_SET_VPORT_QOS implementation

Add the SET_VPORT_QOS device command, which is ntended for virtual
granular QoS configuration per VF in SRIOV mode. The SET_VPORT_QOS
command sets and queries QoS parameters of a VPort. Each priority
allowed for a VPort is assigned with a share of the BW, and a BW
limitation. QoS parameters can be modified at any time, but must be
initialized before any QP is associated with the VPort.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c    |  9 +++
 drivers/net/ethernet/mellanox/mlx4/fw_qos.c | 89 +++++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/fw_qos.h | 36 ++++++++++++
 include/linux/mlx4/cmd.h                    |  1 +
 4 files changed, 135 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 79cc5ec57dca..af95231ba3e2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1463,6 +1463,15 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.verify = NULL,
 		.wrapper = mlx4_CMD_EPERM_wrapper,
 	},
+	{
+		.opcode = MLX4_CMD_SET_VPORT_QOS,
+		.has_inbox = false,
+		.has_outbox = true,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_CMD_EPERM_wrapper,
+	},
 	{
 		.opcode = MLX4_CMD_CONF_SPECIAL_QP,
 		.has_inbox = false,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
index 5ce3440b99c7..8f2fde0487c4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
@@ -42,6 +42,12 @@ enum {
 	MLX4_ALLOCATE_VPP_QUERY		= 0x1
 };
 
+enum {
+	/* set vport qos opcode modifiers */
+	MLX4_SET_VPORT_QOS_SET		= 0x0,
+	MLX4_SET_VPORT_QOS_QUERY	= 0x1
+};
+
 struct mlx4_set_port_prio2tc_context {
 	u8 prio2tc[4];
 };
@@ -63,6 +69,19 @@ struct mlx4_alloc_vpp_param {
 	__be32 vpp_p_up[MLX4_NUM_UP];
 };
 
+struct mlx4_prio_qos_param {
+	__be32 bw_share;
+	__be32 max_avg_bw;
+	__be32 reserved;
+	__be32 enable;
+	__be32 reserved1[4];
+};
+
+struct mlx4_set_vport_context {
+	__be32 reserved[8];
+	struct mlx4_prio_qos_param qos_p_up[MLX4_NUM_UP];
+};
+
 int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc)
 {
 	struct mlx4_cmd_mailbox *mailbox;
@@ -198,3 +217,73 @@ int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up)
 	return err;
 }
 EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_set);
+
+int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport,
+			   struct mlx4_vport_qos_param *out_param)
+{
+	int i;
+	int err;
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_vport_context *ctx;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	ctx = mailbox->buf;
+
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, (vport << 8) | port,
+			   MLX4_SET_VPORT_QOS_QUERY,
+			   MLX4_CMD_SET_VPORT_QOS,
+			   MLX4_CMD_TIME_CLASS_A,
+			   MLX4_CMD_NATIVE);
+	if (err)
+		goto out;
+
+	for (i = 0; i < MLX4_NUM_UP; i++) {
+		out_param[i].bw_share = be32_to_cpu(ctx->qos_p_up[i].bw_share);
+		out_param[i].max_avg_bw =
+			be32_to_cpu(ctx->qos_p_up[i].max_avg_bw);
+		out_param[i].enable =
+			!!(be32_to_cpu(ctx->qos_p_up[i].enable) & 31);
+	}
+
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_VPORT_QOS_get);
+
+int mlx4_SET_VPORT_QOS_set(struct mlx4_dev *dev, u8 port, u8 vport,
+			   struct mlx4_vport_qos_param *in_param)
+{
+	int i;
+	int err;
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_vport_context *ctx;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	ctx = mailbox->buf;
+
+	for (i = 0; i < MLX4_NUM_UP; i++) {
+		ctx->qos_p_up[i].bw_share = cpu_to_be32(in_param[i].bw_share);
+		ctx->qos_p_up[i].max_avg_bw =
+				cpu_to_be32(in_param[i].max_avg_bw);
+		ctx->qos_p_up[i].enable =
+				cpu_to_be32(in_param[i].enable << 31);
+	}
+
+	err = mlx4_cmd(dev, mailbox->dma, (vport << 8) | port,
+		       MLX4_SET_VPORT_QOS_SET,
+		       MLX4_CMD_SET_VPORT_QOS,
+		       MLX4_CMD_TIME_CLASS_A,
+		       MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_VPORT_QOS_set);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
index be79951c44ef..b3fffafff062 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
@@ -41,6 +41,12 @@
 #define MLX4_NUM_UP 8
 #define MLX4_NUM_TC 8
 
+struct mlx4_vport_qos_param {
+	u32 bw_share;
+	u32 max_avg_bw;
+	u8 enable;
+};
+
 /**
  * mlx4_SET_PORT_PRIO2TC - This routine maps user priorities to traffic
  * classes of a given port and device.
@@ -100,4 +106,34 @@ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
  **/
 int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up);
 
+/**
+ * mlx4_SET_VPORT_QOS_get - Query QoS proporties of a Vport.
+ * Each priority allowed for the Vport is assigned with a share of the BW,
+ * and a BW limitation. This commands query the current QoS values.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @vport: Vport id.
+ * @out_param: Array of mlx4_vport_qos_param that will contain the values.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport,
+			   struct mlx4_vport_qos_param *out_param);
+
+/**
+ * mlx4_SET_VPORT_QOS_set - Set QoS proporties of a Vport.
+ * QoS parameters can be modified at any time, but must be initialized
+ * before any QP is associated with the VPort.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @vport: Vport id.
+ * @out_param: Array of mlx4_vport_qos_param which holds the requested values.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_SET_VPORT_QOS_set(struct mlx4_dev *dev, u8 port, u8 vport,
+			   struct mlx4_vport_qos_param *in_param);
+
 #endif /* MLX4_FW_QOS_H */
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index e0f88a098fb8..88326ed0033a 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -69,6 +69,7 @@ enum {
 	MLX4_CMD_SET_ICM_SIZE	 = 0xffd,
 	MLX4_CMD_ACCESS_REG	 = 0x3b,
 	MLX4_CMD_ALLOCATE_VPP	 = 0x80,
+	MLX4_CMD_SET_VPORT_QOS	 = 0x81,
 
 	/*master notify fw on finish for slave's flr*/
 	MLX4_CMD_INFORM_FLR_DONE = 0x5b,
-- 
cgit v1.2.3


From d019fcb2244519d453694bfce868f3e717bfcebb Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:13 +0300
Subject: net/mlx4: Query device for QoS per VF support

Checks in QUERY_DEV_CAP if the granular QoS per VF feature is
supported by the device. Disabled for guests.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c | 10 +++++++++-
 include/linux/mlx4/device.h             |  3 ++-
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 412e019e8b2e..94e857e39063 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -147,7 +147,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[21] = "Port Remap support",
 		[22] = "QCN support",
 		[23] = "QP rate limiting support",
-		[24] = "Ethernet Flow control statistics support"
+		[24] = "Ethernet Flow control statistics support",
+		[25] = "Granular QoS per VF support",
 	};
 	int i;
 
@@ -871,6 +872,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET);
 	dev_cap->max_rq_desc_sz = size;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+	if (field & (1 << 4))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QOS_VPP;
 	if (field & (1 << 5))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL;
 	if (field & (1 << 6))
@@ -1199,6 +1202,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	field16 = 0;
 	MLX4_PUT(outbox->buf, field16, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET);
 
+	/* turn off QoS per VF support for guests */
+	MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+	field &= 0xef;
+	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+
 	return 0;
 }
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b676d0c0111a..c37208f7869f 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -205,7 +205,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_PORT_REMAP		= 1LL <<  21,
 	MLX4_DEV_CAP_FLAG2_QCN			= 1LL <<  22,
 	MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT	= 1LL <<  23,
-	MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN         = 1LL <<  24
+	MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN         = 1LL <<  24,
+	MLX4_DEV_CAP_FLAG2_QOS_VPP		= 1LL <<  25,
 };
 
 enum {
-- 
cgit v1.2.3


From 08068cd5683f11e4505aa9c8cc6ed5942f8ad299 Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:15 +0300
Subject: net/mlx4: Added qos_vport QP configuration in VST mode

Granular QoS per VF feature introduce a new QP field, qos_vport.

PF administrator can connect VF QPs to a certain QoS Vport, to
inherit its proporties. Connecting QPs to the default QoS Vport
(defined as 0) is always allowed, even when there are no allocated VPPs.
At this point, only the default vport is connected to QPs.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c              | 9 ++++++++-
 drivers/net/ethernet/mellanox/mlx4/fw_qos.h           | 3 +++
 drivers/net/ethernet/mellanox/mlx4/mlx4.h             | 2 ++
 drivers/net/ethernet/mellanox/mlx4/qp.c               | 5 +++++
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 ++++++
 include/linux/mlx4/qp.h                               | 8 ++++++--
 6 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 7d3c64275b75..4c0c6c7e63e7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -48,6 +48,7 @@
 
 #include "mlx4.h"
 #include "fw.h"
+#include "fw_qos.h"
 
 #define CMD_POLL_TOKEN 0xffff
 #define INBOX_MASK	0xffffffffffffff00ULL
@@ -1808,7 +1809,8 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
 
 	if (vp_oper->state.default_vlan == vp_admin->default_vlan &&
 	    vp_oper->state.default_qos == vp_admin->default_qos &&
-	    vp_oper->state.link_state == vp_admin->link_state)
+	    vp_oper->state.link_state == vp_admin->link_state &&
+	    vp_oper->state.qos_vport == vp_admin->qos_vport)
 		return 0;
 
 	if (!(priv->mfunc.master.slave_state[slave].active &&
@@ -1866,6 +1868,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
 	vp_oper->state.default_vlan = vp_admin->default_vlan;
 	vp_oper->state.default_qos = vp_admin->default_qos;
 	vp_oper->state.link_state = vp_admin->link_state;
+	vp_oper->state.qos_vport = vp_admin->qos_vport;
 
 	if (vp_admin->link_state == IFLA_VF_LINK_STATE_DISABLE)
 		work->flags |= MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE;
@@ -1874,6 +1877,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
 	work->port = port;
 	work->slave = slave;
 	work->qos = vp_oper->state.default_qos;
+	work->qos_vport = vp_oper->state.qos_vport;
 	work->vlan_id = vp_oper->state.default_vlan;
 	work->vlan_ix = vp_oper->vlan_idx;
 	work->priv = priv;
@@ -2339,6 +2343,9 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
 				INIT_LIST_HEAD(&s_state->mcast_filters[port]);
 				admin_vport->default_vlan = MLX4_VGT;
 				oper_vport->default_vlan = MLX4_VGT;
+				admin_vport->qos_vport =
+						MLX4_VPP_DEFAULT_VPORT;
+				oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
 				vf_oper->vport[port].vlan_idx = NO_INDX;
 				vf_oper->vport[port].mac_idx = NO_INDX;
 			}
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
index 4a5320f79094..ac1f331878e6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
@@ -44,6 +44,9 @@
 /* Default supported priorities for VPP allocation */
 #define MLX4_DEFAULT_QOS_PRIO (0)
 
+/* Derived from FW feature definition, 0 is the default vport fo all QPs */
+#define MLX4_VPP_DEFAULT_VPORT (0)
+
 struct mlx4_vport_qos_param {
 	u32 bw_share;
 	u32 max_avg_bw;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 1add698dba5b..32469c64d580 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -498,6 +498,7 @@ struct mlx4_vport_state {
 	u32 tx_rate;
 	bool spoofchk;
 	u32 link_state;
+	u8 qos_vport;
 };
 
 struct mlx4_vf_admin_state {
@@ -636,6 +637,7 @@ struct mlx4_vf_immed_vlan_work {
 	int			orig_vlan_ix;
 	u8			port;
 	u8			qos;
+	u8                      qos_vport;
 	u16			vlan_id;
 	u16			orig_vlan_id;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 69e4462e4ee4..b75214a80d0e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -447,6 +447,11 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
 		cmd->qp_context.rate_limit_params = cpu_to_be16((params->rate_unit << 14) | params->rate_val);
 	}
 
+	if (attr & MLX4_UPDATE_QP_QOS_VPORT) {
+		qp_mask |= 1ULL << MLX4_UPD_QP_MASK_QOS_VPP;
+		cmd->qp_context.qos_vport = params->qos_vport;
+	}
+
 	cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask);
 	cmd->qp_mask = cpu_to_be64(qp_mask);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index fd2520aa8a8c..c7f28bf4b8e2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -765,6 +765,7 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 		qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
 		qpc->pri_path.sched_queue &= 0xC7;
 		qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3;
+		qpc->qos_vport = vp_oper->state.qos_vport;
 	}
 	if (vp_oper->state.spoofchk) {
 		qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC;
@@ -4917,6 +4918,11 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
 					qp->sched_queue & 0xC7;
 				upd_context->qp_context.pri_path.sched_queue |=
 					((work->qos & 0x7) << 3);
+				upd_context->qp_mask |=
+					cpu_to_be64(1ULL <<
+						    MLX4_UPD_QP_MASK_QOS_VPP);
+				upd_context->qp_context.qos_vport =
+					work->qos_vport;
 			}
 
 			err = mlx4_cmd(dev, mailbox->dma,
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 1023ebe035b7..6fed539e5456 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -209,7 +209,8 @@ struct mlx4_qp_context {
 	__be16			sq_wqe_counter;
 	u32			reserved3;
 	__be16			rate_limit_params;
-	__be16			reserved4;
+	u8			reserved4;
+	u8			qos_vport;
 	__be32			param3;
 	__be32			nummmcpeers_basemkey;
 	u8			log_page_size;
@@ -231,6 +232,7 @@ struct mlx4_update_qp_context {
 enum {
 	MLX4_UPD_QP_MASK_PM_STATE	= 32,
 	MLX4_UPD_QP_MASK_VSD		= 33,
+	MLX4_UPD_QP_MASK_QOS_VPP	= 34,
 	MLX4_UPD_QP_MASK_RATE_LIMIT	= 35,
 };
 
@@ -432,7 +434,8 @@ enum mlx4_update_qp_attr {
 	MLX4_UPDATE_QP_SMAC		= 1 << 0,
 	MLX4_UPDATE_QP_VSD		= 1 << 1,
 	MLX4_UPDATE_QP_RATE_LIMIT	= 1 << 2,
-	MLX4_UPDATE_QP_SUPPORTED_ATTRS	= (1 << 3) - 1
+	MLX4_UPDATE_QP_QOS_VPORT	= 1 << 3,
+	MLX4_UPDATE_QP_SUPPORTED_ATTRS	= (1 << 4) - 1
 };
 
 enum mlx4_update_qp_params_flags {
@@ -441,6 +444,7 @@ enum mlx4_update_qp_params_flags {
 
 struct mlx4_update_qp_params {
 	u8	smac_index;
+	u8	qos_vport;
 	u32	flags;
 	u16	rate_unit;
 	u16	rate_val;
-- 
cgit v1.2.3


From cda373f4849d5dd6fedceb4aeba35682a0e1a833 Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:16 +0300
Subject: net/mlx4_en: Enable TX rate limit per VF

Support granular QoS per VF, by implementing the ndo_set_vf_rate.

Enforce a rate limit per VF when called, and enabled only for VFs in
VST mode with user priority supported by the device.

We don't enforce VFs to be in VST mode at the moment of configuration,
but rather save the given rate limit and enforce it when the VF is
moved to VST with user priority which is supported (currently 0).

VST<->VGT or VST qos value state changes are disallowed when a rate
limit is configured. Minimum BW share is not supported yet.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c       | 177 ++++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  11 ++
 include/linux/mlx4/cmd.h                       |   2 +
 3 files changed, 189 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 4c0c6c7e63e7..bc6e4c9b6bb5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2781,6 +2781,103 @@ static int mlx4_slaves_closest_port(struct mlx4_dev *dev, int slave, int port)
 	return port;
 }
 
+static int mlx4_set_vport_qos(struct mlx4_priv *priv, int slave, int port,
+			      int max_tx_rate)
+{
+	int i;
+	int err;
+	struct mlx4_qos_manager *port_qos;
+	struct mlx4_dev *dev = &priv->dev;
+	struct mlx4_vport_qos_param vpp_qos[MLX4_NUM_UP];
+
+	port_qos = &priv->mfunc.master.qos_ctl[port];
+	memset(vpp_qos, 0, sizeof(struct mlx4_vport_qos_param) * MLX4_NUM_UP);
+
+	if (slave > port_qos->num_of_qos_vfs) {
+		mlx4_info(dev, "No availible VPP resources for this VF\n");
+		return -EINVAL;
+	}
+
+	/* Query for default QoS values from Vport 0 is needed */
+	err = mlx4_SET_VPORT_QOS_get(dev, port, 0, vpp_qos);
+	if (err) {
+		mlx4_info(dev, "Failed to query Vport 0 QoS values\n");
+		return err;
+	}
+
+	for (i = 0; i < MLX4_NUM_UP; i++) {
+		if (test_bit(i, port_qos->priority_bm) && max_tx_rate) {
+			vpp_qos[i].max_avg_bw = max_tx_rate;
+			vpp_qos[i].enable = 1;
+		} else {
+			/* if user supplied tx_rate == 0, meaning no rate limit
+			 * configuration is required. so we are leaving the
+			 * value of max_avg_bw as queried from Vport 0.
+			 */
+			vpp_qos[i].enable = 0;
+		}
+	}
+
+	err = mlx4_SET_VPORT_QOS_set(dev, port, slave, vpp_qos);
+	if (err) {
+		mlx4_info(dev, "Failed to set Vport %d QoS values\n", slave);
+		return err;
+	}
+
+	return 0;
+}
+
+static bool mlx4_is_vf_vst_and_prio_qos(struct mlx4_dev *dev, int port,
+					struct mlx4_vport_state *vf_admin)
+{
+	struct mlx4_qos_manager *info;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (!mlx4_is_master(dev) ||
+	    !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP))
+		return false;
+
+	info = &priv->mfunc.master.qos_ctl[port];
+
+	if (vf_admin->default_vlan != MLX4_VGT &&
+	    test_bit(vf_admin->default_qos, info->priority_bm))
+		return true;
+
+	return false;
+}
+
+static bool mlx4_valid_vf_state_change(struct mlx4_dev *dev, int port,
+				       struct mlx4_vport_state *vf_admin,
+				       int vlan, int qos)
+{
+	struct mlx4_vport_state dummy_admin = {0};
+
+	if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) ||
+	    !vf_admin->tx_rate)
+		return true;
+
+	dummy_admin.default_qos = qos;
+	dummy_admin.default_vlan = vlan;
+
+	/* VF wants to move to other VST state which is valid with current
+	 * rate limit. Either differnt default vlan in VST or other
+	 * supported QoS priority. Otherwise we don't allow this change when
+	 * the TX rate is still configured.
+	 */
+	if (mlx4_is_vf_vst_and_prio_qos(dev, port, &dummy_admin))
+		return true;
+
+	mlx4_info(dev, "Cannot change VF state to %s while rate is set\n",
+		  (vlan == MLX4_VGT) ? "VGT" : "VST");
+
+	if (vlan != MLX4_VGT)
+		mlx4_info(dev, "VST priority %d not supported for QoS\n", qos);
+
+	mlx4_info(dev, "Please set rate to 0 prior to this VF state change\n");
+
+	return false;
+}
+
 int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2824,12 +2921,22 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
 	port = mlx4_slaves_closest_port(dev, slave, port);
 	vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
 
+	if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos))
+		return -EPERM;
+
 	if ((0 == vlan) && (0 == qos))
 		vf_admin->default_vlan = MLX4_VGT;
 	else
 		vf_admin->default_vlan = vlan;
 	vf_admin->default_qos = qos;
 
+	/* If rate was configured prior to VST, we saved the configured rate
+	 * in vf_admin->rate and now, if priority supported we enforce the QoS
+	 */
+	if (mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) &&
+	    vf_admin->tx_rate)
+		vf_admin->qos_vport = slave;
+
 	if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
 		mlx4_info(dev,
 			  "updating vf %d port %d config will take effect on next VF restart\n",
@@ -2838,6 +2945,69 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
 }
 EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan);
 
+int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate,
+		     int max_tx_rate)
+{
+	int err;
+	int slave;
+	struct mlx4_vport_state *vf_admin;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (!mlx4_is_master(dev) ||
+	    !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP))
+		return -EPROTONOSUPPORT;
+
+	if (min_tx_rate) {
+		mlx4_info(dev, "Minimum BW share not supported\n");
+		return -EPROTONOSUPPORT;
+	}
+
+	slave = mlx4_get_slave_indx(dev, vf);
+	if (slave < 0)
+		return -EINVAL;
+
+	port = mlx4_slaves_closest_port(dev, slave, port);
+	vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+	err = mlx4_set_vport_qos(priv, slave, port, max_tx_rate);
+	if (err) {
+		mlx4_info(dev, "vf %d failed to set rate %d\n", vf,
+			  max_tx_rate);
+		return err;
+	}
+
+	vf_admin->tx_rate = max_tx_rate;
+	/* if VF is not in supported mode (VST with supported prio),
+	 * we do not change vport configuration for its QPs, but save
+	 * the rate, so it will be enforced when it moves to supported
+	 * mode next time.
+	 */
+	if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin)) {
+		mlx4_info(dev,
+			  "rate set for VF %d when not in valid state\n", vf);
+
+		if (vf_admin->default_vlan != MLX4_VGT)
+			mlx4_info(dev, "VST priority not supported by QoS\n");
+		else
+			mlx4_info(dev, "VF in VGT mode (needed VST)\n");
+
+		mlx4_info(dev,
+			  "rate %d take affect when VF moves to valid state\n",
+			  max_tx_rate);
+		return 0;
+	}
+
+	/* If user sets rate 0 assigning default vport for its QPs */
+	vf_admin->qos_vport = max_tx_rate ? slave : MLX4_VPP_DEFAULT_VPORT;
+
+	if (priv->mfunc.master.slave_state[slave].active &&
+	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP)
+		mlx4_master_immediate_activate_vlan_qos(priv, slave, port);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_rate);
+
  /* mlx4_get_slave_default_vlan -
  * return true if VST ( default vlan)
  * if VST, will return vlan & qos (if not NULL)
@@ -2911,7 +3081,12 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in
 
 	ivf->vlan		= s_info->default_vlan;
 	ivf->qos		= s_info->default_qos;
-	ivf->max_tx_rate	= s_info->tx_rate;
+
+	if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info))
+		ivf->max_tx_rate = s_info->tx_rate;
+	else
+		ivf->max_tx_rate = 0;
+
 	ivf->min_tx_rate	= 0;
 	ivf->spoofchk		= s_info->spoofchk;
 	ivf->linkstate		= s_info->link_state;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index d73ab1ba5a78..2a5df0aa2228 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2240,6 +2240,16 @@ static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
 	return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos);
 }
 
+static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+			       int max_tx_rate)
+{
+	struct mlx4_en_priv *en_priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = en_priv->mdev;
+
+	return mlx4_set_vf_rate(mdev->dev, en_priv->port, vf, min_tx_rate,
+				max_tx_rate);
+}
+
 static int mlx4_en_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
 {
 	struct mlx4_en_priv *en_priv = netdev_priv(dev);
@@ -2458,6 +2468,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
 	.ndo_vlan_rx_kill_vid	= mlx4_en_vlan_rx_kill_vid,
 	.ndo_set_vf_mac		= mlx4_en_set_vf_mac,
 	.ndo_set_vf_vlan	= mlx4_en_set_vf_vlan,
+	.ndo_set_vf_rate	= mlx4_en_set_vf_rate,
 	.ndo_set_vf_spoofchk	= mlx4_en_set_vf_spoofchk,
 	.ndo_set_vf_link_state	= mlx4_en_set_vf_link_state,
 	.ndo_get_vf_config	= mlx4_en_get_vf_config,
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 88326ed0033a..a788839f54bb 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -296,6 +296,8 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo
 u32 mlx4_comm_get_version(void);
 int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac);
 int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos);
+int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate,
+		     int max_tx_rate);
 int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting);
 int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf);
 int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state);
-- 
cgit v1.2.3


From 3742cc65512cd4897a63dce94104f9a6e74997a0 Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:17 +0300
Subject: net/mlx4: Warn users of depracated QoS Firmware

A new capability bit was introduced in the past to to differ devices
using the QoS ETS feature. The old was deprecated since then.
If driver sees device which set only the old capabilty, it will print
warning to user suggesting to upgrade the FW.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
 drivers/net/ethernet/mellanox/mlx4/fw.c        | 7 +++++--
 drivers/net/ethernet/mellanox/mlx4/main.c      | 8 ++++++++
 include/linux/mlx4/device.h                    | 1 +
 4 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 2a5df0aa2228..be9e07a72193 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2814,7 +2814,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 #ifdef CONFIG_MLX4_EN_DCB
 	if (!mlx4_is_slave(priv->mdev->dev)) {
-		if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
+		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
 			dev->dcbnl_ops = &mlx4_en_dcbnl_ops;
 		} else {
 			en_info(priv, "enabling only PFC DCB ops\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 94e857e39063..16bbd6cdff6c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -149,6 +149,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[23] = "QP rate limiting support",
 		[24] = "Ethernet Flow control statistics support",
 		[25] = "Granular QoS per VF support",
+		[26] = "Port ETS Scheduler support",
 	};
 	int i;
 
@@ -900,6 +901,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN);
 	if (field & 1<<3)
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS;
+	if (field & (1 << 5))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
 	MLX4_GET(dev_cap->max_icm_sz, outbox,
 		 QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
 	if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
@@ -1157,9 +1160,9 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	field &= 0x7f;
 	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET);
 
-	/* For guests, disable vxlan tunneling */
+	/* For guests, disable vxlan tunneling and QoS support */
 	MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VXLAN);
-	field &= 0xf7;
+	field &= 0xd7;
 	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN);
 
 	/* For guests, report Blueflame disabled */
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index ee0a67069b57..3becad37aec8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -528,6 +528,14 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.alloc_res_qp_mask =
 			(dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
 			MLX4_RESERVE_A0_QP;
+
+		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
+		    dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
+			mlx4_warn(dev, "Old device ETS support detected\n");
+			mlx4_warn(dev, "Consider upgrading device FW.\n");
+			dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
+		}
+
 	} else {
 		dev->caps.alloc_res_qp_mask = 0;
 	}
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index c37208f7869f..15f46767342e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -207,6 +207,7 @@ enum {
 	MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT	= 1LL <<  23,
 	MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN         = 1LL <<  24,
 	MLX4_DEV_CAP_FLAG2_QOS_VPP		= 1LL <<  25,
+	MLX4_DEV_CAP_FLAG2_ETS_CFG		= 1LL <<  26,
 };
 
 enum {
-- 
cgit v1.2.3


From a130b59057320192b4b00ed0ba4bc8a38f66f289 Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:19 +0300
Subject: net/mlx4: Add SET_PORT opcode modifiers enumeration

The calls to SET_PORT used hard-code numbers, when supplying command's
opcode modifiers, fix that to use well defined constants.

Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c         | 11 +++++-----
 drivers/net/ethernet/mellanox/mlx4/cmd.c  |  3 ++-
 drivers/net/ethernet/mellanox/mlx4/port.c | 36 ++++++++++++++++++-------------
 include/linux/mlx4/cmd.h                  |  8 ++++++-
 4 files changed, 36 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index b972c0b41799..976bea794b5f 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -587,8 +587,9 @@ static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_vio
 		((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
 	}
 
-	err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+	err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
+		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev->dev, mailbox);
 	return err;
@@ -1525,8 +1526,8 @@ static void update_gids_task(struct work_struct *work)
 	memcpy(gids, gw->gids, sizeof gw->gids);
 
 	err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
-		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
-		       MLX4_CMD_WRAPPED);
+		       MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 	if (err)
 		pr_warn("set port command failed\n");
 	else
@@ -1564,7 +1565,7 @@ static void reset_gids_task(struct work_struct *work)
 				    IB_LINK_LAYER_ETHERNET) {
 		err = mlx4_cmd(dev, mailbox->dma,
 			       MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
-			       1, MLX4_CMD_SET_PORT,
+			       MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
 			       MLX4_CMD_TIME_CLASS_B,
 			       MLX4_CMD_WRAPPED);
 		if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index bc6e4c9b6bb5..06993ea9e6ba 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -726,7 +726,8 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 		 * specific command/input_mod/opcode_mod/fw-status to be debug.
 		 */
 		if (op == MLX4_CMD_SET_PORT && in_modifier == 1 &&
-		    op_modifier == 0 && context->fw_status == CMD_STAT_BAD_SIZE)
+		    op_modifier == MLX4_SET_PORT_IB_OPCODE &&
+		    context->fw_status == CMD_STAT_BAD_SIZE)
 			mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n",
 				 op, context->fw_status);
 		else
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 6a53d42db52f..ca4488ed561c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -123,8 +123,9 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
 
 	in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
 
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_NATIVE);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -337,8 +338,9 @@ static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
 
 	memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE);
 	in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_NATIVE);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 
@@ -625,9 +627,9 @@ static int mlx4_reset_roce_port_gids(struct mlx4_dev *dev, int slave,
 		       MLX4_ROCE_GID_ENTRY_SIZE);
 
 	err = mlx4_cmd(dev, mailbox->dma,
-		       ((u32)port) | (MLX4_SET_PORT_GID_TABLE << 8), 1,
-		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
-		       MLX4_CMD_NATIVE);
+		       ((u32)port) | (MLX4_SET_PORT_GID_TABLE << 8),
+		       MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 	mutex_unlock(&(priv->port[port].gid_table.mutex));
 	return err;
 }
@@ -941,8 +943,9 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
 			(pkey_tbl_flag << MLX4_CHANGE_PORT_PKEY_TBL_SZ) |
 			(dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) |
 			(vl_cap << MLX4_SET_PORT_VL_CAP));
-		err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
-				MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+		err = mlx4_cmd(dev, mailbox->dma, port,
+			       MLX4_SET_PORT_IB_OPCODE, MLX4_CMD_SET_PORT,
+			       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 		if (err != -ENOMEM)
 			break;
 	}
@@ -971,8 +974,9 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 	context->pfcrx = pfcrx;
 
 	in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B,  MLX4_CMD_WRAPPED);
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -1008,8 +1012,9 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 	context->vlan_miss = MLX4_VLAN_MISS_IDX;
 
 	in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B,  MLX4_CMD_WRAPPED);
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -1050,8 +1055,9 @@ int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable)
 	context->steering  = steering;
 
 	in_mod = MLX4_SET_PORT_VXLAN << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_NATIVE);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index a788839f54bb..62c4d4def474 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -188,7 +188,13 @@ enum {
 };
 
 enum {
-	/* set port opcode modifiers */
+	/* Set port opcode modifiers */
+	MLX4_SET_PORT_IB_OPCODE		= 0x0,
+	MLX4_SET_PORT_ETH_OPCODE	= 0x1,
+};
+
+enum {
+	/* Set port Ethernet input modifiers */
 	MLX4_SET_PORT_GENERAL   = 0x0,
 	MLX4_SET_PORT_RQP_CALC  = 0x1,
 	MLX4_SET_PORT_MAC_TABLE = 0x2,
-- 
cgit v1.2.3


From 51af33cfed248dc8f36fa82df06b85e10038a01e Mon Sep 17 00:00:00 2001
From: Ido Shamay <idos@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:20 +0300
Subject: net/mlx4_en: Add interface identify support

Add support for the interface ethtool identify feature.

Make the physical port LED to blink with green and yellow colors.

The device handles the LED blink by itself (synchrous use of
set_phys_id), by returning 0 to ETHTOOL_ID_ACTIVE command.

Signed-off-by: Eyal Grossman <eyalgr@mellanox.com>
Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 27 +++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/fw.c         | 10 +++++++++
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    |  1 +
 drivers/net/ethernet/mellanox/mlx4/port.c       | 26 ++++++++++++++++++++++++
 include/linux/mlx4/cmd.h                        |  1 +
 include/linux/mlx4/device.h                     |  2 ++
 6 files changed, 67 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index eba969b08dd1..3f44e2bbb982 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1939,6 +1939,32 @@ static int mlx4_en_get_module_eeprom(struct net_device *dev,
 	return 0;
 }
 
+static int mlx4_en_set_phys_id(struct net_device *dev,
+			       enum ethtool_phys_id_state state)
+{
+	int err;
+	u16 beacon_duration;
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+
+	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_BEACON))
+		return -EOPNOTSUPP;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		beacon_duration = PORT_BEACON_MAX_LIMIT;
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		beacon_duration = 0;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = mlx4_SET_PORT_BEACON(mdev->dev, priv->port, beacon_duration);
+	return err;
+}
+
 const struct ethtool_ops mlx4_en_ethtool_ops = {
 	.get_drvinfo = mlx4_en_get_drvinfo,
 	.get_settings = mlx4_en_get_settings,
@@ -1948,6 +1974,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = {
 	.get_sset_count = mlx4_en_get_sset_count,
 	.get_ethtool_stats = mlx4_en_get_ethtool_stats,
 	.self_test = mlx4_en_self_test,
+	.set_phys_id = mlx4_en_set_phys_id,
 	.get_wol = mlx4_en_get_wol,
 	.set_wol = mlx4_en_set_wol,
 	.get_msglevel = mlx4_en_get_msglevel,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index b93e23bef6c9..6317844b291f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -150,6 +150,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[24] = "Ethernet Flow control statistics support",
 		[25] = "Granular QoS per VF support",
 		[26] = "Port ETS Scheduler support",
+		[27] = "Port beacon support",
 	};
 	int i;
 
@@ -647,6 +648,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_RSS_OFFSET		0x2e
 #define QUERY_DEV_CAP_MAX_RDMA_OFFSET		0x2f
 #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET		0x33
+#define QUERY_DEV_CAP_PORT_BEACON_OFFSET	0x34
 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET		0x35
 #define QUERY_DEV_CAP_MTU_WIDTH_OFFSET		0x36
 #define QUERY_DEV_CAP_VL_PORT_OFFSET		0x37
@@ -786,6 +788,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	if (field & 0x80)
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
 	dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET);
+	if (field & 0x80)
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
 	if (field & 0x80)
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB;
@@ -1165,6 +1170,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	field &= 0xd7;
 	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN);
 
+	/* For guests, disable port BEACON */
+	MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_PORT_BEACON_OFFSET);
+	field &= 0x7f;
+	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_PORT_BEACON_OFFSET);
+
 	/* For guests, report Blueflame disabled */
 	MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET);
 	field &= 0x7f;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 67eeea244eff..9de30216b146 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -482,6 +482,7 @@ enum {
 	MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP	= (1 << 5),
 };
 
+#define PORT_BEACON_MAX_LIMIT (65535)
 #define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE)
 #define MLX4_EN_MAC_HASH_IDX 5
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index ca4488ed561c..e8d716dfd5cb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -835,6 +835,12 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
 				MLX4_CMD_NATIVE);
 	}
 
+	/* Slaves are not allowed to SET_PORT beacon (LED) blink */
+	if (op_mod == MLX4_SET_PORT_BEACON_OPCODE) {
+		mlx4_warn(dev, "denying SET_PORT Beacon slave:%d\n", slave);
+		return -EPERM;
+	}
+
 	/* For IB, we only consider:
 	 * - The capability mask, which is set to the aggregate of all
 	 *   slave function capabilities
@@ -1064,6 +1070,26 @@ int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable)
 }
 EXPORT_SYMBOL(mlx4_SET_PORT_VXLAN);
 
+int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time)
+{
+	int err;
+	struct mlx4_cmd_mailbox *mailbox;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	*((__be32 *)mailbox->buf) = cpu_to_be32(time);
+
+	err = mlx4_cmd(dev, mailbox->dma, port, MLX4_SET_PORT_BEACON_OPCODE,
+		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_BEACON);
+
 int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_vhcr *vhcr,
 				struct mlx4_cmd_mailbox *inbox,
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 62c4d4def474..f62e7cf227c6 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -191,6 +191,7 @@ enum {
 	/* Set port opcode modifiers */
 	MLX4_SET_PORT_IB_OPCODE		= 0x0,
 	MLX4_SET_PORT_ETH_OPCODE	= 0x1,
+	MLX4_SET_PORT_BEACON_OPCODE	= 0x4,
 };
 
 enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 15f46767342e..b761be7c88c8 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -208,6 +208,7 @@ enum {
 	MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN         = 1LL <<  24,
 	MLX4_DEV_CAP_FLAG2_QOS_VPP		= 1LL <<  25,
 	MLX4_DEV_CAP_FLAG2_ETS_CFG		= 1LL <<  26,
+	MLX4_DEV_CAP_FLAG2_PORT_BEACON		= 1LL <<  27,
 };
 
 enum {
@@ -1311,6 +1312,7 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
 int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 			   u8 promisc);
+int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time);
 int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable);
 int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
-- 
cgit v1.2.3


From 78500b8c03236a18d454c9cc8a24cccca506b200 Mon Sep 17 00:00:00 2001
From: Muhammad Mahajna <muhammadm@mellanox.com>
Date: Thu, 2 Apr 2015 16:31:22 +0300
Subject: net/mlx4_en: Add RX-ALL support

Enabled when the device supports KEEP FCS and IGNORE FCS.

When the flag is set, pass all received frames up the stack,
even ones with invalid FCS, controlled by ethtool.

Signed-off-by: Muhammad Mahajna <muhammadm@mellanox.com>
Signed-off-by: Ido Shamay <idos@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 14 +++++++++++
 drivers/net/ethernet/mellanox/mlx4/fw.c        |  8 ++++++
 drivers/net/ethernet/mellanox/mlx4/main.c      | 21 ++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/mlx4.h      |  6 +++--
 drivers/net/ethernet/mellanox/mlx4/port.c      | 34 ++++++++++++++++++++++++++
 include/linux/mlx4/device.h                    |  3 +++
 6 files changed, 84 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 333af3405a80..0f1afc085d58 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2204,6 +2204,17 @@ static int mlx4_en_set_features(struct net_device *netdev,
 		reset = true;
 	}
 
+	if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_RXALL)) {
+		u8 ignore_fcs_value = (features & NETIF_F_RXALL) ? 1 : 0;
+
+		en_info(priv, "Turn %s RX-ALL\n",
+			ignore_fcs_value ? "ON" : "OFF");
+		ret = mlx4_SET_PORT_fcs_check(priv->mdev->dev,
+					      priv->port, ignore_fcs_value);
+		if (ret)
+			return ret;
+	}
+
 	if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
 		en_info(priv, "Turn %s RX vlan strip offload\n",
 			(features & NETIF_F_HW_VLAN_CTAG_RX) ? "ON" : "OFF");
@@ -2914,6 +2925,9 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
 		dev->hw_features |= NETIF_F_RXFCS;
 
+	if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS)
+		dev->hw_features |= NETIF_F_RXALL;
+
 	if (mdev->dev->caps.steering_mode ==
 	    MLX4_STEERING_MODE_DEVICE_MANAGED &&
 	    mdev->dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC)
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 6317844b291f..b9881fc1252f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -151,6 +151,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[25] = "Granular QoS per VF support",
 		[26] = "Port ETS Scheduler support",
 		[27] = "Port beacon support",
+		[28] = "RX-ALL support",
 	};
 	int i;
 
@@ -893,6 +894,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
 	if (field & 0x20)
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV;
+	if (field & (1 << 2))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
 	MLX4_GET(dev_cap->reserved_lkey, outbox,
 		 QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
 	MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET);
@@ -1220,6 +1223,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	field &= 0xef;
 	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
 
+	/* turn off ignore FCS feature for guests */
+	MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
+	field &= 0xfb;
+	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 3becad37aec8..acceb75e8c44 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -297,6 +297,25 @@ static int mlx4_dev_port(struct mlx4_dev *dev, int port,
 	return err;
 }
 
+static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
+{
+	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
+		return;
+
+	if (mlx4_is_mfunc(dev)) {
+		mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS");
+		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
+		return;
+	}
+
+	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
+		mlx4_dbg(dev,
+			 "Keep FCS is not supported - Disabling Ignore FCS");
+		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
+		return;
+	}
+}
+
 #define MLX4_A0_STEERING_TABLE_SIZE	256
 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
@@ -540,6 +559,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.alloc_res_qp_mask = 0;
 	}
 
+	mlx4_enable_ignore_fcs(dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 32469c64d580..f30eeb730a86 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -763,9 +763,11 @@ enum {
 
 
 struct mlx4_set_port_general_context {
-	u8 reserved[3];
+	u16 reserved1;
+	u8 v_ignore_fcs;
 	u8 flags;
-	u16 reserved2;
+	u8 ignore_fcs;
+	u8 reserved2;
 	__be16 mtu;
 	u8 pptx;
 	u8 pfctx;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index e8d716dfd5cb..c2b21313dba7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -45,6 +45,14 @@
 #define MLX4_VLAN_VALID		(1u << 31)
 #define MLX4_VLAN_MASK		0xfff
 
+#define MLX4_STATS_TRAFFIC_COUNTERS_MASK	0xfULL
+#define MLX4_STATS_TRAFFIC_DROPS_MASK		0xc0ULL
+#define MLX4_STATS_ERROR_COUNTERS_MASK		0x1ffc30ULL
+#define MLX4_STATS_PORT_COUNTERS_MASK		0x1fe00000ULL
+
+#define MLX4_FLAG_V_IGNORE_FCS_MASK		0x2
+#define MLX4_IGNORE_FCS_MASK			0x1
+
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
 {
 	int i;
@@ -1027,6 +1035,32 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 }
 EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc);
 
+int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_general_context *context;
+	u32 in_mod;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	context = mailbox->buf;
+	context->v_ignore_fcs |= MLX4_FLAG_V_IGNORE_FCS_MASK;
+	if (ignore_fcs_value)
+		context->ignore_fcs |= MLX4_IGNORE_FCS_MASK;
+	else
+		context->ignore_fcs &= ~MLX4_IGNORE_FCS_MASK;
+
+	in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_fcs_check);
+
 enum {
 	VXLAN_ENABLE_MODIFY	= 1 << 7,
 	VXLAN_STEERING_MODIFY	= 1 << 6,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b761be7c88c8..f9ce34bec45b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -209,6 +209,7 @@ enum {
 	MLX4_DEV_CAP_FLAG2_QOS_VPP		= 1LL <<  25,
 	MLX4_DEV_CAP_FLAG2_ETS_CFG		= 1LL <<  26,
 	MLX4_DEV_CAP_FLAG2_PORT_BEACON		= 1LL <<  27,
+	MLX4_DEV_CAP_FLAG2_IGNORE_FCS		= 1LL <<  28,
 };
 
 enum {
@@ -1313,6 +1314,8 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 			   u8 promisc);
 int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time);
+int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port,
+			    u8 ignore_fcs_value);
 int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable);
 int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
-- 
cgit v1.2.3


From 64599cca51de08cef94bc13a0f98351e5bb01f41 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@dev.mellanox.co.il>
Date: Thu, 2 Apr 2015 17:07:25 +0300
Subject: net/mlx5_core: Use coherent memory for command interface page

Use coherent memory for the commands descriptor page. Take measures to make
sure the page is aligned to MLX5_ADAPTER_PAGE_SIZE as required by the hardware.

Reported-by: Yevgeny Kliteynik <kliteyn@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 72 ++++++++++++++++++---------
 include/linux/mlx5/driver.h                   |  3 ++
 2 files changed, 52 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a2853057c779..43a73d36b01d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1319,6 +1319,45 @@ ex_err:
 	return err;
 }
 
+static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
+{
+	struct device *ddev = &dev->pdev->dev;
+
+	cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE,
+						 &cmd->alloc_dma, GFP_KERNEL);
+	if (!cmd->cmd_alloc_buf)
+		return -ENOMEM;
+
+	/* make sure it is aligned to 4K */
+	if (!((uintptr_t)cmd->cmd_alloc_buf & (MLX5_ADAPTER_PAGE_SIZE - 1))) {
+		cmd->cmd_buf = cmd->cmd_alloc_buf;
+		cmd->dma = cmd->alloc_dma;
+		cmd->alloc_size = MLX5_ADAPTER_PAGE_SIZE;
+		return 0;
+	}
+
+	dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
+			  cmd->alloc_dma);
+	cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev,
+						 2 * MLX5_ADAPTER_PAGE_SIZE - 1,
+						 &cmd->alloc_dma, GFP_KERNEL);
+	if (!cmd->cmd_alloc_buf)
+		return -ENOMEM;
+
+	cmd->cmd_buf = PTR_ALIGN(cmd->cmd_alloc_buf, MLX5_ADAPTER_PAGE_SIZE);
+	cmd->dma = ALIGN(cmd->alloc_dma, MLX5_ADAPTER_PAGE_SIZE);
+	cmd->alloc_size = 2 * MLX5_ADAPTER_PAGE_SIZE - 1;
+	return 0;
+}
+
+static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
+{
+	struct device *ddev = &dev->pdev->dev;
+
+	dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf,
+			  cmd->alloc_dma);
+}
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev)
 {
 	int size = sizeof(struct mlx5_cmd_prot_block);
@@ -1341,17 +1380,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 	if (!cmd->pool)
 		return -ENOMEM;
 
-	cmd->cmd_buf = (void *)__get_free_pages(GFP_ATOMIC, 0);
-	if (!cmd->cmd_buf) {
-		err = -ENOMEM;
+	err = alloc_cmd_page(dev, cmd);
+	if (err)
 		goto err_free_pool;
-	}
-	cmd->dma = dma_map_single(&dev->pdev->dev, cmd->cmd_buf, PAGE_SIZE,
-				  DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(&dev->pdev->dev, cmd->dma)) {
-		err = -ENOMEM;
-		goto err_free;
-	}
 
 	cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff;
 	cmd->log_sz = cmd_l >> 4 & 0xf;
@@ -1360,13 +1391,13 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 		dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n",
 			1 << cmd->log_sz);
 		err = -EINVAL;
-		goto err_map;
+		goto err_free_page;
 	}
 
 	if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
 		dev_err(&dev->pdev->dev, "command queue size overflow\n");
 		err = -EINVAL;
-		goto err_map;
+		goto err_free_page;
 	}
 
 	cmd->checksum_disabled = 1;
@@ -1378,7 +1409,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 		dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
 			CMD_IF_REV, cmd->cmdif_rev);
 		err = -ENOTSUPP;
-		goto err_map;
+		goto err_free_page;
 	}
 
 	spin_lock_init(&cmd->alloc_lock);
@@ -1394,7 +1425,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 	if (cmd_l & 0xfff) {
 		dev_err(&dev->pdev->dev, "invalid command queue address\n");
 		err = -ENOMEM;
-		goto err_map;
+		goto err_free_page;
 	}
 
 	iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
@@ -1410,7 +1441,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 	err = create_msg_cache(dev);
 	if (err) {
 		dev_err(&dev->pdev->dev, "failed to create command cache\n");
-		goto err_map;
+		goto err_free_page;
 	}
 
 	set_wqname(dev);
@@ -1435,11 +1466,8 @@ err_wq:
 err_cache:
 	destroy_msg_cache(dev);
 
-err_map:
-	dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE,
-			 DMA_BIDIRECTIONAL);
-err_free:
-	free_pages((unsigned long)cmd->cmd_buf, 0);
+err_free_page:
+	free_cmd_page(dev, cmd);
 
 err_free_pool:
 	pci_pool_destroy(cmd->pool);
@@ -1455,9 +1483,7 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
 	clean_debug_files(dev);
 	destroy_workqueue(cmd->wq);
 	destroy_msg_cache(dev);
-	dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE,
-			 DMA_BIDIRECTIONAL);
-	free_pages((unsigned long)cmd->cmd_buf, 0);
+	free_cmd_page(dev, cmd);
 	pci_pool_destroy(cmd->pool);
 }
 EXPORT_SYMBOL(mlx5_cmd_cleanup);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 166d9315fe4b..8d8ca6d9b03b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -232,6 +232,9 @@ struct mlx5_cmd_stats {
 };
 
 struct mlx5_cmd {
+	void	       *cmd_alloc_buf;
+	dma_addr_t	alloc_dma;
+	int		alloc_size;
 	void	       *cmd_buf;
 	dma_addr_t	dma;
 	u16		cmdif_rev;
-- 
cgit v1.2.3


From 302bdf68fc56a6330bc6b10ce435b4d466417537 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 2 Apr 2015 17:07:29 +0300
Subject: net/mlx5_core: Fix Mellanox copyright note

Signed-off-by: Achiad Shochat <achiad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/alloc.c     | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c       | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/cq.c        | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c   | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c        | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c        | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/health.c    | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/mad.c       | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c      | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/mcg.c       | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/mr.c        | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/pd.c        | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/port.c      | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/qp.c        | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/srq.c       | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/uar.c       | 2 +-
 include/linux/mlx5/cmd.h                            | 2 +-
 include/linux/mlx5/cq.h                             | 2 +-
 include/linux/mlx5/device.h                         | 2 +-
 include/linux/mlx5/doorbell.h                       | 2 +-
 include/linux/mlx5/driver.h                         | 2 +-
 include/linux/mlx5/mlx5_ifc.h                       | 2 +-
 include/linux/mlx5/qp.h                             | 2 +-
 include/linux/mlx5/srq.h                            | 2 +-
 26 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 63dd75485204..ac0f7bf4be95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 697d538163f2..e3273faf4568 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 43c5f4809526..eb0cf81f5f45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 4878025e231c..5210d92e6bc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index da82991239a8..dbf190d9b9ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 06f9036acd83..4b4cda3bcc5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 3e6670c4a7cd..292d76f2a904 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
index fd80ecfa7195..ee1b0b965f34 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f29fd5592305..78dfdab1ead4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
index 44837640bd7c..d79fd85d1dd5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index f0c9f9a7a361..a051b906afdf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 1a9b75139c2d..1adb300dd850 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index cf8591a0c48d..df2238372ea7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
index 790da5c4ca4f..f2d3aee909e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 72c2d002c3b8..49e90f2612d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 575d853dbe05..dc7dbf7e9d98 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index 38bce93f8314..f9d25dcd03c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 06801d6f595e..5a89bb1d678a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h
index 2826a4b6071e..68cd08f02c2f 100644
--- a/include/linux/mlx5/cmd.h
+++ b/include/linux/mlx5/cmd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index f6b17ac601bd..72ee0d732a26 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 4e5bd813bb9a..abf65c790421 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h
index 163a818411e7..afc78a3f4462 100644
--- a/include/linux/mlx5/doorbell.h
+++ b/include/linux/mlx5/doorbell.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8d8ca6d9b03b..8fedd39a9a60 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 5f48b8f592c5..cb3ad17edd1f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 61f7a342d1bf..310b5f7fd6ae 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
index e1a363a33663..f43ed054a3e0 100644
--- a/include/linux/mlx5/srq.h
+++ b/include/linux/mlx5/srq.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
-- 
cgit v1.2.3


From 233d05d28ad942929b6b4fbc48aa8dd083c16484 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 2 Apr 2015 17:07:32 +0300
Subject: net/mlx5_core: Move completion eqs from mlx5_ib to mlx5_core

Preparation for ethernet driver.
These functions will be used in drivers other than mlx5_ib.

Signed-off-by: Achiad Shochat <achiad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx5/cq.c                |   2 +-
 drivers/infiniband/hw/mlx5/main.c              | 104 +------------------------
 drivers/infiniband/hw/mlx5/mlx5_ib.h           |   3 -
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  91 ++++++++++++++++++++++
 include/linux/mlx5/driver.h                    |   3 +-
 5 files changed, 98 insertions(+), 105 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 6efd9f73ff60..c94e2651820d 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -780,7 +780,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
 	cq->cqe_size = cqe_size;
 	cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
 	cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
-	err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
+	err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
 	if (err)
 		goto err_cqb;
 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 45816b10af5e..e59e2a1f8726 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -62,95 +62,6 @@ static char mlx5_version[] =
 	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
 	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
 
-int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
-{
-	struct mlx5_eq_table *table = &dev->mdev->priv.eq_table;
-	struct mlx5_eq *eq, *n;
-	int err = -ENOENT;
-
-	spin_lock(&table->lock);
-	list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
-		if (eq->index == vector) {
-			*eqn = eq->eqn;
-			*irqn = eq->irqn;
-			err = 0;
-			break;
-		}
-	}
-	spin_unlock(&table->lock);
-
-	return err;
-}
-
-static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
-{
-	struct mlx5_eq_table *table = &dev->mdev->priv.eq_table;
-	char name[MLX5_MAX_EQ_NAME];
-	struct mlx5_eq *eq, *n;
-	int ncomp_vec;
-	int nent;
-	int err;
-	int i;
-
-	INIT_LIST_HEAD(&dev->eqs_list);
-	ncomp_vec = table->num_comp_vectors;
-	nent = MLX5_COMP_EQ_SIZE;
-	for (i = 0; i < ncomp_vec; i++) {
-		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
-		if (!eq) {
-			err = -ENOMEM;
-			goto clean;
-		}
-
-		snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
-		err = mlx5_create_map_eq(dev->mdev, eq,
-					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
-					 name, &dev->mdev->priv.uuari.uars[0]);
-		if (err) {
-			kfree(eq);
-			goto clean;
-		}
-		mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
-		eq->index = i;
-		spin_lock(&table->lock);
-		list_add_tail(&eq->list, &dev->eqs_list);
-		spin_unlock(&table->lock);
-	}
-
-	dev->num_comp_vectors = ncomp_vec;
-	return 0;
-
-clean:
-	spin_lock(&table->lock);
-	list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
-		list_del(&eq->list);
-		spin_unlock(&table->lock);
-		if (mlx5_destroy_unmap_eq(dev->mdev, eq))
-			mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
-		kfree(eq);
-		spin_lock(&table->lock);
-	}
-	spin_unlock(&table->lock);
-	return err;
-}
-
-static void free_comp_eqs(struct mlx5_ib_dev *dev)
-{
-	struct mlx5_eq_table *table = &dev->mdev->priv.eq_table;
-	struct mlx5_eq *eq, *n;
-
-	spin_lock(&table->lock);
-	list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
-		list_del(&eq->list);
-		spin_unlock(&table->lock);
-		if (mlx5_destroy_unmap_eq(dev->mdev, eq))
-			mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
-		kfree(eq);
-		spin_lock(&table->lock);
-	}
-	spin_unlock(&table->lock);
-}
-
 static int mlx5_ib_query_device(struct ib_device *ibdev,
 				struct ib_device_attr *props)
 {
@@ -1291,10 +1202,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 
 	get_ext_port_caps(dev);
 
-	err = alloc_comp_eqs(dev);
-	if (err)
-		goto err_dealloc;
-
 	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
 
 	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
@@ -1303,7 +1210,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.local_dma_lkey	= mdev->caps.gen.reserved_lkey;
 	dev->num_ports		= mdev->caps.gen.num_ports;
 	dev->ib_dev.phys_port_cnt     = dev->num_ports;
-	dev->ib_dev.num_comp_vectors	= dev->num_comp_vectors;
+	dev->ib_dev.num_comp_vectors    =
+		dev->mdev->priv.eq_table.num_comp_vectors;
 	dev->ib_dev.dma_device	= &mdev->pdev->dev;
 
 	dev->ib_dev.uverbs_abi_ver	= MLX5_IB_UVERBS_ABI_VERSION;
@@ -1390,13 +1298,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 
 	err = init_node_data(dev);
 	if (err)
-		goto err_eqs;
+		goto err_dealloc;
 
 	mutex_init(&dev->cap_mask_mutex);
 
 	err = create_dev_resources(&dev->devr);
 	if (err)
-		goto err_eqs;
+		goto err_dealloc;
 
 	err = mlx5_ib_odp_init_one(dev);
 	if (err)
@@ -1433,9 +1341,6 @@ err_odp:
 err_rsrc:
 	destroy_dev_resources(&dev->devr);
 
-err_eqs:
-	free_comp_eqs(dev);
-
 err_dealloc:
 	ib_dealloc_device((struct ib_device *)dev);
 
@@ -1450,7 +1355,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 	destroy_umrc_res(dev);
 	mlx5_ib_odp_remove_one(dev);
 	destroy_dev_resources(&dev->devr);
-	free_comp_eqs(dev);
 	ib_dealloc_device(&dev->ib_dev);
 }
 
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 8f94f69fbea5..dff1cfcdf476 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -421,9 +421,7 @@ struct mlx5_ib_dev {
 	struct ib_device		ib_dev;
 	struct mlx5_core_dev		*mdev;
 	MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
-	struct list_head		eqs_list;
 	int				num_ports;
-	int				num_comp_vectors;
 	/* serialize update of capability mask
 	 */
 	struct mutex			cap_mask_mutex;
@@ -594,7 +592,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
 					  struct ib_ucontext *context,
 					  struct ib_udata *udata);
 int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
-int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn);
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
 int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fe41a7c2c629..cfc29027f07a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -507,6 +507,87 @@ static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
 	return 0;
 }
 
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn)
+{
+	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	struct mlx5_eq *eq, *n;
+	int err = -ENOENT;
+
+	spin_lock(&table->lock);
+	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+		if (eq->index == vector) {
+			*eqn = eq->eqn;
+			*irqn = eq->irqn;
+			err = 0;
+			break;
+		}
+	}
+	spin_unlock(&table->lock);
+
+	return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
+
+static void free_comp_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	struct mlx5_eq *eq, *n;
+
+	spin_lock(&table->lock);
+	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+		list_del(&eq->list);
+		spin_unlock(&table->lock);
+		if (mlx5_destroy_unmap_eq(dev, eq))
+			mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
+				       eq->eqn);
+		kfree(eq);
+		spin_lock(&table->lock);
+	}
+	spin_unlock(&table->lock);
+}
+
+static int alloc_comp_eqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	char name[MLX5_MAX_EQ_NAME];
+	struct mlx5_eq *eq;
+	int ncomp_vec;
+	int nent;
+	int err;
+	int i;
+
+	INIT_LIST_HEAD(&table->comp_eqs_list);
+	ncomp_vec = table->num_comp_vectors;
+	nent = MLX5_COMP_EQ_SIZE;
+	for (i = 0; i < ncomp_vec; i++) {
+		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+		if (!eq) {
+			err = -ENOMEM;
+			goto clean;
+		}
+
+		snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
+		err = mlx5_create_map_eq(dev, eq,
+					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
+					 name, &dev->priv.uuari.uars[0]);
+		if (err) {
+			kfree(eq);
+			goto clean;
+		}
+		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
+		eq->index = i;
+		spin_lock(&table->lock);
+		list_add_tail(&eq->list, &table->comp_eqs_list);
+		spin_unlock(&table->lock);
+	}
+
+	return 0;
+
+clean:
+	free_comp_eqs(dev);
+	return err;
+}
+
 static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
 {
 	struct mlx5_priv *priv = &dev->priv;
@@ -643,6 +724,12 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
 		goto err_free_uar;
 	}
 
+	err = alloc_comp_eqs(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
+		goto err_stop_eqs;
+	}
+
 	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
 
 	mlx5_init_cq_table(dev);
@@ -652,6 +739,9 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
 
 	return 0;
 
+err_stop_eqs:
+	mlx5_stop_eqs(dev);
+
 err_free_uar:
 	mlx5_free_uuars(dev, &priv->uuari);
 
@@ -703,6 +793,7 @@ static void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
 	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
 	mlx5_cleanup_cq_table(dev);
+	free_comp_eqs(dev);
 	mlx5_stop_eqs(dev);
 	mlx5_free_uuars(dev, &priv->uuari);
 	mlx5_eq_cleanup(dev);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8fedd39a9a60..f250f6580dad 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -410,7 +410,7 @@ struct mlx5_core_srq {
 struct mlx5_eq_table {
 	void __iomem	       *update_ci;
 	void __iomem	       *update_arm_ci;
-	struct list_head       *comp_eq_head;
+	struct list_head	comp_eqs_list;
 	struct mlx5_eq		pages_eq;
 	struct mlx5_eq		async_eq;
 	struct mlx5_eq		cmd_eq;
@@ -725,6 +725,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
 int mlx5_stop_eqs(struct mlx5_core_dev *dev);
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
 int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
 
-- 
cgit v1.2.3


From ce0f75093282c5dca1e79ae3e3e893deaea86166 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 2 Apr 2015 17:07:33 +0300
Subject: net/mlx5_core: Modify arm CQ in preparation for upcoming Ethernet
 driver

Pass consumer index as a parameter to arm CQ

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx5/cq.c | 8 ++++++--
 include/linux/mlx5/cq.h         | 5 +++--
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index c94e2651820d..2ee6b1051975 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -572,11 +572,15 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 
 int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
+	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
+	void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
+
 	mlx5_cq_arm(&to_mcq(ibcq)->mcq,
 		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
 		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
-		    to_mdev(ibcq->device)->mdev->priv.uuari.uars[0].map,
-		    MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev->priv.cq_uar_lock));
+		    uar_page,
+		    MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
+		    to_mcq(ibcq)->mcq.cons_index);
 
 	return 0;
 }
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 72ee0d732a26..2695ced222df 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -137,14 +137,15 @@ enum {
 
 static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
 			       void __iomem *uar_page,
-			       spinlock_t *doorbell_lock)
+			       spinlock_t *doorbell_lock,
+			       u32 cons_index)
 {
 	__be32 doorbell[2];
 	u32 sn;
 	u32 ci;
 
 	sn = cq->arm_sn & 3;
-	ci = cq->cons_index & 0xffffff;
+	ci = cons_index & 0xffffff;
 
 	*cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci);
 
-- 
cgit v1.2.3


From 64613d9499c4887485d4350387919ea507330d90 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 2 Apr 2015 17:07:34 +0300
Subject: net/mlx5_core: Extend struct mlx5_interface to support multiple
 protocols

Preparation for ethernet driver.

Signed-off-by: Achiad Shochat <achiad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx5/main.c              |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 22 ++++++++++++++++++++++
 include/linux/mlx5/driver.h                    |  8 ++++++++
 3 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e59e2a1f8726..57c9809e8b87 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1362,6 +1362,7 @@ static struct mlx5_interface mlx5_ib_interface = {
 	.add            = mlx5_ib_add,
 	.remove         = mlx5_ib_remove,
 	.event          = mlx5_ib_event,
+	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,
 };
 
 static int __init mlx5_ib_init(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index cfc29027f07a..28425e5ea91f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -908,6 +908,28 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
 }
 EXPORT_SYMBOL(mlx5_unregister_interface);
 
+void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
+{
+	struct mlx5_priv *priv = &mdev->priv;
+	struct mlx5_device_context *dev_ctx;
+	unsigned long flags;
+	void *result = NULL;
+
+	spin_lock_irqsave(&priv->ctx_lock, flags);
+
+	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
+		if ((dev_ctx->intf->protocol == protocol) &&
+		    dev_ctx->intf->get_dev) {
+			result = dev_ctx->intf->get_dev(dev_ctx->context);
+			break;
+		}
+
+	spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(mlx5_get_protocol_dev);
+
 static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 			    unsigned long param)
 {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f250f6580dad..9a90e7523dc2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -781,14 +781,22 @@ enum {
 	MAX_MR_CACHE_ENTRIES    = 16,
 };
 
+enum {
+	MLX5_INTERFACE_PROTOCOL_IB  = 0,
+	MLX5_INTERFACE_PROTOCOL_ETH = 1,
+};
+
 struct mlx5_interface {
 	void *			(*add)(struct mlx5_core_dev *dev);
 	void			(*remove)(struct mlx5_core_dev *dev, void *context);
 	void			(*event)(struct mlx5_core_dev *dev, void *context,
 					 enum mlx5_dev_event event, unsigned long param);
+	void *                  (*get_dev)(void *context);
+	int			protocol;
 	struct list_head	list;
 };
 
+void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol);
 int mlx5_register_interface(struct mlx5_interface *intf);
 void mlx5_unregister_interface(struct mlx5_interface *intf);
 
-- 
cgit v1.2.3


From e79d8429aac95a5cbe4c235795c7cd554c91f924 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 3 Apr 2015 22:17:17 +1030
Subject: netdevice: document NETDEV_TX_BUSY deprecation.

This paraphrases DaveM (and steals some of his words) explaining why
a device shouldn't return NETDEV_TX_BUSY, even though it looks so inviting
to driver authors.

See http://www.spinics.net/lists/netdev/msg322350.html

Inspired-by: David Miller <davem@davemloft.net>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 846a1f5bc9db..a710d22b174f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -795,7 +795,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb,
  *                               struct net_device *dev);
  *	Called when a packet needs to be transmitted.
- *	Must return NETDEV_TX_OK , NETDEV_TX_BUSY.
+ *	Returns NETDEV_TX_OK.  Can return NETDEV_TX_BUSY, but you should stop
+ *	the queue before that can happen; it's for obsolete devices and weird
+ *	corner cases, but the stack really does a non-trivial amount
+ *	of useless work if you return NETDEV_TX_BUSY.
  *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
  *	Required can not be NULL.
  *
-- 
cgit v1.2.3


From 2e7056c433216f406b90a003aa0ba42e19d3bdcf Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Tue, 31 Mar 2015 14:19:10 -0700
Subject: jhash: Update jhash_[321]words functions to use correct initval

Looking over the implementation for jhash2 and comparing it to jhash_3words
I realized that the two hashes were in fact very different.  Doing a bit of
digging led me to "The new jhash implementation" in which lookup2 was
supposed to have been replaced with lookup3.

In reviewing the patch I noticed that jhash2 had originally initialized a
and b to JHASH_GOLDENRATIO and c to initval, but after the patch a, b, and
c were initialized to initval + (length << 2) + JHASH_INITVAL.  However the
changes in jhash_3words simply replaced the initialization of a and b with
JHASH_INITVAL.

This change corrects what I believe was an oversight so that a, b, and c in
jhash_3words all have the same value added consisting of initval + (length
<< 2) + JHASH_INITVAL so that jhash2 and jhash_3words will now produce the
same hash result given the same inputs.

Fixes: 60d509c823cca ("The new jhash implementation")
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/jhash.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index 47cb09edec1a..348c6f47e4cc 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -145,11 +145,11 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
 }
 
 
-/* jhash_3words - hash exactly 3, 2 or 1 word(s) */
-static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
 {
-	a += JHASH_INITVAL;
-	b += JHASH_INITVAL;
+	a += initval;
+	b += initval;
 	c += initval;
 
 	__jhash_final(a, b, c);
@@ -157,14 +157,19 @@ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
 	return c;
 }
 
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+	return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2));
+}
+
 static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
 {
-	return jhash_3words(a, b, 0, initval);
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
 }
 
 static inline u32 jhash_1word(u32 a, u32 initval)
 {
-	return jhash_3words(a, 0, 0, initval);
+	return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2));
 }
 
 #endif /* _LINUX_JHASH_H */
-- 
cgit v1.2.3


From bcad57182425426dd4aa14deb27f97acb329f3cd Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 3 Apr 2015 20:52:24 +0200
Subject: ebpf: add skb->priority to offset map for usage in {cls, act}_bpf

This adds the ability to read out the skb->priority from an eBPF
program, so that it can be taken into account from a tc filter
or action for the use-case where the priority is not being used
to directly override the filter classification in a qdisc, but
to tag traffic otherwise for the classifier; the priority can be
assigned from various places incl. user space, in future we may
also mangle it from an eBPF program.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 1 +
 net/core/filter.c        | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 74aab6e0d964..0db8580f3cca 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -184,6 +184,7 @@ struct __sk_buff {
 	__u32 vlan_present;
 	__u32 vlan_tci;
 	__u32 vlan_proto;
+	__u32 priority;
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index 444a07e4f68d..955a7d77decd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1304,6 +1304,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 				      offsetof(struct sk_buff, vlan_proto));
 		break;
 
+	case offsetof(struct __sk_buff, priority):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sk_buff, priority));
+		break;
+
 	case offsetof(struct __sk_buff, mark):
 		return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
 
-- 
cgit v1.2.3


From a3bebdce4135a44d09e96ba66c40797c8f9fa902 Mon Sep 17 00:00:00 2001
From: Stas Sergeev <stsp@list.ru>
Date: Wed, 1 Apr 2015 20:30:31 +0300
Subject: add fixed_phy_update_state() - update state of fixed_phy

Currently fixed_phy uses a callback to periodically poll the link state.
This patch adds the fixed_phy_update_state() API.
It solves the following problems:
- On link state interrupt, MAC driver can't update status.
Instead it needs to provide the callback to periodically query
the HW about the link state. It is more efficient to update status
after interrupt.
- The callback needs to be unregistered before phy_disconnect(),
or otherwise it will be called with net_dev==NULL. phy_disconnect()
does not have enough info to unregister the callback automatically.
- The callback needs to be registered before of_phy_connect() to
avoid running with outdated state, but of_phy_connect() returns the
phy_device pointer, which is needed to register the callback. Registering
it before of_phy_connect() will therefore require a hack to get the
pointer earlier.

Overall, this addition makes the subsequent patch that implements
SGMII link status for mvneta, much cleaner.

CC: Florian Fainelli <f.fainelli@gmail.com>
CC: netdev@vger.kernel.org
CC: linux-kernel@vger.kernel.org

Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/fixed_phy.c | 29 +++++++++++++++++++++++++++++
 include/linux/phy_fixed.h   |  9 +++++++++
 2 files changed, 38 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index a08a3c78ba97..1960b46add65 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -183,6 +183,35 @@ int fixed_phy_set_link_update(struct phy_device *phydev,
 }
 EXPORT_SYMBOL_GPL(fixed_phy_set_link_update);
 
+int fixed_phy_update_state(struct phy_device *phydev,
+			   const struct fixed_phy_status *status,
+			   const struct fixed_phy_status *changed)
+{
+	struct fixed_mdio_bus *fmb = &platform_fmb;
+	struct fixed_phy *fp;
+
+	if (!phydev || !phydev->bus)
+		return -EINVAL;
+
+	list_for_each_entry(fp, &fmb->phys, node) {
+		if (fp->addr == phydev->addr) {
+#define _UPD(x) if (changed->x) \
+	fp->status.x = status->x
+			_UPD(link);
+			_UPD(speed);
+			_UPD(duplex);
+			_UPD(pause);
+			_UPD(asym_pause);
+#undef _UPD
+			fixed_phy_update_regs(fp);
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL(fixed_phy_update_state);
+
 int fixed_phy_add(unsigned int irq, int phy_addr,
 		  struct fixed_phy_status *status)
 {
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 7e75bfe37cc7..fe5732d53eda 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -21,6 +21,9 @@ extern void fixed_phy_del(int phy_addr);
 extern int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
 					   struct fixed_phy_status *));
+extern int fixed_phy_update_state(struct phy_device *phydev,
+			   const struct fixed_phy_status *status,
+			   const struct fixed_phy_status *changed);
 #else
 static inline int fixed_phy_add(unsigned int irq, int phy_id,
 				struct fixed_phy_status *status)
@@ -43,6 +46,12 @@ static inline int fixed_phy_set_link_update(struct phy_device *phydev,
 {
 	return -ENODEV;
 }
+static inline int fixed_phy_update_state(struct phy_device *phydev,
+			   const struct fixed_phy_status *status,
+			   const struct fixed_phy_status *changed)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_FIXED_PHY */
 
 #endif /* __PHY_FIXED_H */
-- 
cgit v1.2.3


From cfdfab314647b1755afedc33ab66f3f247e161ae Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 3 Apr 2015 16:23:58 -0400
Subject: netfilter: Create and use nf_hook_state.

Instead of passing a large number of arguments down into the nf_hook()
entry points, create a structure which carries this state down through
the hook processing layers.

This makes is so that if we want to change the types or signatures of
any of these pieces of state, there are less places that need to be
changed.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h    | 28 +++++++++++++++++++++++-----
 net/netfilter/core.c         | 32 +++++++++++++-------------------
 net/netfilter/nf_internals.h | 11 +++--------
 net/netfilter/nf_queue.c     | 38 ++++++++++++++++++++------------------
 4 files changed, 59 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2517ece98820..aee7ef1e23ed 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -44,6 +44,16 @@ int netfilter_init(void);
 struct sk_buff;
 
 struct nf_hook_ops;
+
+struct nf_hook_state {
+	unsigned int hook;
+	int thresh;
+	u_int8_t pf;
+	struct net_device *in;
+	struct net_device *out;
+	int (*okfn)(struct sk_buff *);
+};
+
 typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
 			       struct sk_buff *skb,
 			       const struct net_device *in,
@@ -118,9 +128,7 @@ static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
 }
 #endif
 
-int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
-		 struct net_device *indev, struct net_device *outdev,
-		 int (*okfn)(struct sk_buff *), int thresh);
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
 
 /**
  *	nf_hook_thresh - call a netfilter hook
@@ -135,8 +143,18 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 				 struct net_device *outdev,
 				 int (*okfn)(struct sk_buff *), int thresh)
 {
-	if (nf_hooks_active(pf, hook))
-		return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
+	if (nf_hooks_active(pf, hook)) {
+		struct nf_hook_state state = {
+			.hook = hook,
+			.thresh = thresh,
+			.pf = pf,
+			.in = indev,
+			.out = outdev,
+			.okfn = okfn
+		};
+
+		return nf_hook_slow(skb, &state);
+	}
 	return 1;
 }
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index fea9ef566427..11d04ebfc5e3 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -120,12 +120,8 @@ EXPORT_SYMBOL(nf_unregister_hooks);
 
 unsigned int nf_iterate(struct list_head *head,
 			struct sk_buff *skb,
-			unsigned int hook,
-			const struct net_device *indev,
-			const struct net_device *outdev,
-			struct nf_hook_ops **elemp,
-			int (*okfn)(struct sk_buff *),
-			int hook_thresh)
+			struct nf_hook_state *state,
+			struct nf_hook_ops **elemp)
 {
 	unsigned int verdict;
 
@@ -134,19 +130,20 @@ unsigned int nf_iterate(struct list_head *head,
 	 * function because of risk of continuing from deleted element.
 	 */
 	list_for_each_entry_continue_rcu((*elemp), head, list) {
-		if (hook_thresh > (*elemp)->priority)
+		if (state->thresh > (*elemp)->priority)
 			continue;
 
 		/* Optimization: we don't need to hold module
 		   reference here, since function can't sleep. --RR */
 repeat:
-		verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn);
+		verdict = (*elemp)->hook(*elemp, skb, state->in, state->out,
+					 state->okfn);
 		if (verdict != NF_ACCEPT) {
 #ifdef CONFIG_NETFILTER_DEBUG
 			if (unlikely((verdict & NF_VERDICT_MASK)
 							> NF_MAX_VERDICT)) {
 				NFDEBUG("Evil return from %p(%u).\n",
-					(*elemp)->hook, hook);
+					(*elemp)->hook, state->hook);
 				continue;
 			}
 #endif
@@ -161,11 +158,7 @@ repeat:
 
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
-		 struct net_device *indev,
-		 struct net_device *outdev,
-		 int (*okfn)(struct sk_buff *),
-		 int hook_thresh)
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
 {
 	struct nf_hook_ops *elem;
 	unsigned int verdict;
@@ -174,10 +167,11 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
 
-	elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list);
+	elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
+			      struct nf_hook_ops, list);
 next_hook:
-	verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
-			     outdev, &elem, okfn, hook_thresh);
+	verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
+			     &elem);
 	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
 		ret = 1;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
@@ -186,8 +180,8 @@ next_hook:
 		if (ret == 0)
 			ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-		int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-						verdict >> NF_VERDICT_QBITS);
+		int err = nf_queue(skb, elem, state,
+				   verdict >> NF_VERDICT_QBITS);
 		if (err < 0) {
 			if (err == -ECANCELED)
 				goto next_hook;
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 61a3c927e63c..ea7f36784b3d 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -14,16 +14,11 @@
 
 /* core.c */
 unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
-			unsigned int hook, const struct net_device *indev,
-			const struct net_device *outdev,
-			struct nf_hook_ops **elemp,
-			int (*okfn)(struct sk_buff *), int hook_thresh);
+			struct nf_hook_state *state, struct nf_hook_ops **elemp);
 
 /* nf_queue.c */
-int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf,
-	     unsigned int hook, struct net_device *indev,
-	     struct net_device *outdev, int (*okfn)(struct sk_buff *),
-	     unsigned int queuenum);
+int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
+	     struct nf_hook_state *state, unsigned int queuenum);
 int __init netfilter_queue_init(void);
 
 /* nf_log.c */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4c8b68e5fa16..6f8e9485cc83 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -100,12 +100,9 @@ EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
  * through nf_reinject().
  */
 int nf_queue(struct sk_buff *skb,
-		      struct nf_hook_ops *elem,
-		      u_int8_t pf, unsigned int hook,
-		      struct net_device *indev,
-		      struct net_device *outdev,
-		      int (*okfn)(struct sk_buff *),
-		      unsigned int queuenum)
+	     struct nf_hook_ops *elem,
+	     struct nf_hook_state *state,
+	     unsigned int queuenum)
 {
 	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
@@ -121,7 +118,7 @@ int nf_queue(struct sk_buff *skb,
 		goto err_unlock;
 	}
 
-	afinfo = nf_get_afinfo(pf);
+	afinfo = nf_get_afinfo(state->pf);
 	if (!afinfo)
 		goto err_unlock;
 
@@ -134,11 +131,11 @@ int nf_queue(struct sk_buff *skb,
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
 		.elem	= elem,
-		.pf	= pf,
-		.hook	= hook,
-		.indev	= indev,
-		.outdev	= outdev,
-		.okfn	= okfn,
+		.pf	= state->pf,
+		.hook	= state->hook,
+		.indev	= state->in,
+		.outdev	= state->out,
+		.okfn	= state->okfn,
 		.size	= sizeof(*entry) + afinfo->route_key_size,
 	};
 
@@ -171,6 +168,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	struct sk_buff *skb = entry->skb;
 	struct nf_hook_ops *elem = entry->elem;
 	const struct nf_afinfo *afinfo;
+	struct nf_hook_state state;
 	int err;
 
 	rcu_read_lock();
@@ -189,12 +187,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 			verdict = NF_DROP;
 	}
 
+	state.hook = entry->hook;
+	state.thresh = INT_MIN;
+	state.pf = entry->pf;
+	state.in = entry->indev;
+	state.out = entry->outdev;
+	state.okfn = entry->okfn;
+
 	if (verdict == NF_ACCEPT) {
 	next_hook:
 		verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
-				     skb, entry->hook,
-				     entry->indev, entry->outdev, &elem,
-				     entry->okfn, INT_MIN);
+				     skb, &state, &elem);
 	}
 
 	switch (verdict & NF_VERDICT_MASK) {
@@ -205,9 +208,8 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
-		err = nf_queue(skb, elem, entry->pf, entry->hook,
-				entry->indev, entry->outdev, entry->okfn,
-				verdict >> NF_VERDICT_QBITS);
+		err = nf_queue(skb, elem, &state,
+			       verdict >> NF_VERDICT_QBITS);
 		if (err < 0) {
 			if (err == -ECANCELED)
 				goto next_hook;
-- 
cgit v1.2.3


From 1d1de89b9a4746f1dd055a3b8d073dd2f962a3b6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 3 Apr 2015 16:31:01 -0400
Subject: netfilter: Use nf_hook_state in nf_queue_entry.

That way we don't have to reinstantiate another nf_hook_state
on the stack of the nf_reinject() path.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_queue.h     |  6 +----
 net/ipv4/netfilter.c                 |  4 ++--
 net/ipv6/netfilter.c                 |  4 ++--
 net/netfilter/nf_queue.c             | 44 ++++++++++++++++--------------------
 net/netfilter/nfnetlink_queue_core.c | 30 ++++++++++++------------
 5 files changed, 39 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 84a53d780306..d81d584157e1 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -12,12 +12,8 @@ struct nf_queue_entry {
 	unsigned int		id;
 
 	struct nf_hook_ops	*elem;
-	u_int8_t		pf;
+	struct nf_hook_state	state;
 	u16			size; /* sizeof(entry) + saved route keys */
-	unsigned int		hook;
-	struct net_device	*indev;
-	struct net_device	*outdev;
-	int			(*okfn)(struct sk_buff *);
 
 	/* extra space to store route keys */
 };
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 7ebd6e37875c..65de0684e22a 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -94,7 +94,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb,
 {
 	struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (entry->hook == NF_INET_LOCAL_OUT) {
+	if (entry->state.hook == NF_INET_LOCAL_OUT) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		rt_info->tos = iph->tos;
@@ -109,7 +109,7 @@ static int nf_ip_reroute(struct sk_buff *skb,
 {
 	const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (entry->hook == NF_INET_LOCAL_OUT) {
+	if (entry->state.hook == NF_INET_LOCAL_OUT) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		if (!(iph->tos == rt_info->tos &&
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 398377a9d018..d958718b5031 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -84,7 +84,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb,
 {
 	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (entry->hook == NF_INET_LOCAL_OUT) {
+	if (entry->state.hook == NF_INET_LOCAL_OUT) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		rt_info->daddr = iph->daddr;
@@ -98,7 +98,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
 {
 	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
 
-	if (entry->hook == NF_INET_LOCAL_OUT) {
+	if (entry->state.hook == NF_INET_LOCAL_OUT) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 6f8e9485cc83..d3cd37edca18 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -47,11 +47,13 @@ EXPORT_SYMBOL(nf_unregister_queue_handler);
 
 void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 {
+	struct nf_hook_state *state = &entry->state;
+
 	/* Release those devices we held, or Alexey will kill me. */
-	if (entry->indev)
-		dev_put(entry->indev);
-	if (entry->outdev)
-		dev_put(entry->outdev);
+	if (state->in)
+		dev_put(state->in);
+	if (state->out)
+		dev_put(state->out);
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	if (entry->skb->nf_bridge) {
 		struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
@@ -70,13 +72,15 @@ EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
 /* Bump dev refs so they don't vanish while packet is out */
 bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 {
+	struct nf_hook_state *state = &entry->state;
+
 	if (!try_module_get(entry->elem->owner))
 		return false;
 
-	if (entry->indev)
-		dev_hold(entry->indev);
-	if (entry->outdev)
-		dev_hold(entry->outdev);
+	if (state->in)
+		dev_hold(state->in);
+	if (state->out)
+		dev_hold(state->out);
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	if (entry->skb->nf_bridge) {
 		struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
@@ -131,11 +135,7 @@ int nf_queue(struct sk_buff *skb,
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
 		.elem	= elem,
-		.pf	= state->pf,
-		.hook	= state->hook,
-		.indev	= state->in,
-		.outdev	= state->out,
-		.okfn	= state->okfn,
+		.state	= *state,
 		.size	= sizeof(*entry) + afinfo->route_key_size,
 	};
 
@@ -168,7 +168,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	struct sk_buff *skb = entry->skb;
 	struct nf_hook_ops *elem = entry->elem;
 	const struct nf_afinfo *afinfo;
-	struct nf_hook_state state;
 	int err;
 
 	rcu_read_lock();
@@ -182,33 +181,28 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	}
 
 	if (verdict == NF_ACCEPT) {
-		afinfo = nf_get_afinfo(entry->pf);
+		afinfo = nf_get_afinfo(entry->state.pf);
 		if (!afinfo || afinfo->reroute(skb, entry) < 0)
 			verdict = NF_DROP;
 	}
 
-	state.hook = entry->hook;
-	state.thresh = INT_MIN;
-	state.pf = entry->pf;
-	state.in = entry->indev;
-	state.out = entry->outdev;
-	state.okfn = entry->okfn;
+	entry->state.thresh = INT_MIN;
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
-		verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
-				     skb, &state, &elem);
+		verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook],
+				     skb, &entry->state, &elem);
 	}
 
 	switch (verdict & NF_VERDICT_MASK) {
 	case NF_ACCEPT:
 	case NF_STOP:
 		local_bh_disable();
-		entry->okfn(skb);
+		entry->state.okfn(skb);
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
-		err = nf_queue(skb, elem, &state,
+		err = nf_queue(skb, elem, &entry->state,
 			       verdict >> NF_VERDICT_QBITS);
 		if (err < 0) {
 			if (err == -ECANCELED)
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 86ee8b05adae..6e74655a8d4f 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -314,13 +314,13 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	if (entskb->tstamp.tv64)
 		size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
 
-	if (entry->hook <= NF_INET_FORWARD ||
-	   (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
+	if (entry->state.hook <= NF_INET_FORWARD ||
+	   (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
 		csum_verify = !skb_csum_unnecessary(entskb);
 	else
 		csum_verify = false;
 
-	outdev = entry->outdev;
+	outdev = entry->state.out;
 
 	switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
 	case NFQNL_COPY_META:
@@ -368,23 +368,23 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 		return NULL;
 	}
 	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = entry->pf;
+	nfmsg->nfgen_family = entry->state.pf;
 	nfmsg->version = NFNETLINK_V0;
 	nfmsg->res_id = htons(queue->queue_num);
 
 	nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
 	pmsg = nla_data(nla);
 	pmsg->hw_protocol	= entskb->protocol;
-	pmsg->hook		= entry->hook;
+	pmsg->hook		= entry->state.hook;
 	*packet_id_ptr		= &pmsg->packet_id;
 
-	indev = entry->indev;
+	indev = entry->state.in;
 	if (indev) {
 #if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 		if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
 			goto nla_put_failure;
 #else
-		if (entry->pf == PF_BRIDGE) {
+		if (entry->state.pf == PF_BRIDGE) {
 			/* Case 1: indev is physical input device, we need to
 			 * look for bridge group (when called from
 			 * netfilter_bridge) */
@@ -414,7 +414,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 		if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
 			goto nla_put_failure;
 #else
-		if (entry->pf == PF_BRIDGE) {
+		if (entry->state.pf == PF_BRIDGE) {
 			/* Case 1: outdev is physical output device, we need to
 			 * look for bridge group (when called from
 			 * netfilter_bridge) */
@@ -633,8 +633,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	struct nfqnl_instance *queue;
 	struct sk_buff *skb, *segs;
 	int err = -ENOBUFS;
-	struct net *net = dev_net(entry->indev ?
-				  entry->indev : entry->outdev);
+	struct net *net = dev_net(entry->state.in ?
+				  entry->state.in : entry->state.out);
 	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
 
 	/* rcu_read_lock()ed by nf_hook_slow() */
@@ -647,7 +647,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
 	skb = entry->skb;
 
-	switch (entry->pf) {
+	switch (entry->state.pf) {
 	case NFPROTO_IPV4:
 		skb->protocol = htons(ETH_P_IP);
 		break;
@@ -757,11 +757,11 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
 static int
 dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
 {
-	if (entry->indev)
-		if (entry->indev->ifindex == ifindex)
+	if (entry->state.in)
+		if (entry->state.in->ifindex == ifindex)
 			return 1;
-	if (entry->outdev)
-		if (entry->outdev->ifindex == ifindex)
+	if (entry->state.out)
+		if (entry->state.out->ifindex == ifindex)
 			return 1;
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	if (entry->skb->nf_bridge) {
-- 
cgit v1.2.3


From 238e54c9cb9385a1ba99e92801f3615a2fb398b6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 3 Apr 2015 20:32:56 -0400
Subject: netfilter: Make nf_hookfn use nf_hook_state.

Pass the nf_hook_state all the way down into the hook
functions themselves.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h                      |  4 +--
 net/bridge/br_netfilter.c                      | 46 +++++++++-----------------
 net/bridge/netfilter/ebtable_filter.c          | 14 ++++----
 net/bridge/netfilter/ebtable_nat.c             | 14 ++++----
 net/bridge/netfilter/nf_tables_bridge.c        | 10 +++---
 net/decnet/netfilter/dn_rtmsg.c                |  4 +--
 net/ipv4/netfilter/arptable_filter.c           |  7 ++--
 net/ipv4/netfilter/ipt_CLUSTERIP.c             | 10 +++---
 net/ipv4/netfilter/ipt_SYNPROXY.c              |  6 ++--
 net/ipv4/netfilter/iptable_filter.c            |  7 ++--
 net/ipv4/netfilter/iptable_mangle.c            | 14 ++++----
 net/ipv4/netfilter/iptable_nat.c               | 28 +++++++---------
 net/ipv4/netfilter/iptable_raw.c               |  8 ++---
 net/ipv4/netfilter/iptable_security.c          |  8 ++---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 20 ++++-------
 net/ipv4/netfilter/nf_defrag_ipv4.c            |  4 +--
 net/ipv4/netfilter/nf_tables_arp.c             |  6 ++--
 net/ipv4/netfilter/nf_tables_ipv4.c            | 12 +++----
 net/ipv4/netfilter/nft_chain_nat_ipv4.c        | 25 +++++---------
 net/ipv4/netfilter/nft_chain_route_ipv4.c      |  6 ++--
 net/ipv6/netfilter/ip6t_SYNPROXY.c             |  6 ++--
 net/ipv6/netfilter/ip6table_filter.c           |  7 ++--
 net/ipv6/netfilter/ip6table_mangle.c           | 13 ++++----
 net/ipv6/netfilter/ip6table_nat.c              | 28 +++++++---------
 net/ipv6/netfilter/ip6table_raw.c              |  7 ++--
 net/ipv6/netfilter/ip6table_security.c         |  8 ++---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 20 ++++-------
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c      |  8 ++---
 net/ipv6/netfilter/nf_tables_ipv6.c            | 12 +++----
 net/ipv6/netfilter/nft_chain_nat_ipv6.c        | 24 +++++---------
 net/ipv6/netfilter/nft_chain_route_ipv6.c      |  6 ++--
 net/netfilter/core.c                           |  3 +-
 net/netfilter/ipvs/ip_vs_core.c                | 32 ++++++------------
 security/selinux/hooks.c                       | 28 +++++-----------
 security/smack/smack_netfilter.c               |  8 ++---
 35 files changed, 169 insertions(+), 294 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index aee7ef1e23ed..c480c43ad8f7 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -56,9 +56,7 @@ struct nf_hook_state {
 
 typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
 			       struct sk_buff *skb,
-			       const struct net_device *in,
-			       const struct net_device *out,
-			       int (*okfn)(struct sk_buff *));
+			       const struct nf_hook_state *state);
 
 struct nf_hook_ops {
 	struct list_head list;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f3884a1b942f..7527e94dd5dc 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -562,9 +562,7 @@ bad:
  * to ip6tables, which doesn't support NAT, so things are fairly simple. */
 static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
-					   const struct net_device *in,
-					   const struct net_device *out,
-					   int (*okfn)(struct sk_buff *))
+					   const struct nf_hook_state *state)
 {
 	const struct ipv6hdr *hdr;
 	u32 pkt_len;
@@ -612,9 +610,7 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
  * address to be able to detect DNAT afterwards. */
 static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
-				      const struct net_device *in,
-				      const struct net_device *out,
-				      int (*okfn)(struct sk_buff *))
+				      const struct nf_hook_state *state)
 {
 	struct net_bridge_port *p;
 	struct net_bridge *br;
@@ -623,7 +619,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
 	if (unlikely(!pskb_may_pull(skb, len)))
 		return NF_DROP;
 
-	p = br_port_get_rcu(in);
+	p = br_port_get_rcu(state->in);
 	if (p == NULL)
 		return NF_DROP;
 	br = p->br;
@@ -633,7 +629,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
 			return NF_ACCEPT;
 
 		nf_bridge_pull_encap_header_rcsum(skb);
-		return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn);
+		return br_nf_pre_routing_ipv6(ops, skb, state);
 	}
 
 	if (!brnf_call_iptables && !br->nf_call_iptables)
@@ -671,9 +667,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
  * prevent this from happening. */
 static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
 				   struct sk_buff *skb,
-				   const struct net_device *in,
-				   const struct net_device *out,
-				   int (*okfn)(struct sk_buff *))
+				   const struct nf_hook_state *state)
 {
 	br_drop_fake_rtable(skb);
 	return NF_ACCEPT;
@@ -710,9 +704,7 @@ static int br_nf_forward_finish(struct sk_buff *skb)
  * bridge ports. */
 static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
 				     struct sk_buff *skb,
-				     const struct net_device *in,
-				     const struct net_device *out,
-				     int (*okfn)(struct sk_buff *))
+				     const struct nf_hook_state *state)
 {
 	struct nf_bridge_info *nf_bridge;
 	struct net_device *parent;
@@ -726,7 +718,7 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
 	if (!nf_bridge_unshare(skb))
 		return NF_DROP;
 
-	parent = bridge_parent(out);
+	parent = bridge_parent(state->out);
 	if (!parent)
 		return NF_DROP;
 
@@ -754,23 +746,21 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
 	else
 		skb->protocol = htons(ETH_P_IPV6);
 
-	NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, in), parent,
-		br_nf_forward_finish);
+	NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, state->in),
+		parent,	br_nf_forward_finish);
 
 	return NF_STOLEN;
 }
 
 static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
-				      const struct net_device *in,
-				      const struct net_device *out,
-				      int (*okfn)(struct sk_buff *))
+				      const struct nf_hook_state *state)
 {
 	struct net_bridge_port *p;
 	struct net_bridge *br;
 	struct net_device **d = (struct net_device **)(skb->cb);
 
-	p = br_port_get_rcu(out);
+	p = br_port_get_rcu(state->out);
 	if (p == NULL)
 		return NF_ACCEPT;
 	br = p->br;
@@ -789,9 +779,9 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
 			nf_bridge_push_encap_header(skb);
 		return NF_ACCEPT;
 	}
-	*d = (struct net_device *)in;
-	NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
-		(struct net_device *)out, br_nf_forward_finish);
+	*d = state->in;
+	NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, state->in,
+		state->out, br_nf_forward_finish);
 
 	return NF_STOLEN;
 }
@@ -859,9 +849,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 /* PF_BRIDGE/POST_ROUTING ********************************************/
 static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
 				       struct sk_buff *skb,
-				       const struct net_device *in,
-				       const struct net_device *out,
-				       int (*okfn)(struct sk_buff *))
+				       const struct nf_hook_state *state)
 {
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 	struct net_device *realoutdev = bridge_parent(skb->dev);
@@ -910,9 +898,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
  * for the second time. */
 static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
 				   struct sk_buff *skb,
-				   const struct net_device *in,
-				   const struct net_device *out,
-				   int (*okfn)(struct sk_buff *))
+				   const struct nf_hook_state *state)
 {
 	if (skb->nf_bridge &&
 	    !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index ce205aabf9c5..8a3f63b2e807 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -58,20 +58,18 @@ static const struct ebt_table frame_filter = {
 
 static unsigned int
 ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	    const struct net_device *in, const struct net_device *out,
-	    int (*okfn)(struct sk_buff *))
+	    const struct nf_hook_state *state)
 {
-	return ebt_do_table(ops->hooknum, skb, in, out,
-			    dev_net(in)->xt.frame_filter);
+	return ebt_do_table(ops->hooknum, skb, state->in, state->out,
+			    dev_net(state->in)->xt.frame_filter);
 }
 
 static unsigned int
 ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	     const struct net_device *in, const struct net_device *out,
-	     int (*okfn)(struct sk_buff *))
+	     const struct nf_hook_state *state)
 {
-	return ebt_do_table(ops->hooknum, skb, in, out,
-			    dev_net(out)->xt.frame_filter);
+	return ebt_do_table(ops->hooknum, skb, state->in, state->out,
+			    dev_net(state->out)->xt.frame_filter);
 }
 
 static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index a0ac2984fb6c..c5ef5b1ab678 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -58,20 +58,18 @@ static struct ebt_table frame_nat = {
 
 static unsigned int
 ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	   const struct net_device *in, const struct net_device *out,
-	   int (*okfn)(struct sk_buff *))
+	   const struct nf_hook_state *state)
 {
-	return ebt_do_table(ops->hooknum, skb, in, out,
-			    dev_net(in)->xt.frame_nat);
+	return ebt_do_table(ops->hooknum, skb, state->in, state->out,
+			    dev_net(state->in)->xt.frame_nat);
 }
 
 static unsigned int
 ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	    const struct net_device *in, const struct net_device *out,
-	    int (*okfn)(struct sk_buff *))
+	    const struct nf_hook_state *state)
 {
-	return ebt_do_table(ops->hooknum, skb, in, out,
-			    dev_net(out)->xt.frame_nat);
+	return ebt_do_table(ops->hooknum, skb, state->in, state->out,
+			    dev_net(state->out)->xt.frame_nat);
 }
 
 static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 19473a9371b8..2c46a47160a8 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -93,21 +93,19 @@ static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
 static unsigned int
 nft_do_chain_bridge(const struct nf_hook_ops *ops,
 		    struct sk_buff *skb,
-		    const struct net_device *in,
-		    const struct net_device *out,
-		    int (*okfn)(struct sk_buff *))
+		    const struct nf_hook_state *state)
 {
 	struct nft_pktinfo pkt;
 
 	switch (eth_hdr(skb)->h_proto) {
 	case htons(ETH_P_IP):
-		nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+		nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out);
 		break;
 	case htons(ETH_P_IPV6):
-		nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
+		nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out);
 		break;
 	default:
-		nft_set_pktinfo(&pkt, ops, skb, in, out);
+		nft_set_pktinfo(&pkt, ops, skb, state->in, state->out);
 		break;
 	}
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index e4d9560a910b..af34fc9bdf69 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -89,9 +89,7 @@ static void dnrmg_send_peer(struct sk_buff *skb)
 
 static unsigned int dnrmg_hook(const struct nf_hook_ops *ops,
 			struct sk_buff *skb,
-			const struct net_device *in,
-			const struct net_device *out,
-			int (*okfn)(struct sk_buff *))
+			const struct nf_hook_state *state)
 {
 	dnrmg_send_peer(skb);
 	return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 802ddecb30b8..6a641cb41062 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -28,12 +28,11 @@ static const struct xt_table packet_filter = {
 /* The work comes in here from netfilter.c */
 static unsigned int
 arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		     const struct net_device *in, const struct net_device *out,
-		     int (*okfn)(struct sk_buff *))
+		     const struct nf_hook_state *state)
 {
-	const struct net *net = dev_net((in != NULL) ? in : out);
+	const struct net *net = dev_net(state->in ? state->in : state->out);
 
-	return arpt_do_table(skb, ops->hooknum, in, out,
+	return arpt_do_table(skb, ops->hooknum, state->in, state->out,
 			     net->ipv4.arptable_filter);
 }
 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f75e9df5e017..771ab3d01ad3 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -504,14 +504,12 @@ static void arp_print(struct arp_payload *payload)
 static unsigned int
 arp_mangle(const struct nf_hook_ops *ops,
 	   struct sk_buff *skb,
-	   const struct net_device *in,
-	   const struct net_device *out,
-	   int (*okfn)(struct sk_buff *))
+	   const struct nf_hook_state *state)
 {
 	struct arphdr *arp = arp_hdr(skb);
 	struct arp_payload *payload;
 	struct clusterip_config *c;
-	struct net *net = dev_net(in ? in : out);
+	struct net *net = dev_net(state->in ? state->in : state->out);
 
 	/* we don't care about non-ethernet and non-ipv4 ARP */
 	if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
@@ -536,10 +534,10 @@ arp_mangle(const struct nf_hook_ops *ops,
 	 * addresses on different interfacs.  However, in the CLUSTERIP case
 	 * this wouldn't work, since we didn't subscribe the mcast group on
 	 * other interfaces */
-	if (c->dev != out) {
+	if (c->dev != state->out) {
 		pr_debug("not mangling arp reply on different "
 			 "interface: cip'%s'-skb'%s'\n",
-			 c->dev->name, out->name);
+			 c->dev->name, state->out->name);
 		clusterip_config_put(c);
 		return NF_ACCEPT;
 	}
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index a313c3fbeb46..e9e67793055f 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -300,11 +300,9 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 
 static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
 				       struct sk_buff *skb,
-				       const struct net_device *in,
-				       const struct net_device *out,
-				       int (*okfn)(struct sk_buff *))
+				       const struct nf_hook_state *nhs)
 {
-	struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+	struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out));
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	struct nf_conn_synproxy *synproxy;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index e08a74a243a8..1df0d42bfd39 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -34,8 +34,7 @@ static const struct xt_table packet_filter = {
 
 static unsigned int
 iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		    const struct net_device *in, const struct net_device *out,
-		    int (*okfn)(struct sk_buff *))
+		    const struct nf_hook_state *state)
 {
 	const struct net *net;
 
@@ -45,8 +44,8 @@ iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		/* root is playing with raw sockets. */
 		return NF_ACCEPT;
 
-	net = dev_net((in != NULL) ? in : out);
-	return ipt_do_table(skb, ops->hooknum, in, out,
+	net = dev_net(state->in ? state->in : state->out);
+	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
 			    net->ipv4.iptable_filter);
 }
 
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 6a5079c34bb3..7a825e740045 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -81,18 +81,16 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
 static unsigned int
 iptable_mangle_hook(const struct nf_hook_ops *ops,
 		     struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     int (*okfn)(struct sk_buff *))
+		     const struct nf_hook_state *state)
 {
 	if (ops->hooknum == NF_INET_LOCAL_OUT)
-		return ipt_mangle_out(skb, out);
+		return ipt_mangle_out(skb, state->out);
 	if (ops->hooknum == NF_INET_POST_ROUTING)
-		return ipt_do_table(skb, ops->hooknum, in, out,
-				    dev_net(out)->ipv4.iptable_mangle);
+		return ipt_do_table(skb, ops->hooknum, state->in, state->out,
+				    dev_net(state->out)->ipv4.iptable_mangle);
 	/* PREROUTING/INPUT/FORWARD: */
-	return ipt_do_table(skb, ops->hooknum, in, out,
-			    dev_net(in)->ipv4.iptable_mangle);
+	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
+			    dev_net(state->in)->ipv4.iptable_mangle);
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 6b67d7e9a75d..7a7fea4711e5 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -41,38 +41,34 @@ static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops,
 
 static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
+					const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_fn(ops, skb, in, out, iptable_nat_do_chain);
+	return nf_nat_ipv4_fn(ops, skb, state->in, state->out,
+			      iptable_nat_do_chain);
 }
 
 static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
+					const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_in(ops, skb, in, out, iptable_nat_do_chain);
+	return nf_nat_ipv4_in(ops, skb, state->in, state->out,
+			      iptable_nat_do_chain);
 }
 
 static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
-					 const struct net_device *in,
-					 const struct net_device *out,
-					 int (*okfn)(struct sk_buff *))
+					 const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_out(ops, skb, in, out, iptable_nat_do_chain);
+	return nf_nat_ipv4_out(ops, skb, state->in, state->out,
+			       iptable_nat_do_chain);
 }
 
 static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
 					      struct sk_buff *skb,
-					      const struct net_device *in,
-					      const struct net_device *out,
-					      int (*okfn)(struct sk_buff *))
+					      const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_local_fn(ops, skb, in, out, iptable_nat_do_chain);
+	return nf_nat_ipv4_local_fn(ops, skb, state->in, state->out,
+				    iptable_nat_do_chain);
 }
 
 static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index b2f7e8f98316..fac8f607c70b 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -21,8 +21,7 @@ static const struct xt_table packet_raw = {
 /* The work comes in here from netfilter.c. */
 static unsigned int
 iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		 const struct net_device *in, const struct net_device *out,
-		 int (*okfn)(struct sk_buff *))
+		 const struct nf_hook_state *state)
 {
 	const struct net *net;
 
@@ -32,8 +31,9 @@ iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		/* root is playing with raw sockets. */
 		return NF_ACCEPT;
 
-	net = dev_net((in != NULL) ? in : out);
-	return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw);
+	net = dev_net(state->in ? state->in : state->out);
+	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
+			    net->ipv4.iptable_raw);
 }
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index c86647ed2078..d9ad60a57413 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -38,9 +38,7 @@ static const struct xt_table security_table = {
 
 static unsigned int
 iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		      const struct net_device *in,
-		      const struct net_device *out,
-		      int (*okfn)(struct sk_buff *))
+		      const struct nf_hook_state *state)
 {
 	const struct net *net;
 
@@ -50,8 +48,8 @@ iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		/* Somebody is playing with raw sockets. */
 		return NF_ACCEPT;
 
-	net = dev_net((in != NULL) ? in : out);
-	return ipt_do_table(skb, ops->hooknum, in, out,
+	net = dev_net(state->in ? state->in : state->out);
+	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
 			    net->ipv4.iptable_security);
 }
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8c8d6642cbb0..30ad9554b5e9 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -94,9 +94,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 
 static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
 				struct sk_buff *skb,
-				const struct net_device *in,
-				const struct net_device *out,
-				int (*okfn)(struct sk_buff *))
+				const struct nf_hook_state *state)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -123,9 +121,7 @@ static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
 
 static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
 				 struct sk_buff *skb,
-				 const struct net_device *in,
-				 const struct net_device *out,
-				 int (*okfn)(struct sk_buff *))
+				 const struct nf_hook_state *state)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -149,24 +145,20 @@ out:
 
 static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
-				      const struct net_device *in,
-				      const struct net_device *out,
-				      int (*okfn)(struct sk_buff *))
+				      const struct nf_hook_state *state)
 {
-	return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb);
+	return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb);
 }
 
 static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
-					 const struct net_device *in,
-					 const struct net_device *out,
-					 int (*okfn)(struct sk_buff *))
+					 const struct nf_hook_state *state)
 {
 	/* root is playing with raw sockets. */
 	if (skb->len < sizeof(struct iphdr) ||
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
-	return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);
+	return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb);
 }
 
 /* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 7e5ca6f2d0cd..c88b7d434718 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -63,9 +63,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
 
 static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
-					  const struct net_device *in,
-					  const struct net_device *out,
-					  int (*okfn)(struct sk_buff *))
+					  const struct nf_hook_state *state)
 {
 	struct sock *sk = skb->sk;
 	struct inet_sock *inet = inet_sk(skb->sk);
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 19412a4063fb..fceb50e1e87d 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -17,13 +17,11 @@
 static unsigned int
 nft_do_chain_arp(const struct nf_hook_ops *ops,
 		  struct sk_buff *skb,
-		  const struct net_device *in,
-		  const struct net_device *out,
-		  int (*okfn)(struct sk_buff *))
+		  const struct nf_hook_state *state)
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo(&pkt, ops, skb, in, out);
+	nft_set_pktinfo(&pkt, ops, skb, state->in, state->out);
 
 	return nft_do_chain(&pkt, ops);
 }
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index 6820c8c40842..708e388e3dbe 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -20,22 +20,18 @@
 
 static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
-				      const struct net_device *in,
-				      const struct net_device *out,
-				      int (*okfn)(struct sk_buff *))
+				      const struct nf_hook_state *state)
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+	nft_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out);
 
 	return nft_do_chain(&pkt, ops);
 }
 
 static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
+				    const struct nf_hook_state *state)
 {
 	if (unlikely(skb->len < sizeof(struct iphdr) ||
 		     ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
@@ -45,7 +41,7 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
 		return NF_ACCEPT;
 	}
 
-	return nft_do_chain_ipv4(ops, skb, in, out, okfn);
+	return nft_do_chain_ipv4(ops, skb, state);
 }
 
 struct nft_af_info nft_af_ipv4 __read_mostly = {
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index df547bf50078..d08db6b0fcc3 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -41,38 +41,31 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
 
 static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
+				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_fn(ops, skb, in, out, nft_nat_do_chain);
+	return nf_nat_ipv4_fn(ops, skb, state->in, state->out, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
+				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_in(ops, skb, in, out, nft_nat_do_chain);
+	return nf_nat_ipv4_in(ops, skb, state->in, state->out, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops,
 				     struct sk_buff *skb,
-				     const struct net_device *in,
-				     const struct net_device *out,
-				     int (*okfn)(struct sk_buff *))
+				     const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_out(ops, skb, in, out, nft_nat_do_chain);
+	return nf_nat_ipv4_out(ops, skb, state->in, state->out, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
-					  const struct net_device *in,
-					  const struct net_device *out,
-					  int (*okfn)(struct sk_buff *))
+					  const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_local_fn(ops, skb, in, out, nft_nat_do_chain);
+	return nf_nat_ipv4_local_fn(ops, skb, state->in, state->out,
+				    nft_nat_do_chain);
 }
 
 static const struct nf_chain_type nft_chain_nat_ipv4 = {
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 125b66766c0a..073d0776ae7f 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -23,9 +23,7 @@
 
 static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
+					const struct nf_hook_state *state)
 {
 	unsigned int ret;
 	struct nft_pktinfo pkt;
@@ -39,7 +37,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+	nft_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out);
 
 	mark = skb->mark;
 	iph = ip_hdr(skb);
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index a0d17270117c..6edb7b106de7 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -315,11 +315,9 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
 				       struct sk_buff *skb,
-				       const struct net_device *in,
-				       const struct net_device *out,
-				       int (*okfn)(struct sk_buff *))
+				       const struct nf_hook_state *nhs)
 {
-	struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+	struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out));
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	struct nf_conn_synproxy *synproxy;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index ca7f6c128086..eb9ef093454f 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -33,12 +33,11 @@ static const struct xt_table packet_filter = {
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		     const struct net_device *in, const struct net_device *out,
-		     int (*okfn)(struct sk_buff *))
+		     const struct nf_hook_state *state)
 {
-	const struct net *net = dev_net((in != NULL) ? in : out);
+	const struct net *net = dev_net(state->in ? state->in : state->out);
 
-	return ip6t_do_table(skb, ops->hooknum, in, out,
+	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
 			     net->ipv6.ip6table_filter);
 }
 
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 307bbb782d14..e713b8d3dbbc 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -77,17 +77,16 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		     const struct net_device *in, const struct net_device *out,
-		     int (*okfn)(struct sk_buff *))
+		     const struct nf_hook_state *state)
 {
 	if (ops->hooknum == NF_INET_LOCAL_OUT)
-		return ip6t_mangle_out(skb, out);
+		return ip6t_mangle_out(skb, state->out);
 	if (ops->hooknum == NF_INET_POST_ROUTING)
-		return ip6t_do_table(skb, ops->hooknum, in, out,
-				     dev_net(out)->ipv6.ip6table_mangle);
+		return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
+				     dev_net(state->out)->ipv6.ip6table_mangle);
 	/* INPUT/FORWARD */
-	return ip6t_do_table(skb, ops->hooknum, in, out,
-			     dev_net(in)->ipv6.ip6table_mangle);
+	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
+			     dev_net(state->in)->ipv6.ip6table_mangle);
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index b0634ac996b7..e32b0d0315e6 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -43,38 +43,34 @@ static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops,
 
 static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
+				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_fn(ops, skb, in, out, ip6table_nat_do_chain);
+	return nf_nat_ipv6_fn(ops, skb, state->in, state->out,
+			      ip6table_nat_do_chain);
 }
 
 static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
+				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_in(ops, skb, in, out, ip6table_nat_do_chain);
+	return nf_nat_ipv6_in(ops, skb, state->in, state->out,
+			      ip6table_nat_do_chain);
 }
 
 static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops,
 				     struct sk_buff *skb,
-				     const struct net_device *in,
-				     const struct net_device *out,
-				     int (*okfn)(struct sk_buff *))
+				     const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_out(ops, skb, in, out, ip6table_nat_do_chain);
+	return nf_nat_ipv6_out(ops, skb, state->in, state->out,
+			       ip6table_nat_do_chain);
 }
 
 static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
-					  const struct net_device *in,
-					  const struct net_device *out,
-					  int (*okfn)(struct sk_buff *))
+					  const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_local_fn(ops, skb, in, out, ip6table_nat_do_chain);
+	return nf_nat_ipv6_local_fn(ops, skb, state->in, state->out,
+				    ip6table_nat_do_chain);
 }
 
 static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 5274740acecc..937908e25862 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -20,12 +20,11 @@ static const struct xt_table packet_raw = {
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		  const struct net_device *in, const struct net_device *out,
-		  int (*okfn)(struct sk_buff *))
+		  const struct nf_hook_state *state)
 {
-	const struct net *net = dev_net((in != NULL) ? in : out);
+	const struct net *net = dev_net(state->in ? state->in : state->out);
 
-	return ip6t_do_table(skb, ops->hooknum, in, out,
+	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
 			     net->ipv6.ip6table_raw);
 }
 
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index ab3b0219ecfa..f33b41e8e294 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -37,13 +37,11 @@ static const struct xt_table security_table = {
 
 static unsigned int
 ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		       const struct net_device *in,
-		       const struct net_device *out,
-		       int (*okfn)(struct sk_buff *))
+		       const struct nf_hook_state *state)
 {
-	const struct net *net = dev_net((in != NULL) ? in : out);
+	const struct net *net = dev_net(state->in ? state->in : state->out);
 
-	return ip6t_do_table(skb, ops->hooknum, in, out,
+	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
 			     net->ipv6.ip6table_security);
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index fba91c6fc7ca..4ba0c34c627b 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -97,9 +97,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 
 static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
 				struct sk_buff *skb,
-				const struct net_device *in,
-				const struct net_device *out,
-				int (*okfn)(struct sk_buff *))
+				const struct nf_hook_state *state)
 {
 	struct nf_conn *ct;
 	const struct nf_conn_help *help;
@@ -135,9 +133,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
 
 static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
 				 struct sk_buff *skb,
-				 const struct net_device *in,
-				 const struct net_device *out,
-				 int (*okfn)(struct sk_buff *))
+				 const struct nf_hook_state *state)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -171,25 +167,21 @@ out:
 
 static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
-				      const struct net_device *in,
-				      const struct net_device *out,
-				      int (*okfn)(struct sk_buff *))
+				      const struct nf_hook_state *state)
 {
-	return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);
+	return nf_conntrack_in(dev_net(state->in), PF_INET6, ops->hooknum, skb);
 }
 
 static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
-					 const struct net_device *in,
-					 const struct net_device *out,
-					 int (*okfn)(struct sk_buff *))
+					 const struct nf_hook_state *state)
 {
 	/* root is playing with raw sockets. */
 	if (skb->len < sizeof(struct ipv6hdr)) {
 		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
-	return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);
+	return nf_conntrack_in(dev_net(state->out), PF_INET6, ops->hooknum, skb);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index e70382e4dfb5..e2b882056751 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -54,9 +54,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
 
 static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
 				struct sk_buff *skb,
-				const struct net_device *in,
-				const struct net_device *out,
-				int (*okfn)(struct sk_buff *))
+				const struct nf_hook_state *state)
 {
 	struct sk_buff *reasm;
 
@@ -78,8 +76,8 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
 	nf_ct_frag6_consume_orig(reasm);
 
 	NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm,
-		       (struct net_device *) in, (struct net_device *) out,
-		       okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+		       state->in, state->out,
+		       state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
 
 	return NF_STOLEN;
 }
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index 0d812b31277d..224bc8971a0b 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -18,14 +18,12 @@
 
 static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
-				      const struct net_device *in,
-				      const struct net_device *out,
-				      int (*okfn)(struct sk_buff *))
+				      const struct nf_hook_state *state)
 {
 	struct nft_pktinfo pkt;
 
 	/* malformed packet, drop it */
-	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out) < 0)
 		return NF_DROP;
 
 	return nft_do_chain(&pkt, ops);
@@ -33,9 +31,7 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops,
 
 static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
+				    const struct nf_hook_state *state)
 {
 	if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
 		if (net_ratelimit())
@@ -44,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
 		return NF_ACCEPT;
 	}
 
-	return nft_do_chain_ipv6(ops, skb, in, out, okfn);
+	return nft_do_chain_ipv6(ops, skb, state);
 }
 
 struct nft_af_info nft_af_ipv6 __read_mostly = {
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 1c4b75dd425b..f73f4ae25bc2 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -39,38 +39,30 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
 
 static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
+				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_fn(ops, skb, in, out, nft_nat_do_chain);
+	return nf_nat_ipv6_fn(ops, skb, state->in, state->out, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
+				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_in(ops, skb, in, out, nft_nat_do_chain);
+	return nf_nat_ipv6_in(ops, skb, state->in, state->out, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops,
 				     struct sk_buff *skb,
-				     const struct net_device *in,
-				     const struct net_device *out,
-				     int (*okfn)(struct sk_buff *))
+				     const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_out(ops, skb, in, out, nft_nat_do_chain);
+	return nf_nat_ipv6_out(ops, skb, state->in, state->out, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
-					  const struct net_device *in,
-					  const struct net_device *out,
-					  int (*okfn)(struct sk_buff *))
+					  const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_local_fn(ops, skb, in, out, nft_nat_do_chain);
+	return nf_nat_ipv6_local_fn(ops, skb, state->in, state->out, nft_nat_do_chain);
 }
 
 static const struct nf_chain_type nft_chain_nat_ipv6 = {
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 42031299585e..c826c3c854b2 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -24,9 +24,7 @@
 
 static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
+					const struct nf_hook_state *state)
 {
 	unsigned int ret;
 	struct nft_pktinfo pkt;
@@ -35,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
 	u32 mark, flowlabel;
 
 	/* malformed packet, drop it */
-	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out) < 0)
 		return NF_DROP;
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority */
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 11d04ebfc5e3..e6163017c42d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -136,8 +136,7 @@ unsigned int nf_iterate(struct list_head *head,
 		/* Optimization: we don't need to hold module
 		   reference here, since function can't sleep. --RR */
 repeat:
-		verdict = (*elemp)->hook(*elemp, skb, state->in, state->out,
-					 state->okfn);
+		verdict = (*elemp)->hook(*elemp, skb, state);
 		if (verdict != NF_ACCEPT) {
 #ifdef CONFIG_NETFILTER_DEBUG
 			if (unlikely((verdict & NF_VERDICT_MASK)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 04dbd9c7213f..5d2b806a862e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1272,8 +1272,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
  */
 static unsigned int
 ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	     const struct net_device *in, const struct net_device *out,
-	     int (*okfn)(struct sk_buff *))
+	     const struct nf_hook_state *state)
 {
 	return ip_vs_out(ops->hooknum, skb, AF_INET);
 }
@@ -1284,8 +1283,7 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
  */
 static unsigned int
 ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		   const struct net_device *in, const struct net_device *out,
-		   int (*okfn)(struct sk_buff *))
+		   const struct nf_hook_state *state)
 {
 	return ip_vs_out(ops->hooknum, skb, AF_INET);
 }
@@ -1299,8 +1297,7 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
  */
 static unsigned int
 ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	     const struct net_device *in, const struct net_device *out,
-	     int (*okfn)(struct sk_buff *))
+	     const struct nf_hook_state *state)
 {
 	return ip_vs_out(ops->hooknum, skb, AF_INET6);
 }
@@ -1311,8 +1308,7 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
  */
 static unsigned int
 ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		   const struct net_device *in, const struct net_device *out,
-		   int (*okfn)(struct sk_buff *))
+		   const struct nf_hook_state *state)
 {
 	return ip_vs_out(ops->hooknum, skb, AF_INET6);
 }
@@ -1769,9 +1765,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
  */
 static unsigned int
 ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		      const struct net_device *in,
-		      const struct net_device *out,
-		      int (*okfn)(struct sk_buff *))
+		      const struct nf_hook_state *state)
 {
 	return ip_vs_in(ops->hooknum, skb, AF_INET);
 }
@@ -1782,8 +1776,7 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
  */
 static unsigned int
 ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		     const struct net_device *in, const struct net_device *out,
-		     int (*okfn)(struct sk_buff *))
+		     const struct nf_hook_state *state)
 {
 	return ip_vs_in(ops->hooknum, skb, AF_INET);
 }
@@ -1796,9 +1789,7 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
  */
 static unsigned int
 ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		      const struct net_device *in,
-		      const struct net_device *out,
-		      int (*okfn)(struct sk_buff *))
+		      const struct nf_hook_state *state)
 {
 	return ip_vs_in(ops->hooknum, skb, AF_INET6);
 }
@@ -1809,8 +1800,7 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
  */
 static unsigned int
 ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		     const struct net_device *in, const struct net_device *out,
-		     int (*okfn)(struct sk_buff *))
+		     const struct nf_hook_state *state)
 {
 	return ip_vs_in(ops->hooknum, skb, AF_INET6);
 }
@@ -1829,8 +1819,7 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
  */
 static unsigned int
 ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		   const struct net_device *in, const struct net_device *out,
-		   int (*okfn)(struct sk_buff *))
+		   const struct nf_hook_state *state)
 {
 	int r;
 	struct net *net;
@@ -1851,8 +1840,7 @@ ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
 #ifdef CONFIG_IP_VS_IPV6
 static unsigned int
 ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		      const struct net_device *in, const struct net_device *out,
-		      int (*okfn)(struct sk_buff *))
+		      const struct nf_hook_state *state)
 {
 	int r;
 	struct net *net;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index edc66de39f2e..7e392edaab97 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4852,21 +4852,17 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb,
 
 static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
-					 const struct net_device *in,
-					 const struct net_device *out,
-					 int (*okfn)(struct sk_buff *))
+					 const struct nf_hook_state *state)
 {
-	return selinux_ip_forward(skb, in, PF_INET);
+	return selinux_ip_forward(skb, state->in, PF_INET);
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
-					 const struct net_device *in,
-					 const struct net_device *out,
-					 int (*okfn)(struct sk_buff *))
+					 const struct nf_hook_state *state)
 {
-	return selinux_ip_forward(skb, in, PF_INET6);
+	return selinux_ip_forward(skb, state->in, PF_INET6);
 }
 #endif	/* IPV6 */
 
@@ -4914,9 +4910,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb,
 
 static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
+					const struct nf_hook_state *state)
 {
 	return selinux_ip_output(skb, PF_INET);
 }
@@ -5091,21 +5085,17 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb,
 
 static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
-					   const struct net_device *in,
-					   const struct net_device *out,
-					   int (*okfn)(struct sk_buff *))
+					   const struct nf_hook_state *state)
 {
-	return selinux_ip_postroute(skb, out, PF_INET);
+	return selinux_ip_postroute(skb, state->out, PF_INET);
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
-					   const struct net_device *in,
-					   const struct net_device *out,
-					   int (*okfn)(struct sk_buff *))
+					   const struct nf_hook_state *state)
 {
-	return selinux_ip_postroute(skb, out, PF_INET6);
+	return selinux_ip_postroute(skb, state->out, PF_INET6);
 }
 #endif	/* IPV6 */
 
diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
index c952632afb0d..a455cfc9ec1f 100644
--- a/security/smack/smack_netfilter.c
+++ b/security/smack/smack_netfilter.c
@@ -23,9 +23,7 @@
 
 static unsigned int smack_ipv6_output(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
+					const struct nf_hook_state *state)
 {
 	struct socket_smack *ssp;
 	struct smack_known *skp;
@@ -42,9 +40,7 @@ static unsigned int smack_ipv6_output(const struct nf_hook_ops *ops,
 
 static unsigned int smack_ipv4_output(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
+					const struct nf_hook_state *state)
 {
 	struct socket_smack *ssp;
 	struct smack_known *skp;
-- 
cgit v1.2.3


From d7cf4081ed454dba02cb632e492ecf5e29d1ee44 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 3 Apr 2015 20:51:13 -0400
Subject: netfilter: Pass nf_hook_state through
 nf_nat_ipv4_{in,out,fn,local_fn}().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_nat_l3proto.h   | 24 ++++++++---------------
 net/ipv4/netfilter/iptable_nat.c         | 18 +++++++----------
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 33 +++++++++++++++-----------------
 net/ipv4/netfilter/nft_chain_nat_ipv4.c  | 14 ++++++--------
 4 files changed, 36 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index 340c013795a4..e596f35f14c9 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -44,40 +44,32 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
 				  unsigned int hooknum);
 
 unsigned int nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
-			    const struct net_device *in,
-			    const struct net_device *out,
+			    const struct nf_hook_state *state,
 			    unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 						     struct sk_buff *skb,
-						     const struct net_device *in,
-						     const struct net_device *out,
+						     const struct nf_hook_state *state,
 						     struct nf_conn *ct));
 
 unsigned int nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
-			     const struct net_device *in,
-			     const struct net_device *out,
+			     const struct nf_hook_state *state,
 			     unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 						      struct sk_buff *skb,
-						      const struct net_device *in,
-						      const struct net_device *out,
+						      const struct nf_hook_state *state,
 						      struct nf_conn *ct));
 
 unsigned int nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
 				  struct sk_buff *skb,
-				  const struct net_device *in,
-				  const struct net_device *out,
+				  const struct nf_hook_state *state,
 				  unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 							   struct sk_buff *skb,
-							   const struct net_device *in,
-							   const struct net_device *out,
+							   const struct nf_hook_state *state,
 							   struct nf_conn *ct));
 
 unsigned int nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
-			    const struct net_device *in,
-			    const struct net_device *out,
+			    const struct nf_hook_state *state,
 			    unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 						     struct sk_buff *skb,
-						     const struct net_device *in,
-						     const struct net_device *out,
+						     const struct nf_hook_state *state,
 						     struct nf_conn *ct));
 
 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 7a7fea4711e5..086e2311438e 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -30,45 +30,41 @@ static const struct xt_table nf_nat_ipv4_table = {
 
 static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
-					 const struct net_device *in,
-					 const struct net_device *out,
+					 const struct nf_hook_state *state,
 					 struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 
-	return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.nat_table);
+	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
+			    net->ipv4.nat_table);
 }
 
 static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
 					const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_fn(ops, skb, state->in, state->out,
-			      iptable_nat_do_chain);
+	return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain);
 }
 
 static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
 					const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_in(ops, skb, state->in, state->out,
-			      iptable_nat_do_chain);
+	return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain);
 }
 
 static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_out(ops, skb, state->in, state->out,
-			       iptable_nat_do_chain);
+	return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain);
 }
 
 static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
 					      struct sk_buff *skb,
 					      const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_local_fn(ops, skb, state->in, state->out,
-				    iptable_nat_do_chain);
+	return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain);
 }
 
 static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index fc37711e11f3..e59cc05c09e9 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -256,11 +256,10 @@ EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
 
 unsigned int
 nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	       const struct net_device *in, const struct net_device *out,
+	       const struct nf_hook_state *state,
 	       unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
+					const struct nf_hook_state *state,
 					struct nf_conn *ct))
 {
 	struct nf_conn *ct;
@@ -309,7 +308,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			ret = do_chain(ops, skb, in, out, ct);
+			ret = do_chain(ops, skb, state, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 
@@ -323,7 +322,8 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
-			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat,
+					       state->out))
 				goto oif_changed;
 		}
 		break;
@@ -332,7 +332,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
-		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
 			goto oif_changed;
 	}
 
@@ -346,17 +346,16 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
 
 unsigned int
 nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	       const struct net_device *in, const struct net_device *out,
+	       const struct nf_hook_state *state,
 	       unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
-					 const struct net_device *in,
-					 const struct net_device *out,
+					 const struct nf_hook_state *state,
 					 struct nf_conn *ct))
 {
 	unsigned int ret;
 	__be32 daddr = ip_hdr(skb)->daddr;
 
-	ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain);
+	ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    daddr != ip_hdr(skb)->daddr)
 		skb_dst_drop(skb);
@@ -367,11 +366,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv4_in);
 
 unsigned int
 nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		const struct net_device *in, const struct net_device *out,
+		const struct nf_hook_state *state,
 		unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
-					  const struct net_device *in,
-					  const struct net_device *out,
+					  const struct nf_hook_state *state,
 					  struct nf_conn *ct))
 {
 #ifdef CONFIG_XFRM
@@ -386,7 +384,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain);
+	ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
 #ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -410,11 +408,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv4_out);
 
 unsigned int
 nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		     const struct net_device *in, const struct net_device *out,
+		     const struct nf_hook_state *state,
 		     unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 					       struct sk_buff *skb,
-					       const struct net_device *in,
-					       const struct net_device *out,
+					       const struct nf_hook_state *state,
 					       struct nf_conn *ct))
 {
 	const struct nf_conn *ct;
@@ -427,7 +424,7 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain);
+	ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index d08db6b0fcc3..f56bbb1ab1a0 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -28,13 +28,12 @@
 
 static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
 				      struct sk_buff *skb,
-				      const struct net_device *in,
-				      const struct net_device *out,
+				      const struct nf_hook_state *state,
 				      struct nf_conn *ct)
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+	nft_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out);
 
 	return nft_do_chain(&pkt, ops);
 }
@@ -43,29 +42,28 @@ static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
 				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_fn(ops, skb, state->in, state->out, nft_nat_do_chain);
+	return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
 				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_in(ops, skb, state->in, state->out, nft_nat_do_chain);
+	return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops,
 				     struct sk_buff *skb,
 				     const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_out(ops, skb, state->in, state->out, nft_nat_do_chain);
+	return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
 					  const struct nf_hook_state *state)
 {
-	return nf_nat_ipv4_local_fn(ops, skb, state->in, state->out,
-				    nft_nat_do_chain);
+	return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain);
 }
 
 static const struct nf_chain_type nft_chain_nat_ipv4 = {
-- 
cgit v1.2.3


From 1c491ba2592f621f21a693d43fab06302527fc0f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 3 Apr 2015 20:56:08 -0400
Subject: netfilter: Pass nf_hook_state through ipt_do_table().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_tables.h |  3 +--
 net/ipv4/netfilter/ip_tables.c           | 13 ++++++-------
 net/ipv4/netfilter/iptable_filter.c      |  3 +--
 net/ipv4/netfilter/iptable_mangle.c      | 11 ++++++-----
 net/ipv4/netfilter/iptable_nat.c         |  3 +--
 net/ipv4/netfilter/iptable_raw.c         |  3 +--
 net/ipv4/netfilter/iptable_security.c    |  2 +-
 7 files changed, 17 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 901e84db847d..4073510da485 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -65,8 +65,7 @@ struct ipt_error {
 extern void *ipt_alloc_initial_table(const struct xt_table *);
 extern unsigned int ipt_do_table(struct sk_buff *skb,
 				 unsigned int hook,
-				 const struct net_device *in,
-				 const struct net_device *out,
+				 const struct nf_hook_state *state,
 				 struct xt_table *table);
 
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index cf5e82f39d3b..c69db7fa25ee 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -288,8 +288,7 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
 unsigned int
 ipt_do_table(struct sk_buff *skb,
 	     unsigned int hook,
-	     const struct net_device *in,
-	     const struct net_device *out,
+	     const struct nf_hook_state *state,
 	     struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
@@ -306,8 +305,8 @@ ipt_do_table(struct sk_buff *skb,
 
 	/* Initialization */
 	ip = ip_hdr(skb);
-	indev = in ? in->name : nulldevname;
-	outdev = out ? out->name : nulldevname;
+	indev = state->in ? state->in->name : nulldevname;
+	outdev = state->out ? state->out->name : nulldevname;
 	/* We handle fragments by dealing with the first fragment as
 	 * if it was a normal packet.  All other fragments are treated
 	 * normally, except that they will NEVER match rules that ask
@@ -317,8 +316,8 @@ ipt_do_table(struct sk_buff *skb,
 	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 	acpar.thoff   = ip_hdrlen(skb);
 	acpar.hotdrop = false;
-	acpar.in      = in;
-	acpar.out     = out;
+	acpar.in      = state->in;
+	acpar.out     = state->out;
 	acpar.family  = NFPROTO_IPV4;
 	acpar.hooknum = hook;
 
@@ -370,7 +369,7 @@ ipt_do_table(struct sk_buff *skb,
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 		/* The packet is traced: log it */
 		if (unlikely(skb->nf_trace))
-			trace_packet(skb, hook, in, out,
+			trace_packet(skb, hook, state->in, state->out,
 				     table->name, private, e);
 #endif
 		/* Standard target? */
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 1df0d42bfd39..a0f3beca52d2 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -45,8 +45,7 @@ iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		return NF_ACCEPT;
 
 	net = dev_net(state->in ? state->in : state->out);
-	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
-			    net->ipv4.iptable_filter);
+	return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_filter);
 }
 
 static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 7a825e740045..62cbb8c5f4a8 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -37,8 +37,9 @@ static const struct xt_table packet_mangler = {
 };
 
 static unsigned int
-ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
+ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
 {
+	struct net_device *out = state->out;
 	unsigned int ret;
 	const struct iphdr *iph;
 	u_int8_t tos;
@@ -58,7 +59,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
 	daddr = iph->daddr;
 	tos = iph->tos;
 
-	ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
+	ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state,
 			   dev_net(out)->ipv4.iptable_mangle);
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN) {
@@ -84,12 +85,12 @@ iptable_mangle_hook(const struct nf_hook_ops *ops,
 		     const struct nf_hook_state *state)
 {
 	if (ops->hooknum == NF_INET_LOCAL_OUT)
-		return ipt_mangle_out(skb, state->out);
+		return ipt_mangle_out(skb, state);
 	if (ops->hooknum == NF_INET_POST_ROUTING)
-		return ipt_do_table(skb, ops->hooknum, state->in, state->out,
+		return ipt_do_table(skb, ops->hooknum, state,
 				    dev_net(state->out)->ipv4.iptable_mangle);
 	/* PREROUTING/INPUT/FORWARD: */
-	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
+	return ipt_do_table(skb, ops->hooknum, state,
 			    dev_net(state->in)->ipv4.iptable_mangle);
 }
 
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 086e2311438e..0d4d9cdf98a4 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -35,8 +35,7 @@ static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops,
 {
 	struct net *net = nf_ct_net(ct);
 
-	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
-			    net->ipv4.nat_table);
+	return ipt_do_table(skb, ops->hooknum, state, net->ipv4.nat_table);
 }
 
 static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops,
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index fac8f607c70b..0356e6da4bb7 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -32,8 +32,7 @@ iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		return NF_ACCEPT;
 
 	net = dev_net(state->in ? state->in : state->out);
-	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
-			    net->ipv4.iptable_raw);
+	return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_raw);
 }
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index d9ad60a57413..4bce3980ccd9 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -49,7 +49,7 @@ iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		return NF_ACCEPT;
 
 	net = dev_net(state->in ? state->in : state->out);
-	return ipt_do_table(skb, ops->hooknum, state->in, state->out,
+	return ipt_do_table(skb, ops->hooknum, state,
 			    net->ipv4.iptable_security);
 }
 
-- 
cgit v1.2.3


From 8fe22382d1e73dc0ded8098ccf761c986149f72b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 3 Apr 2015 21:05:07 -0400
Subject: netfilter: Pass nf_hook_state through
 nf_nat_ipv6_{in,out,fn,local_fn}().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_nat_l3proto.h   | 24 ++++++++----------------
 net/ipv6/netfilter/ip6table_nat.c        | 18 +++++++-----------
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 32 ++++++++++++++------------------
 net/ipv6/netfilter/nft_chain_nat_ipv6.c  | 13 ++++++-------
 4 files changed, 35 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index e596f35f14c9..a3127325f624 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -77,40 +77,32 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
 				    unsigned int hooknum, unsigned int hdrlen);
 
 unsigned int nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
-			    const struct net_device *in,
-			    const struct net_device *out,
+			    const struct nf_hook_state *state,
 			    unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 						     struct sk_buff *skb,
-						     const struct net_device *in,
-						     const struct net_device *out,
+						     const struct nf_hook_state *state,
 						     struct nf_conn *ct));
 
 unsigned int nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
-			     const struct net_device *in,
-			     const struct net_device *out,
+			     const struct nf_hook_state *state,
 			     unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 						      struct sk_buff *skb,
-						      const struct net_device *in,
-						      const struct net_device *out,
+						      const struct nf_hook_state *state,
 						      struct nf_conn *ct));
 
 unsigned int nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
 				  struct sk_buff *skb,
-				  const struct net_device *in,
-				  const struct net_device *out,
+				  const struct nf_hook_state *state,
 				  unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 							   struct sk_buff *skb,
-							   const struct net_device *in,
-							   const struct net_device *out,
+							   const struct nf_hook_state *state,
 							   struct nf_conn *ct));
 
 unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
-			    const struct net_device *in,
-			    const struct net_device *out,
+			    const struct nf_hook_state *state,
 			    unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 						     struct sk_buff *skb,
-						     const struct net_device *in,
-						     const struct net_device *out,
+						     const struct nf_hook_state *state,
 						     struct nf_conn *ct));
 
 #endif /* _NF_NAT_L3PROTO_H */
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index e32b0d0315e6..d78f69c7abce 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -32,45 +32,41 @@ static const struct xt_table nf_nat_ipv6_table = {
 
 static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
-					  const struct net_device *in,
-					  const struct net_device *out,
+					  const struct nf_hook_state *state,
 					  struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 
-	return ip6t_do_table(skb, ops->hooknum, in, out, net->ipv6.ip6table_nat);
+	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
+			     net->ipv6.ip6table_nat);
 }
 
 static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
 				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_fn(ops, skb, state->in, state->out,
-			      ip6table_nat_do_chain);
+	return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain);
 }
 
 static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
 				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_in(ops, skb, state->in, state->out,
-			      ip6table_nat_do_chain);
+	return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain);
 }
 
 static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops,
 				     struct sk_buff *skb,
 				     const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_out(ops, skb, state->in, state->out,
-			       ip6table_nat_do_chain);
+	return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain);
 }
 
 static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
 					  const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_local_fn(ops, skb, state->in, state->out,
-				    ip6table_nat_do_chain);
+	return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain);
 }
 
 static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index c5812e1c1ffb..e76900e0aa92 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -263,11 +263,10 @@ EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
 
 unsigned int
 nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	       const struct net_device *in, const struct net_device *out,
+	       const struct nf_hook_state *state,
 	       unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
+					const struct nf_hook_state *state,
 					struct nf_conn *ct))
 {
 	struct nf_conn *ct;
@@ -318,7 +317,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			ret = do_chain(ops, skb, in, out, ct);
+			ret = do_chain(ops, skb, state, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 
@@ -332,7 +331,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
-			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
 				goto oif_changed;
 		}
 		break;
@@ -341,7 +340,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
-		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
 			goto oif_changed;
 	}
 
@@ -355,17 +354,16 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn);
 
 unsigned int
 nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
-	       const struct net_device *in, const struct net_device *out,
+	       const struct nf_hook_state *state,
 	       unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
-					const struct net_device *in,
-					const struct net_device *out,
+					const struct nf_hook_state *state,
 					struct nf_conn *ct))
 {
 	unsigned int ret;
 	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
 
-	ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain);
+	ret = nf_nat_ipv6_fn(ops, skb, state, do_chain);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
 		skb_dst_drop(skb);
@@ -376,11 +374,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv6_in);
 
 unsigned int
 nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		const struct net_device *in, const struct net_device *out,
+		const struct nf_hook_state *state,
 		unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
-					 const struct net_device *in,
-					 const struct net_device *out,
+					 const struct nf_hook_state *state,
 					 struct nf_conn *ct))
 {
 #ifdef CONFIG_XFRM
@@ -394,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	if (skb->len < sizeof(struct ipv6hdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain);
+	ret = nf_nat_ipv6_fn(ops, skb, state, do_chain);
 #ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -418,11 +415,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv6_out);
 
 unsigned int
 nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
-		     const struct net_device *in, const struct net_device *out,
+		     const struct nf_hook_state *state,
 		     unsigned int (*do_chain)(const struct nf_hook_ops *ops,
 					      struct sk_buff *skb,
-					      const struct net_device *in,
-					      const struct net_device *out,
+					      const struct nf_hook_state *state,
 					      struct nf_conn *ct))
 {
 	const struct nf_conn *ct;
@@ -434,7 +430,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	if (skb->len < sizeof(struct ipv6hdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain);
+	ret = nf_nat_ipv6_fn(ops, skb, state, do_chain);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index f73f4ae25bc2..11c6f75fcc4d 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -26,13 +26,12 @@
 
 static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
 				     struct sk_buff *skb,
-				     const struct net_device *in,
-				     const struct net_device *out,
+				     const struct nf_hook_state *state,
 				     struct nf_conn *ct)
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
+	nft_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out);
 
 	return nft_do_chain(&pkt, ops);
 }
@@ -41,28 +40,28 @@ static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
 				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_fn(ops, skb, state->in, state->out, nft_nat_do_chain);
+	return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops,
 				    struct sk_buff *skb,
 				    const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_in(ops, skb, state->in, state->out, nft_nat_do_chain);
+	return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops,
 				     struct sk_buff *skb,
 				     const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_out(ops, skb, state->in, state->out, nft_nat_do_chain);
+	return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain);
 }
 
 static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
 					  struct sk_buff *skb,
 					  const struct nf_hook_state *state)
 {
-	return nf_nat_ipv6_local_fn(ops, skb, state->in, state->out, nft_nat_do_chain);
+	return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain);
 }
 
 static const struct nf_chain_type nft_chain_nat_ipv6 = {
-- 
cgit v1.2.3


From 8f8a37152df49d541c43f010543f2b0176fcfb8e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 3 Apr 2015 21:09:51 -0400
Subject: netfilter: Pass nf_hook_state through ip6t_do_table().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv6/ip6_tables.h |  3 +--
 net/ipv6/netfilter/ip6_tables.c           | 13 ++++++-------
 net/ipv6/netfilter/ip6table_filter.c      |  3 +--
 net/ipv6/netfilter/ip6table_mangle.c      | 12 ++++++------
 net/ipv6/netfilter/ip6table_nat.c         |  3 +--
 net/ipv6/netfilter/ip6table_raw.c         |  3 +--
 net/ipv6/netfilter/ip6table_security.c    |  2 +-
 7 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 610208b18c05..b40d2b635778 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -31,8 +31,7 @@ extern struct xt_table *ip6t_register_table(struct net *net,
 extern void ip6t_unregister_table(struct net *net, struct xt_table *table);
 extern unsigned int ip6t_do_table(struct sk_buff *skb,
 				  unsigned int hook,
-				  const struct net_device *in,
-				  const struct net_device *out,
+				  const struct nf_hook_state *state,
 				  struct xt_table *table);
 
 /* Check for an extension */
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 83f59dc3cccc..1a732a1d3c8e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -317,8 +317,7 @@ ip6t_next_entry(const struct ip6t_entry *entry)
 unsigned int
 ip6t_do_table(struct sk_buff *skb,
 	      unsigned int hook,
-	      const struct net_device *in,
-	      const struct net_device *out,
+	      const struct nf_hook_state *state,
 	      struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
@@ -333,8 +332,8 @@ ip6t_do_table(struct sk_buff *skb,
 	unsigned int addend;
 
 	/* Initialization */
-	indev = in ? in->name : nulldevname;
-	outdev = out ? out->name : nulldevname;
+	indev = state->in ? state->in->name : nulldevname;
+	outdev = state->out ? state->out->name : nulldevname;
 	/* We handle fragments by dealing with the first fragment as
 	 * if it was a normal packet.  All other fragments are treated
 	 * normally, except that they will NEVER match rules that ask
@@ -342,8 +341,8 @@ ip6t_do_table(struct sk_buff *skb,
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
 	acpar.hotdrop = false;
-	acpar.in      = in;
-	acpar.out     = out;
+	acpar.in      = state->in;
+	acpar.out     = state->out;
 	acpar.family  = NFPROTO_IPV6;
 	acpar.hooknum = hook;
 
@@ -393,7 +392,7 @@ ip6t_do_table(struct sk_buff *skb,
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 		/* The packet is traced: log it */
 		if (unlikely(skb->nf_trace))
-			trace_packet(skb, hook, in, out,
+			trace_packet(skb, hook, state->in, state->out,
 				     table->name, private, e);
 #endif
 		/* Standard target? */
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index eb9ef093454f..5c33d8abc077 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -37,8 +37,7 @@ ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 {
 	const struct net *net = dev_net(state->in ? state->in : state->out);
 
-	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
-			     net->ipv6.ip6table_filter);
+	return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_filter);
 }
 
 static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index e713b8d3dbbc..b551f5b79fe2 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -32,7 +32,7 @@ static const struct xt_table packet_mangler = {
 };
 
 static unsigned int
-ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
+ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
 {
 	unsigned int ret;
 	struct in6_addr saddr, daddr;
@@ -57,8 +57,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
 	/* flowlabel and prio (includes version, which shouldn't change either */
 	flowlabel = *((u_int32_t *)ipv6_hdr(skb));
 
-	ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
-			    dev_net(out)->ipv6.ip6table_mangle);
+	ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state,
+			    dev_net(state->out)->ipv6.ip6table_mangle);
 
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
@@ -80,12 +80,12 @@ ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		     const struct nf_hook_state *state)
 {
 	if (ops->hooknum == NF_INET_LOCAL_OUT)
-		return ip6t_mangle_out(skb, state->out);
+		return ip6t_mangle_out(skb, state);
 	if (ops->hooknum == NF_INET_POST_ROUTING)
-		return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
+		return ip6t_do_table(skb, ops->hooknum, state,
 				     dev_net(state->out)->ipv6.ip6table_mangle);
 	/* INPUT/FORWARD */
-	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
+	return ip6t_do_table(skb, ops->hooknum, state,
 			     dev_net(state->in)->ipv6.ip6table_mangle);
 }
 
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index d78f69c7abce..c3a7f7af0ed4 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -37,8 +37,7 @@ static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops,
 {
 	struct net *net = nf_ct_net(ct);
 
-	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
-			     net->ipv6.ip6table_nat);
+	return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_nat);
 }
 
 static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 937908e25862..0b33caad2b69 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -24,8 +24,7 @@ ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 {
 	const struct net *net = dev_net(state->in ? state->in : state->out);
 
-	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
-			     net->ipv6.ip6table_raw);
+	return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_raw);
 }
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index f33b41e8e294..fcef83c25f7b 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -41,7 +41,7 @@ ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 {
 	const struct net *net = dev_net(state->in ? state->in : state->out);
 
-	return ip6t_do_table(skb, ops->hooknum, state->in, state->out,
+	return ip6t_do_table(skb, ops->hooknum, state,
 			     net->ipv6.ip6table_security);
 }
 
-- 
cgit v1.2.3


From 073bfd56860446a2cb349bcf282fc17a36ca386c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 3 Apr 2015 21:16:25 -0400
Subject: netfilter: Pass nf_hook_state through nft_set_pktinfo*().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_tables.h         |  7 +++----
 include/net/netfilter/nf_tables_ipv4.h    |  5 ++---
 include/net/netfilter/nf_tables_ipv6.h    |  5 ++---
 net/bridge/netfilter/nf_tables_bridge.c   | 24 +++++++++++-------------
 net/ipv4/netfilter/nf_tables_arp.c        |  2 +-
 net/ipv4/netfilter/nf_tables_ipv4.c       |  2 +-
 net/ipv4/netfilter/nft_chain_nat_ipv4.c   |  2 +-
 net/ipv4/netfilter/nft_chain_route_ipv4.c |  2 +-
 net/ipv6/netfilter/nf_tables_ipv6.c       |  2 +-
 net/ipv6/netfilter/nft_chain_nat_ipv6.c   |  2 +-
 net/ipv6/netfilter/nft_chain_route_ipv6.c |  2 +-
 11 files changed, 25 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index b8cd60dcb4e1..804981980393 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -26,12 +26,11 @@ struct nft_pktinfo {
 static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
 				   const struct nf_hook_ops *ops,
 				   struct sk_buff *skb,
-				   const struct net_device *in,
-				   const struct net_device *out)
+				   const struct nf_hook_state *state)
 {
 	pkt->skb = skb;
-	pkt->in = pkt->xt.in = in;
-	pkt->out = pkt->xt.out = out;
+	pkt->in = pkt->xt.in = state->in;
+	pkt->out = pkt->xt.out = state->out;
 	pkt->ops = ops;
 	pkt->xt.hooknum = ops->hooknum;
 	pkt->xt.family = ops->pf;
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index cba143fbd2e4..2df7f96902ee 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -8,12 +8,11 @@ static inline void
 nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
 		     const struct nf_hook_ops *ops,
 		     struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out)
+		     const struct nf_hook_state *state)
 {
 	struct iphdr *ip;
 
-	nft_set_pktinfo(pkt, ops, skb, in, out);
+	nft_set_pktinfo(pkt, ops, skb, state);
 
 	ip = ip_hdr(pkt->skb);
 	pkt->tprot = ip->protocol;
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index 74d976137658..97db2e3a5e65 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -8,13 +8,12 @@ static inline int
 nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
 		     const struct nf_hook_ops *ops,
 		     struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out)
+		     const struct nf_hook_state *state)
 {
 	int protohdr, thoff = 0;
 	unsigned short frag_off;
 
-	nft_set_pktinfo(pkt, ops, skb, in, out);
+	nft_set_pktinfo(pkt, ops, skb, state);
 
 	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
 	/* If malformed, drop it */
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 2c46a47160a8..a343e62442b1 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -67,27 +67,25 @@ EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate);
 static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
 					       const struct nf_hook_ops *ops,
 					       struct sk_buff *skb,
-					       const struct net_device *in,
-					       const struct net_device *out)
+					       const struct nf_hook_state *state)
 {
 	if (nft_bridge_iphdr_validate(skb))
-		nft_set_pktinfo_ipv4(pkt, ops, skb, in, out);
+		nft_set_pktinfo_ipv4(pkt, ops, skb, state);
 	else
-		nft_set_pktinfo(pkt, ops, skb, in, out);
+		nft_set_pktinfo(pkt, ops, skb, state);
 }
 
 static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
-					      const struct nf_hook_ops *ops,
-					      struct sk_buff *skb,
-					      const struct net_device *in,
-					      const struct net_device *out)
+					       const struct nf_hook_ops *ops,
+					       struct sk_buff *skb,
+					       const struct nf_hook_state *state)
 {
 #if IS_ENABLED(CONFIG_IPV6)
 	if (nft_bridge_ip6hdr_validate(skb) &&
-	    nft_set_pktinfo_ipv6(pkt, ops, skb, in, out) == 0)
+	    nft_set_pktinfo_ipv6(pkt, ops, skb, state) == 0)
 		return;
 #endif
-	nft_set_pktinfo(pkt, ops, skb, in, out);
+	nft_set_pktinfo(pkt, ops, skb, state);
 }
 
 static unsigned int
@@ -99,13 +97,13 @@ nft_do_chain_bridge(const struct nf_hook_ops *ops,
 
 	switch (eth_hdr(skb)->h_proto) {
 	case htons(ETH_P_IP):
-		nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out);
+		nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state);
 		break;
 	case htons(ETH_P_IPV6):
-		nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out);
+		nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state);
 		break;
 	default:
-		nft_set_pktinfo(&pkt, ops, skb, state->in, state->out);
+		nft_set_pktinfo(&pkt, ops, skb, state);
 		break;
 	}
 
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index fceb50e1e87d..8412268bbad1 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -21,7 +21,7 @@ nft_do_chain_arp(const struct nf_hook_ops *ops,
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo(&pkt, ops, skb, state->in, state->out);
+	nft_set_pktinfo(&pkt, ops, skb, state);
 
 	return nft_do_chain(&pkt, ops);
 }
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index 708e388e3dbe..aa180d3a69a5 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -24,7 +24,7 @@ static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops,
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out);
+	nft_set_pktinfo_ipv4(&pkt, ops, skb, state);
 
 	return nft_do_chain(&pkt, ops);
 }
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index f56bbb1ab1a0..bf5c30ae14e4 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -33,7 +33,7 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out);
+	nft_set_pktinfo_ipv4(&pkt, ops, skb, state);
 
 	return nft_do_chain(&pkt, ops);
 }
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 073d0776ae7f..e335b0afdaf3 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -37,7 +37,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
 	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	nft_set_pktinfo_ipv4(&pkt, ops, skb, state->in, state->out);
+	nft_set_pktinfo_ipv4(&pkt, ops, skb, state);
 
 	mark = skb->mark;
 	iph = ip_hdr(skb);
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index 224bc8971a0b..c8148ba76d1a 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -23,7 +23,7 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops,
 	struct nft_pktinfo pkt;
 
 	/* malformed packet, drop it */
-	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out) < 0)
+	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0)
 		return NF_DROP;
 
 	return nft_do_chain(&pkt, ops);
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 11c6f75fcc4d..951bb458b7bd 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -31,7 +31,7 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out);
+	nft_set_pktinfo_ipv6(&pkt, ops, skb, state);
 
 	return nft_do_chain(&pkt, ops);
 }
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index c826c3c854b2..0dafdaac5e17 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -33,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
 	u32 mark, flowlabel;
 
 	/* malformed packet, drop it */
-	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state->in, state->out) < 0)
+	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0)
 		return NF_DROP;
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority */
-- 
cgit v1.2.3


From b85c3dc9bd5347ad9540ec8d103b7c049c48b7cd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 3 Apr 2015 21:18:46 -0400
Subject: netfilter: Pass nf_hook_state through arpt_do_table().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_arp/arp_tables.h |  3 +--
 net/ipv4/netfilter/arp_tables.c          | 11 +++++------
 net/ipv4/netfilter/arptable_filter.c     |  2 +-
 3 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index cfb7191e6efa..c22a7fb8d0df 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -54,8 +54,7 @@ extern struct xt_table *arpt_register_table(struct net *net,
 extern void arpt_unregister_table(struct xt_table *table);
 extern unsigned int arpt_do_table(struct sk_buff *skb,
 				  unsigned int hook,
-				  const struct net_device *in,
-				  const struct net_device *out,
+				  const struct nf_hook_state *state,
 				  struct xt_table *table);
 
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f95b6f93814b..13bfe84bf3ca 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -248,8 +248,7 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
 
 unsigned int arpt_do_table(struct sk_buff *skb,
 			   unsigned int hook,
-			   const struct net_device *in,
-			   const struct net_device *out,
+			   const struct nf_hook_state *state,
 			   struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
@@ -265,8 +264,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
 		return NF_DROP;
 
-	indev = in ? in->name : nulldevname;
-	outdev = out ? out->name : nulldevname;
+	indev = state->in ? state->in->name : nulldevname;
+	outdev = state->out ? state->out->name : nulldevname;
 
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
@@ -281,8 +280,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
-	acpar.in      = in;
-	acpar.out     = out;
+	acpar.in      = state->in;
+	acpar.out     = state->out;
 	acpar.hooknum = hook;
 	acpar.family  = NFPROTO_ARP;
 	acpar.hotdrop = false;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 6a641cb41062..93876d03120c 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -32,7 +32,7 @@ arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
 {
 	const struct net *net = dev_net(state->in ? state->in : state->out);
 
-	return arpt_do_table(skb, ops->hooknum, state->in, state->out,
+	return arpt_do_table(skb, ops->hooknum, state,
 			     net->ipv4.arptable_filter);
 }
 
-- 
cgit v1.2.3


From f60e5990d9c1424af9dbca60a23ba2a1c7c1ce90 Mon Sep 17 00:00:00 2001
From: "hannes@stressinduktion.org" <hannes@stressinduktion.org>
Date: Wed, 1 Apr 2015 17:07:44 +0200
Subject: ipv6: protect skb->sk accesses from recursive dereference inside the
 stack

We should not consult skb->sk for output decisions in xmit recursion
levels > 0 in the stack. Otherwise local socket settings could influence
the result of e.g. tunnel encapsulation process.

ipv6 does not conform with this in three places:

1) ip6_fragment: we do consult ipv6_npinfo for frag_size

2) sk_mc_loop in ipv6 uses skb->sk and checks if we should
   loop the packet back to the local socket

3) ip6_skb_dst_mtu could query the settings from the user socket and
   force a wrong MTU

Furthermore:
In sk_mc_loop we could potentially land in WARN_ON(1) if we use a
PF_PACKET socket ontop of an IPv6-backed vxlan device.

Reuse xmit_recursion as we are currently only interested in protecting
tunnel devices.

Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 ++++++
 include/net/ip.h          | 16 ----------------
 include/net/ip6_route.h   |  3 ++-
 include/net/sock.h        |  2 ++
 net/core/dev.c            |  4 +++-
 net/core/sock.c           | 19 +++++++++++++++++++
 net/ipv6/ip6_output.c     |  3 ++-
 7 files changed, 34 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dcf6ec27739b..278738873703 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2185,6 +2185,12 @@ void netdev_freemem(struct net_device *dev);
 void synchronize_net(void);
 int init_dummy_netdev(struct net_device *dev);
 
+DECLARE_PER_CPU(int, xmit_recursion);
+static inline int dev_recursion_level(void)
+{
+	return this_cpu_read(xmit_recursion);
+}
+
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
diff --git a/include/net/ip.h b/include/net/ip.h
index 025c61c0dffb..6cc1eafb153a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -453,22 +453,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
 
 #endif
 
-static inline int sk_mc_loop(struct sock *sk)
-{
-	if (!sk)
-		return 1;
-	switch (sk->sk_family) {
-	case AF_INET:
-		return inet_sk(sk)->mc_loop;
-#if IS_ENABLED(CONFIG_IPV6)
-	case AF_INET6:
-		return inet6_sk(sk)->mc_loop;
-#endif
-	}
-	WARN_ON(1);
-	return 1;
-}
-
 bool ip_call_ra_chain(struct sk_buff *skb);
 
 /*
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 1d09b46c1e48..eda131d179d9 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -174,7 +174,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 
 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 {
-	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
+				inet6_sk(skb->sk) : NULL;
 
 	return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ?
 	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
diff --git a/include/net/sock.h b/include/net/sock.h
index ab186b1d31ff..e4079c28e6b8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1762,6 +1762,8 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
 
 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
 
+bool sk_mc_loop(struct sock *sk);
+
 static inline bool sk_can_gso(const struct sock *sk)
 {
 	return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
diff --git a/net/core/dev.c b/net/core/dev.c
index 962ee9d71964..45109b70664e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2848,7 +2848,9 @@ static void skb_update_prio(struct sk_buff *skb)
 #define skb_update_prio(skb)
 #endif
 
-static DEFINE_PER_CPU(int, xmit_recursion);
+DEFINE_PER_CPU(int, xmit_recursion);
+EXPORT_SYMBOL(xmit_recursion);
+
 #define RECURSION_LIMIT 10
 
 /**
diff --git a/net/core/sock.c b/net/core/sock.c
index 78e89eb7eb70..71e3e5f1eaa0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -653,6 +653,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
 		sock_reset_flag(sk, bit);
 }
 
+bool sk_mc_loop(struct sock *sk)
+{
+	if (dev_recursion_level())
+		return false;
+	if (!sk)
+		return true;
+	switch (sk->sk_family) {
+	case AF_INET:
+		return inet_sk(sk)->mc_loop;
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		return inet6_sk(sk)->mc_loop;
+#endif
+	}
+	WARN_ON(1);
+	return true;
+}
+EXPORT_SYMBOL(sk_mc_loop);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7e80b61b51ff..36cf0ab685a0 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -542,7 +542,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct sk_buff *frag;
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
-	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
+				inet6_sk(skb->sk) : NULL;
 	struct ipv6hdr *tmp_hdr;
 	struct frag_hdr *fh;
 	unsigned int mtu, hlen, left, len;
-- 
cgit v1.2.3


From 91bc4822c3d61b9bb7ef66d3b77948a4f9177954 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 1 Apr 2015 17:12:13 -0700
Subject: tc: bpf: add checksum helpers

Commit 608cd71a9c7c ("tc: bpf: generalize pedit action") has added the
possibility to mangle packet data to BPF programs in the tc pipeline.
This patch adds two helpers bpf_l3_csum_replace() and bpf_l4_csum_replace()
for fixing up the protocol checksums after the packet mangling.

It also adds 'flags' argument to bpf_skb_store_bytes() helper to avoid
unnecessary checksum recomputations when BPF programs adjusting l3/l4
checksums and documents all three helpers in uapi header.

Moreover, a sample program is added to show how BPF programs can make use
of the mangle and csum helpers.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h  |  38 +++++++++++++++-
 net/core/filter.c         | 108 ++++++++++++++++++++++++++++++++++++++++++++--
 samples/bpf/Makefile      |   1 +
 samples/bpf/bpf_helpers.h |   7 +++
 samples/bpf/tcbpf1_kern.c |  71 ++++++++++++++++++++++++++++++
 5 files changed, 220 insertions(+), 5 deletions(-)
 create mode 100644 samples/bpf/tcbpf1_kern.c

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0db8580f3cca..23df3e7f8e7d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -168,7 +168,43 @@ enum bpf_func_id {
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
 	BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
-	BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */
+
+	/**
+	 * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
+	 * @skb: pointer to skb
+	 * @offset: offset within packet from skb->data
+	 * @from: pointer where to copy bytes from
+	 * @len: number of bytes to store into packet
+	 * @flags: bit 0 - if true, recompute skb->csum
+	 *         other bits - reserved
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_skb_store_bytes,
+
+	/**
+	 * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum
+	 * @skb: pointer to skb
+	 * @offset: offset within packet where IP checksum is located
+	 * @from: old value of header field
+	 * @to: new value of header field
+	 * @flags: bits 0-3 - size of header field
+	 *         other bits - reserved
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_l3_csum_replace,
+
+	/**
+	 * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum
+	 * @skb: pointer to skb
+	 * @offset: offset within packet where TCP/UDP checksum is located
+	 * @from: old value of header field
+	 * @to: new value of header field
+	 * @flags: bits 0-3 - size of header field
+	 *         bit 4 - is pseudo header
+	 *         other bits - reserved
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_l4_csum_replace,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 955a7d77decd..b669e75d2b36 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,7 +1175,9 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 	return 0;
 }
 
-static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+#define BPF_RECOMPUTE_CSUM(flags)	((flags) & 1)
+
+static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	unsigned int offset = (unsigned int) r2;
@@ -1192,7 +1194,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	 *
 	 * so check for invalid 'offset' and too large 'len'
 	 */
-	if (offset > 0xffff || len > sizeof(buf))
+	if (unlikely(offset > 0xffff || len > sizeof(buf)))
 		return -EFAULT;
 
 	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
@@ -1202,7 +1204,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	if (unlikely(!ptr))
 		return -EFAULT;
 
-	skb_postpull_rcsum(skb, ptr, len);
+	if (BPF_RECOMPUTE_CSUM(flags))
+		skb_postpull_rcsum(skb, ptr, len);
 
 	memcpy(ptr, from, len);
 
@@ -1210,7 +1213,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 		/* skb_store_bits cannot return -EFAULT here */
 		skb_store_bits(skb, offset, ptr, len);
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
 	return 0;
 }
@@ -1223,6 +1226,99 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
 	.arg2_type	= ARG_ANYTHING,
 	.arg3_type	= ARG_PTR_TO_STACK,
 	.arg4_type	= ARG_CONST_STACK_SIZE,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+#define BPF_HEADER_FIELD_SIZE(flags)	((flags) & 0x0f)
+#define BPF_IS_PSEUDO_HEADER(flags)	((flags) & 0x10)
+
+static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	__sum16 sum, *ptr;
+
+	if (unlikely(offset > 0xffff))
+		return -EFAULT;
+
+	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+		return -EFAULT;
+
+	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+	if (unlikely(!ptr))
+		return -EFAULT;
+
+	switch (BPF_HEADER_FIELD_SIZE(flags)) {
+	case 2:
+		csum_replace2(ptr, from, to);
+		break;
+	case 4:
+		csum_replace4(ptr, from, to);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ptr == &sum)
+		/* skb_store_bits guaranteed to not return -EFAULT here */
+		skb_store_bits(skb, offset, ptr, sizeof(sum));
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_l3_csum_replace_proto = {
+	.func		= bpf_l3_csum_replace,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+	__sum16 sum, *ptr;
+
+	if (unlikely(offset > 0xffff))
+		return -EFAULT;
+
+	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+		return -EFAULT;
+
+	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+	if (unlikely(!ptr))
+		return -EFAULT;
+
+	switch (BPF_HEADER_FIELD_SIZE(flags)) {
+	case 2:
+		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
+		break;
+	case 4:
+		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ptr == &sum)
+		/* skb_store_bits guaranteed to not return -EFAULT here */
+		skb_store_bits(skb, offset, ptr, sizeof(sum));
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_l4_csum_replace_proto = {
+	.func		= bpf_l4_csum_replace,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
 };
 
 static const struct bpf_func_proto *
@@ -1250,6 +1346,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 	switch (func_id) {
 	case BPF_FUNC_skb_store_bytes:
 		return &bpf_skb_store_bytes_proto;
+	case BPF_FUNC_l3_csum_replace:
+		return &bpf_l3_csum_replace_proto;
+	case BPF_FUNC_l4_csum_replace:
+		return &bpf_l4_csum_replace_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index b5b3600dcdf5..d24f51bca465 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -17,6 +17,7 @@ sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
 always := $(hostprogs-y)
 always += sockex1_kern.o
 always += sockex2_kern.o
+always += tcbpf1_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index ca0333146006..72540ec1f003 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -37,4 +37,11 @@ struct bpf_map_def {
 	unsigned int max_entries;
 };
 
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+	(void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l4_csum_replace;
+
 #endif
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
new file mode 100644
index 000000000000..7cf3f42a6e39
--- /dev/null
+++ b/samples/bpf/tcbpf1_kern.c
@@ -0,0 +1,71 @@
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include "bpf_helpers.h"
+
+/* compiler workaround */
+#define _htonl __builtin_bswap32
+
+static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
+{
+	bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1);
+}
+
+/* use 1 below for ingress qdisc and 0 for egress */
+#if 0
+#undef ETH_HLEN
+#define ETH_HLEN 0
+#endif
+
+#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
+#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
+
+static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
+{
+	__u8 old_tos = load_byte(skb, TOS_OFF);
+
+	bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
+	bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
+}
+
+#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check))
+#define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr))
+
+#define IS_PSEUDO 0x10
+
+static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
+{
+	__u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
+
+	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
+	bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
+	bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
+}
+
+#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
+static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
+{
+	__u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
+
+	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
+	bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
+}
+
+SEC("classifier")
+int bpf_prog1(struct __sk_buff *skb)
+{
+	__u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
+	long *value;
+
+	if (proto == IPPROTO_TCP) {
+		set_ip_tos(skb, 8);
+		set_tcp_ip_src(skb, 0xA010101);
+		set_tcp_dest_port(skb, 5001);
+	}
+
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From da18428498fb24438a23d982259461fe22bc1f46 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 21 Mar 2015 19:29:06 -0400
Subject: net: switch importing msghdr from userland to
 {compat_,}import_iovec()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/net/compat.h |  2 +-
 net/compat.c         | 18 +++++++-----------
 net/socket.c         | 31 ++++++++++++-------------------
 3 files changed, 20 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/net/compat.h b/include/net/compat.h
index 42a9c8431177..48103cf94e97 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -40,7 +40,7 @@ int compat_sock_get_timestampns(struct sock *, struct timespec __user *);
 #define compat_mmsghdr	mmsghdr
 #endif /* defined(CONFIG_COMPAT) */
 
-ssize_t get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *,
+int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *,
 		      struct sockaddr __user **, struct iovec **);
 asmlinkage long compat_sys_sendmsg(int, struct compat_msghdr __user *,
 				   unsigned int);
diff --git a/net/compat.c b/net/compat.c
index c4b6b0f43d5d..5cfd26a0006f 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -31,10 +31,10 @@
 #include <asm/uaccess.h>
 #include <net/compat.h>
 
-ssize_t get_compat_msghdr(struct msghdr *kmsg,
-			  struct compat_msghdr __user *umsg,
-			  struct sockaddr __user **save_addr,
-			  struct iovec **iov)
+int get_compat_msghdr(struct msghdr *kmsg,
+		      struct compat_msghdr __user *umsg,
+		      struct sockaddr __user **save_addr,
+		      struct iovec **iov)
 {
 	compat_uptr_t uaddr, uiov, tmp3;
 	compat_size_t nr_segs;
@@ -81,13 +81,9 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg,
 
 	kmsg->msg_iocb = NULL;
 
-	err = compat_rw_copy_check_uvector(save_addr ? READ : WRITE,
-					   compat_ptr(uiov), nr_segs,
-					   UIO_FASTIOV, *iov, iov);
-	if (err >= 0)
-		iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
-			      *iov, nr_segs, err);
-	return err;
+	return compat_import_iovec(save_addr ? READ : WRITE,
+				   compat_ptr(uiov), nr_segs,
+				   UIO_FASTIOV, iov, &kmsg->msg_iter);
 }
 
 /* Bleech... */
diff --git a/net/socket.c b/net/socket.c
index 46f0e1d752b3..e5669cee0759 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1841,10 +1841,10 @@ struct used_address {
 	unsigned int name_len;
 };
 
-static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
-				     struct user_msghdr __user *umsg,
-				     struct sockaddr __user **save_addr,
-				     struct iovec **iov)
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+				 struct user_msghdr __user *umsg,
+				 struct sockaddr __user **save_addr,
+				 struct iovec **iov)
 {
 	struct sockaddr __user *uaddr;
 	struct iovec __user *uiov;
@@ -1890,13 +1890,8 @@ static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
 
 	kmsg->msg_iocb = NULL;
 
-	err = rw_copy_check_uvector(save_addr ? READ : WRITE,
-				    uiov, nr_segs,
-				    UIO_FASTIOV, *iov, iov);
-	if (err >= 0)
-		iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
-			      *iov, nr_segs, err);
-	return err;
+	return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
+			    UIO_FASTIOV, iov, &kmsg->msg_iter);
 }
 
 static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
@@ -1921,8 +1916,8 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
 	else
 		err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
 	if (err < 0)
-		goto out_freeiov;
-	total_len = err;
+		return err;
+	total_len = iov_iter_count(&msg_sys->msg_iter);
 
 	err = -ENOBUFS;
 
@@ -1988,8 +1983,7 @@ out_freectl:
 	if (ctl_buf != ctl)
 		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
 out_freeiov:
-	if (iov != iovstack)
-		kfree(iov);
+	kfree(iov);
 	return err;
 }
 
@@ -2114,8 +2108,8 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
 	else
 		err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
 	if (err < 0)
-		goto out_freeiov;
-	total_len = err;
+		return err;
+	total_len = iov_iter_count(&msg_sys->msg_iter);
 
 	cmsg_ptr = (unsigned long)msg_sys->msg_control;
 	msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
@@ -2153,8 +2147,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
 	err = len;
 
 out_freeiov:
-	if (iov != iovstack)
-		kfree(iov);
+	kfree(iov);
 	return err;
 }
 
-- 
cgit v1.2.3


From d8725c86aebaf3516e220760aaf5fefc73825188 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 11 Dec 2014 00:02:50 -0500
Subject: get rid of the size argument of sock_sendmsg()

it's equal to iov_iter_count(&msg->msg_iter) in all cases

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/net.h  |  2 +-
 net/socket.c         | 27 ++++++++++++++-------------
 net/sunrpc/svcsock.c |  2 +-
 3 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/net.h b/include/linux/net.h
index e74114bcca68..738ea48be889 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -211,7 +211,7 @@ int sock_create(int family, int type, int proto, struct socket **res);
 int sock_create_kern(int family, int type, int proto, struct socket **res);
 int sock_create_lite(int family, int type, int proto, struct socket **res);
 void sock_release(struct socket *sock);
-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
+int sock_sendmsg(struct socket *sock, struct msghdr *msg);
 int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		 int flags);
 struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname);
diff --git a/net/socket.c b/net/socket.c
index b6ceeda65214..21676e469b13 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -610,17 +610,19 @@ void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
 }
 EXPORT_SYMBOL(__sock_tx_timestamp);
 
-static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg,
-				     size_t size)
+static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
 {
-	return sock->ops->sendmsg(sock, msg, size);
+	int ret = sock->ops->sendmsg(sock, msg, iov_iter_count(&msg->msg_iter));
+	BUG_ON(ret == -EIOCBQUEUED);
+	return ret;
 }
 
-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+int sock_sendmsg(struct socket *sock, struct msghdr *msg)
 {
-	int err = security_socket_sendmsg(sock, msg, size);
+	int err = security_socket_sendmsg(sock, msg,
+					  iov_iter_count(&msg->msg_iter));
 
-	return err ?: sock_sendmsg_nosec(sock, msg, size);
+	return err ?: sock_sendmsg_nosec(sock, msg);
 }
 EXPORT_SYMBOL(sock_sendmsg);
 
@@ -628,7 +630,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size)
 {
 	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
-	return sock_sendmsg(sock, msg, size);
+	return sock_sendmsg(sock, msg);
 }
 EXPORT_SYMBOL(kernel_sendmsg);
 
@@ -819,7 +821,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (sock->type == SOCK_SEQPACKET)
 		msg.msg_flags |= MSG_EOR;
 
-	res = sock_sendmsg(sock, &msg, iov_iter_count(from));
+	res = sock_sendmsg(sock, &msg);
 	*from = msg.msg_iter;
 	return res;
 }
@@ -1657,7 +1659,7 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	msg.msg_flags = flags;
-	err = sock_sendmsg(sock, &msg, iov_iter_count(&msg.msg_iter));
+	err = sock_sendmsg(sock, &msg);
 
 out_put:
 	fput_light(sock->file, fput_needed);
@@ -1892,7 +1894,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
 	    __attribute__ ((aligned(sizeof(__kernel_size_t))));
 	/* 20 is size of ipv6_pktinfo */
 	unsigned char *ctl_buf = ctl;
-	int ctl_len, total_len;
+	int ctl_len;
 	ssize_t err;
 
 	msg_sys->msg_name = &address;
@@ -1903,7 +1905,6 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
 		err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
 	if (err < 0)
 		return err;
-	total_len = iov_iter_count(&msg_sys->msg_iter);
 
 	err = -ENOBUFS;
 
@@ -1950,10 +1951,10 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
 	    used_address->name_len == msg_sys->msg_namelen &&
 	    !memcmp(&used_address->name, msg_sys->msg_name,
 		    used_address->name_len)) {
-		err = sock_sendmsg_nosec(sock, msg_sys, total_len);
+		err = sock_sendmsg_nosec(sock, msg_sys);
 		goto out_freectl;
 	}
-	err = sock_sendmsg(sock, msg_sys, total_len);
+	err = sock_sendmsg(sock, msg_sys);
 	/*
 	 * If this is sendmmsg() and sending to current destination address was
 	 * successful, remember it.
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index cc331b6cf573..0c8120229a03 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -257,7 +257,7 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 
 		svc_set_cmsg_data(rqstp, cmh);
 
-		if (sock_sendmsg(sock, &msg, 0) < 0)
+		if (sock_sendmsg(sock, &msg) < 0)
 			goto out;
 	}
 
-- 
cgit v1.2.3


From 01e97e6517053d7c0b9af5248e944a9209909cf5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 15 Dec 2014 21:39:31 -0500
Subject: new helper: msg_data_left()

convert open-coded instances

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 crypto/algif_hash.c     |  4 ++--
 crypto/algif_skcipher.c |  4 ++--
 drivers/vhost/net.c     |  4 ++--
 include/linux/socket.h  |  5 +++++
 net/core/datagram.c     |  2 +-
 net/ipv4/tcp.c          |  8 ++++----
 net/rxrpc/ar-output.c   | 19 +++++++++----------
 net/socket.c            |  4 ++--
 8 files changed, 27 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 0a465e0f3012..1396ad0787fc 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -56,8 +56,8 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
 	ctx->more = 0;
 
-	while (iov_iter_count(&msg->msg_iter)) {
-		int len = iov_iter_count(&msg->msg_iter);
+	while (msg_data_left(msg)) {
+		int len = msg_data_left(msg);
 
 		if (len > limit)
 			len = limit;
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 8f903b6df299..945075292bc9 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -641,7 +641,7 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 	long copied = 0;
 
 	lock_sock(sk);
-	while (iov_iter_count(&msg->msg_iter)) {
+	while (msg_data_left(msg)) {
 		sgl = list_first_entry(&ctx->tsgl,
 				       struct skcipher_sg_list, list);
 		sg = sgl->sg;
@@ -655,7 +655,7 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
 				goto unlock;
 		}
 
-		used = min_t(unsigned long, ctx->used, iov_iter_count(&msg->msg_iter));
+		used = min_t(unsigned long, ctx->used, msg_data_left(msg));
 
 		used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used);
 		err = used;
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 18f05bff8826..7d137a43cc86 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -357,13 +357,13 @@ static void handle_tx(struct vhost_net *net)
 		iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
 		iov_iter_advance(&msg.msg_iter, hdr_size);
 		/* Sanity check */
-		if (!iov_iter_count(&msg.msg_iter)) {
+		if (!msg_data_left(&msg)) {
 			vq_err(vq, "Unexpected header len for TX: "
 			       "%zd expected %zd\n",
 			       len, hdr_size);
 			break;
 		}
-		len = iov_iter_count(&msg.msg_iter);
+		len = msg_data_left(&msg);
 
 		zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
 				   && (nvq->upend_idx + 1) % UIO_MAXIOV !=
diff --git a/include/linux/socket.h b/include/linux/socket.h
index c9852ef7e317..5bf59c8493b7 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -139,6 +139,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
 	return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
 }
 
+static inline size_t msg_data_left(struct msghdr *msg)
+{
+	return iov_iter_count(&msg->msg_iter);
+}
+
 /* "Socket"-level control message types: */
 
 #define	SCM_RIGHTS	0x01		/* rw: access rights (array of int) */
diff --git a/net/core/datagram.c b/net/core/datagram.c
index df493d68330c..b80fb91bb3f7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -673,7 +673,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
 	if (!chunk)
 		return 0;
 
-	if (iov_iter_count(&msg->msg_iter) < chunk) {
+	if (msg_data_left(msg) < chunk) {
 		if (__skb_checksum_complete(skb))
 			goto csum_error;
 		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 094a6822c71d..18e3a12eb1b2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1119,7 +1119,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 
 	sg = !!(sk->sk_route_caps & NETIF_F_SG);
 
-	while (iov_iter_count(&msg->msg_iter)) {
+	while (msg_data_left(msg)) {
 		int copy = 0;
 		int max = size_goal;
 
@@ -1163,8 +1163,8 @@ new_segment:
 		}
 
 		/* Try to append data to the end of skb. */
-		if (copy > iov_iter_count(&msg->msg_iter))
-			copy = iov_iter_count(&msg->msg_iter);
+		if (copy > msg_data_left(msg))
+			copy = msg_data_left(msg);
 
 		/* Where to copy to? */
 		if (skb_availroom(skb) > 0) {
@@ -1221,7 +1221,7 @@ new_segment:
 		tcp_skb_pcount_set(skb, 0);
 
 		copied += copy;
-		if (!iov_iter_count(&msg->msg_iter)) {
+		if (!msg_data_left(msg)) {
 			tcp_tx_timestamp(sk, skb);
 			goto out;
 		}
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 7a31a3958364..c0042807bfc6 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -564,8 +564,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 			max &= ~(call->conn->size_align - 1UL);
 
 			chunk = max;
-			if (chunk > iov_iter_count(&msg->msg_iter) && !more)
-				chunk = iov_iter_count(&msg->msg_iter);
+			if (chunk > msg_data_left(msg) && !more)
+				chunk = msg_data_left(msg);
 
 			space = chunk + call->conn->size_align;
 			space &= ~(call->conn->size_align - 1UL);
@@ -608,11 +608,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 		sp = rxrpc_skb(skb);
 
 		/* append next segment of data to the current buffer */
-		if (iov_iter_count(&msg->msg_iter) > 0) {
+		if (msg_data_left(msg) > 0) {
 			int copy = skb_tailroom(skb);
 			ASSERTCMP(copy, >, 0);
-			if (copy > iov_iter_count(&msg->msg_iter))
-				copy = iov_iter_count(&msg->msg_iter);
+			if (copy > msg_data_left(msg))
+				copy = msg_data_left(msg);
 			if (copy > sp->remain)
 				copy = sp->remain;
 
@@ -633,7 +633,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
 		/* add the packet to the send queue if it's now full */
 		if (sp->remain <= 0 ||
-		    (iov_iter_count(&msg->msg_iter) == 0 && !more)) {
+		    (msg_data_left(msg) == 0 && !more)) {
 			struct rxrpc_connection *conn = call->conn;
 			uint32_t seq;
 			size_t pad;
@@ -663,7 +663,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 			sp->hdr.serviceId = conn->service_id;
 
 			sp->hdr.flags = conn->out_clientflag;
-			if (iov_iter_count(&msg->msg_iter) == 0 && !more)
+			if (msg_data_left(msg) == 0 && !more)
 				sp->hdr.flags |= RXRPC_LAST_PACKET;
 			else if (CIRC_SPACE(call->acks_head, call->acks_tail,
 					    call->acks_winsz) > 1)
@@ -679,11 +679,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
 			memcpy(skb->head, &sp->hdr,
 			       sizeof(struct rxrpc_header));
-			rxrpc_queue_packet(call, skb,
-					   iov_iter_count(&msg->msg_iter) == 0 && !more);
+			rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
 			skb = NULL;
 		}
-	} while (iov_iter_count(&msg->msg_iter) > 0);
+	} while (msg_data_left(msg) > 0);
 
 success:
 	ret = copied;
diff --git a/net/socket.c b/net/socket.c
index 21676e469b13..5b0126234606 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -612,7 +612,7 @@ EXPORT_SYMBOL(__sock_tx_timestamp);
 
 static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
 {
-	int ret = sock->ops->sendmsg(sock, msg, iov_iter_count(&msg->msg_iter));
+	int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
 	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }
@@ -620,7 +620,7 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
 int sock_sendmsg(struct socket *sock, struct msghdr *msg)
 {
 	int err = security_socket_sendmsg(sock, msg,
-					  iov_iter_count(&msg->msg_iter));
+					  msg_data_left(msg));
 
 	return err ?: sock_sendmsg_nosec(sock, msg);
 }
-- 
cgit v1.2.3