From b5091b552a9fd7c45f9f8f4efa23bc78af0553cd Mon Sep 17 00:00:00 2001
From: Andreea-Cristina Bernat <bernat.ada@gmail.com>
Date: Sun, 17 Aug 2014 13:21:45 +0300
Subject: bonding: Replace rcu_dereference() with rcu_access_pointer()

This "rcu_dereference()" call is used directly in a condition.
Since its return value is never dereferenced it is recommended to use
"rcu_access_pointer()" instead of "rcu_dereference()".
Therefore, this patch makes this replacement.

The following Coccinelle semantic patch was used for solving it:
@@
@@

(
 if(
 (<+...
- rcu_dereference
+ rcu_access_pointer
  (...)
 ...+>)) {...}
|
 while(
 (<+...
- rcu_dereference
+ rcu_access_pointer
  (...)
  ...+>)) {...}
)

Signed-off-by: Andreea-Cristina Bernat <bernat.ada@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 95dd1f58c260..73c21e233131 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1388,7 +1388,7 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
 	}
 
 	if (tx_slave && bond_slave_can_tx(tx_slave)) {
-		if (tx_slave != rcu_dereference(bond->curr_active_slave)) {
+		if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) {
 			ether_addr_copy(eth_data->h_source,
 					tx_slave->dev->dev_addr);
 		}
-- 
cgit v1.2.3


From d4261e5650004d6d51137553ea5433d5828562dc Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 19 Aug 2014 16:02:12 +0200
Subject: bonding: create netlink event when bonding option is changed

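Userspace needs to be notified when a bonding option is changed, so emit
a NETDEV_CHANGEINFODATA netdevice notification after an option has been
set successfully.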

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Veaceslav Falico <vfalico@gmail.com>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_options.c | 2 ++
 include/linux/netdevice.h          | 1 +
 2 files changed, 3 insertions(+)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index dc73463c2c23..d8dc17faa6b4 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -625,6 +625,8 @@ int __bond_opt_set(struct bonding *bond,
 out:
 	if (ret)
 		bond_opt_error_interpret(bond, opt, ret, val);
+	else
+		call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev);
 
 	return ret;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 38377392d082..7e2b0b8b5cd7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1982,6 +1982,7 @@ struct pcpu_sw_netstats {
 #define NETDEV_CHANGEUPPER	0x0015
 #define NETDEV_RESEND_IGMP	0x0016
 #define NETDEV_PRECHANGEMTU	0x0017 /* notify before mtu change happened */
+#define NETDEV_CHANGEINFODATA	0x0018
 
 int register_netdevice_notifier(struct notifier_block *nb);
 int unregister_netdevice_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From 0f23124aaacd68e11271f72a13f3a8e8904c26d3 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Wed, 27 Aug 2014 16:06:46 +0200
Subject: bonding: add slave_changelink support and use it for queue_id

This patch adds support for slave_changelink to the bonding driver and
uses it to provide the ability to change the queue_id of enslaved devices
via netlink. It sets slave_maxtype and uses bond_changelink as a prototype
for bond_slave_changelink.
Example/test command after the iproute2 patch:
 ip link set eth0 type bond_slave queue_id 10

CC: David S. Miller <davem@davemloft.net>
CC: Jay Vosburgh <j.vosburgh@gmail.com>
CC: Veaceslav Falico <vfalico@gmail.com>
CC: Andy Gospodarek <andy@greyhouse.net>

Suggested-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_netlink.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index d163e112f04c..1570deab112e 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -107,6 +107,33 @@ static int bond_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
+static int bond_slave_changelink(struct net_device *bond_dev,
+				 struct net_device *slave_dev,
+				 struct nlattr *tb[], struct nlattr *data[])
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct bond_opt_value newval;
+	int err;
+
+	if (!data)
+		return 0;
+
+	if (data[IFLA_BOND_SLAVE_QUEUE_ID]) {
+		u16 queue_id = nla_get_u16(data[IFLA_BOND_SLAVE_QUEUE_ID]);
+		char queue_id_str[IFNAMSIZ + 7];
+
+		/* queue_id option setting expects slave_name:queue_id */
+		snprintf(queue_id_str, sizeof(queue_id_str), "%s:%u\n",
+			 slave_dev->name, queue_id);
+		bond_opt_initstr(&newval, queue_id_str);
+		err = __bond_opt_set(bond, BOND_OPT_QUEUE_ID, &newval);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int bond_changelink(struct net_device *bond_dev,
 			   struct nlattr *tb[], struct nlattr *data[])
 {
@@ -553,10 +580,12 @@ struct rtnl_link_ops bond_link_ops __read_mostly = {
 	.priv_size		= sizeof(struct bonding),
 	.setup			= bond_setup,
 	.maxtype		= IFLA_BOND_MAX,
+	.slave_maxtype		= IFLA_BOND_SLAVE_MAX,
 	.policy			= bond_policy,
 	.validate		= bond_validate,
 	.newlink		= bond_newlink,
 	.changelink		= bond_changelink,
+	.slave_changelink	= bond_slave_changelink,
 	.get_size		= bond_get_size,
 	.fill_info		= bond_fill_info,
 	.get_num_tx_queues	= bond_get_num_tx_queues,
-- 
cgit v1.2.3


From cea6aeb6979f6409cacd6900d237cb4ed39fd6d2 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Fri, 5 Sep 2014 11:36:34 +0200
Subject: bonding: add slave netlink policy and put slave-related ops together

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_netlink.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 1570deab112e..e1489d9df2a4 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -96,6 +96,10 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
 	[IFLA_BOND_AD_INFO]		= { .type = NLA_NESTED },
 };
 
+static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
+	[IFLA_BOND_SLAVE_QUEUE_ID]	= { .type = NLA_U16 },
+};
+
 static int bond_validate(struct nlattr *tb[], struct nlattr *data[])
 {
 	if (tb[IFLA_ADDRESS]) {
@@ -580,17 +584,18 @@ struct rtnl_link_ops bond_link_ops __read_mostly = {
 	.priv_size		= sizeof(struct bonding),
 	.setup			= bond_setup,
 	.maxtype		= IFLA_BOND_MAX,
-	.slave_maxtype		= IFLA_BOND_SLAVE_MAX,
 	.policy			= bond_policy,
 	.validate		= bond_validate,
 	.newlink		= bond_newlink,
 	.changelink		= bond_changelink,
-	.slave_changelink	= bond_slave_changelink,
 	.get_size		= bond_get_size,
 	.fill_info		= bond_fill_info,
 	.get_num_tx_queues	= bond_get_num_tx_queues,
 	.get_num_rx_queues	= bond_get_num_tx_queues, /* Use the same number
 							     as for TX queues */
+	.slave_maxtype		= IFLA_BOND_SLAVE_MAX,
+	.slave_policy		= bond_slave_policy,
+	.slave_changelink	= bond_slave_changelink,
 	.get_slave_size		= bond_get_slave_size,
 	.fill_slave_info	= bond_fill_slave_info,
 };
-- 
cgit v1.2.3


From bdbc5f13036c13ba47dad5f99645556fc40381f0 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Tue, 9 Sep 2014 23:16:58 +0200
Subject: bonding: 3ad: use curr_slave_lock instead of bond->lock

In 3ad mode the only syncing needed by bond->lock is for the workqueue
handler and the recv handler, so change them to use curr_slave_lock.
There are no locking dependencies here as 3ad doesn't use
curr_slave_lock at all.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c  |  9 ++++-----
 drivers/net/bonding/bond_main.c | 12 +++++++-----
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index ee2c73a9de39..5d27a6207384 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2057,7 +2057,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 	struct port *port;
 	bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
 
-	read_lock(&bond->lock);
+	read_lock(&bond->curr_slave_lock);
 	rcu_read_lock();
 
 	/* check if there are any slaves */
@@ -2120,7 +2120,7 @@ re_arm:
 		}
 	}
 	rcu_read_unlock();
-	read_unlock(&bond->lock);
+	read_unlock(&bond->curr_slave_lock);
 
 	if (should_notify_rtnl && rtnl_trylock()) {
 		bond_slave_state_notify(bond);
@@ -2395,7 +2395,6 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
 	return 0;
 }
 
-/* Wrapper used to hold bond->lock so no slave manipulation can occur */
 int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
 {
 	int ret;
@@ -2487,9 +2486,9 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
 	if (!lacpdu)
 		return ret;
 
-	read_lock(&bond->lock);
+	read_lock(&bond->curr_slave_lock);
 	ret = bond_3ad_rx_indication(lacpdu, slave, skb->len);
-	read_unlock(&bond->lock);
+	read_unlock(&bond->curr_slave_lock);
 	return ret;
 }
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f0f5eab0fab1..dcd331bd0c17 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1687,13 +1687,15 @@ static int __bond_release_one(struct net_device *bond_dev,
 	 * for this slave anymore.
 	 */
 	netdev_rx_handler_unregister(slave_dev);
-	write_lock_bh(&bond->lock);
 
-	/* Inform AD package of unbinding of slave. */
-	if (BOND_MODE(bond) == BOND_MODE_8023AD)
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+		/* Sync against bond_3ad_rx_indication and
+		 * bond_3ad_state_machine_handler
+		 */
+		write_lock_bh(&bond->curr_slave_lock);
 		bond_3ad_unbind_slave(slave);
-
-	write_unlock_bh(&bond->lock);
+		write_unlock_bh(&bond->curr_slave_lock);
+	}
 
 	netdev_info(bond_dev, "Releasing %s interface %s\n",
 		    bond_is_active_slave(slave) ? "active" : "backup",
-- 
cgit v1.2.3


From ecfede424e95b211050f777c3ae96356926ed1c4 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Tue, 9 Sep 2014 23:16:59 +0200
Subject: bonding: alb: clean bond->lock

We can remove the lock/unlock as it's no longer necessary since
RTNL should be held while calling bond_alb_set_mac_address().

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 73c21e233131..028496205f39 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1775,8 +1775,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
  * Set the bond->curr_active_slave to @new_slave and handle
  * mac address swapping and promiscuity changes as needed.
  *
- * If new_slave is NULL, caller must hold curr_slave_lock or
- * bond->lock for write.
+ * If new_slave is NULL, caller must hold curr_slave_lock for write
  *
  * If new_slave is not NULL, caller must hold RTNL, curr_slave_lock
  * for write.  Processing here may sleep, so no other locks may be held.
@@ -1857,12 +1856,8 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
 	write_lock_bh(&bond->curr_slave_lock);
 }
 
-/*
- * Called with RTNL
- */
+/* Called with RTNL */
 int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
-	__acquires(&bond->lock)
-	__releases(&bond->lock)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct sockaddr *sa = addr;
@@ -1895,14 +1890,12 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
 	} else {
 		alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr);
 
-		read_lock(&bond->lock);
 		alb_send_learning_packets(curr_active,
 					  bond_dev->dev_addr, false);
 		if (bond->alb_info.rlb_enabled) {
 			/* inform clients mac address has changed */
 			rlb_req_update_slave_clients(bond, curr_active);
 		}
-		read_unlock(&bond->lock);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 059b47e8aaf997245bc531e980581de492315fe6 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Tue, 9 Sep 2014 23:17:00 +0200
Subject: bonding: convert primary_slave to use RCU

This is necessary mainly for two bonding call sites, procfs and sysfs,
where primary_slave was dereferenced without any real protection.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c    | 42 +++++++++++++++++++++-----------------
 drivers/net/bonding/bond_netlink.c |  7 ++++---
 drivers/net/bonding/bond_options.c |  8 ++++----
 drivers/net/bonding/bond_procfs.c  |  8 ++++----
 drivers/net/bonding/bond_sysfs.c   | 10 ++++++---
 drivers/net/bonding/bonding.h      |  2 +-
 6 files changed, 43 insertions(+), 34 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index dcd331bd0c17..629037f79213 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -708,7 +708,7 @@ out:
 
 static bool bond_should_change_active(struct bonding *bond)
 {
-	struct slave *prim = bond->primary_slave;
+	struct slave *prim = rtnl_dereference(bond->primary_slave);
 	struct slave *curr = bond_deref_active_protected(bond);
 
 	if (!prim || !curr || curr->link != BOND_LINK_UP)
@@ -732,13 +732,14 @@ static bool bond_should_change_active(struct bonding *bond)
  */
 static struct slave *bond_find_best_slave(struct bonding *bond)
 {
-	struct slave *slave, *bestslave = NULL;
+	struct slave *slave, *bestslave = NULL, *primary;
 	struct list_head *iter;
 	int mintime = bond->params.updelay;
 
-	if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP &&
+	primary = rtnl_dereference(bond->primary_slave);
+	if (primary && primary->link == BOND_LINK_UP &&
 	    bond_should_change_active(bond))
-		return bond->primary_slave;
+		return primary;
 
 	bond_for_each_slave(bond, slave, iter) {
 		if (slave->link == BOND_LINK_UP)
@@ -1482,7 +1483,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	if (bond_uses_primary(bond) && bond->params.primary[0]) {
 		/* if there is a primary slave, remember it */
 		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
-			bond->primary_slave = new_slave;
+			rcu_assign_pointer(bond->primary_slave, new_slave);
 			bond->force_primary = true;
 		}
 	}
@@ -1596,8 +1597,8 @@ err_detach:
 		bond_hw_addr_flush(bond_dev, slave_dev);
 
 	vlan_vids_del_by_dev(slave_dev, bond_dev);
-	if (bond->primary_slave == new_slave)
-		bond->primary_slave = NULL;
+	if (rcu_access_pointer(bond->primary_slave) == new_slave)
+		RCU_INIT_POINTER(bond->primary_slave, NULL);
 	if (rcu_access_pointer(bond->curr_active_slave) == new_slave) {
 		block_netpoll_tx();
 		write_lock_bh(&bond->curr_slave_lock);
@@ -1606,6 +1607,8 @@ err_detach:
 		write_unlock_bh(&bond->curr_slave_lock);
 		unblock_netpoll_tx();
 	}
+	/* either primary_slave or curr_active_slave might've changed */
+	synchronize_rcu();
 	slave_disable_netpoll(new_slave);
 
 err_close:
@@ -1714,8 +1717,8 @@ static int __bond_release_one(struct net_device *bond_dev,
 				    bond_dev->name, slave_dev->name);
 	}
 
-	if (bond->primary_slave == slave)
-		bond->primary_slave = NULL;
+	if (rtnl_dereference(bond->primary_slave) == slave)
+		RCU_INIT_POINTER(bond->primary_slave, NULL);
 
 	if (oldcurrent == slave) {
 		write_lock_bh(&bond->curr_slave_lock);
@@ -1976,7 +1979,7 @@ static int bond_miimon_inspect(struct bonding *bond)
 static void bond_miimon_commit(struct bonding *bond)
 {
 	struct list_head *iter;
-	struct slave *slave;
+	struct slave *slave, *primary;
 
 	bond_for_each_slave(bond, slave, iter) {
 		switch (slave->new_link) {
@@ -1987,13 +1990,14 @@ static void bond_miimon_commit(struct bonding *bond)
 			slave->link = BOND_LINK_UP;
 			slave->last_link_up = jiffies;
 
+			primary = rtnl_dereference(bond->primary_slave);
 			if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 				/* prevent it from being the active one */
 				bond_set_backup_slave(slave);
 			} else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
 				/* make it immediately active */
 				bond_set_active_slave(slave);
-			} else if (slave != bond->primary_slave) {
+			} else if (slave != primary) {
 				/* prevent it from being the active one */
 				bond_set_backup_slave(slave);
 			}
@@ -2011,8 +2015,7 @@ static void bond_miimon_commit(struct bonding *bond)
 				bond_alb_handle_link_change(bond, slave,
 							    BOND_LINK_UP);
 
-			if (!bond->curr_active_slave ||
-			    (slave == bond->primary_slave))
+			if (!bond->curr_active_slave || slave == primary)
 				goto do_failover;
 
 			continue;
@@ -2633,7 +2636,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
 					    slave->dev->name);
 
 				if (!rtnl_dereference(bond->curr_active_slave) ||
-				    (slave == bond->primary_slave))
+				    slave == rtnl_dereference(bond->primary_slave))
 					goto do_failover;
 
 			}
@@ -2860,7 +2863,7 @@ static int bond_master_netdev_event(unsigned long event,
 static int bond_slave_netdev_event(unsigned long event,
 				   struct net_device *slave_dev)
 {
-	struct slave *slave = bond_slave_get_rtnl(slave_dev);
+	struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary;
 	struct bonding *bond;
 	struct net_device *bond_dev;
 	u32 old_speed;
@@ -2874,6 +2877,7 @@ static int bond_slave_netdev_event(unsigned long event,
 		return NOTIFY_DONE;
 	bond_dev = slave->bond->dev;
 	bond = slave->bond;
+	primary = rtnl_dereference(bond->primary_slave);
 
 	switch (event) {
 	case NETDEV_UNREGISTER:
@@ -2921,18 +2925,18 @@ static int bond_slave_netdev_event(unsigned long event,
 		    !bond->params.primary[0])
 			break;
 
-		if (slave == bond->primary_slave) {
+		if (slave == primary) {
 			/* slave's name changed - he's no longer primary */
-			bond->primary_slave = NULL;
+			RCU_INIT_POINTER(bond->primary_slave, NULL);
 		} else if (!strcmp(slave_dev->name, bond->params.primary)) {
 			/* we have a new primary slave */
-			bond->primary_slave = slave;
+			rcu_assign_pointer(bond->primary_slave, slave);
 		} else { /* we didn't change primary - exit */
 			break;
 		}
 
 		netdev_info(bond->dev, "Primary slave changed to %s, reselecting active slave\n",
-			    bond->primary_slave ? slave_dev->name : "none");
+			    primary ? slave_dev->name : "none");
 
 		block_netpoll_tx();
 		write_lock_bh(&bond->curr_slave_lock);
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index e1489d9df2a4..c13d83e15ace 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -443,6 +443,7 @@ static int bond_fill_info(struct sk_buff *skb,
 	unsigned int packets_per_slave;
 	int ifindex, i, targets_added;
 	struct nlattr *targets;
+	struct slave *primary;
 
 	if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond)))
 		goto nla_put_failure;
@@ -492,9 +493,9 @@ static int bond_fill_info(struct sk_buff *skb,
 			bond->params.arp_all_targets))
 		goto nla_put_failure;
 
-	if (bond->primary_slave &&
-	    nla_put_u32(skb, IFLA_BOND_PRIMARY,
-			bond->primary_slave->dev->ifindex))
+	primary = rtnl_dereference(bond->primary_slave);
+	if (primary &&
+	    nla_put_u32(skb, IFLA_BOND_PRIMARY, primary->dev->ifindex))
 		goto nla_put_failure;
 
 	if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT,
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index d8dc17faa6b4..7c9e176baecc 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1090,7 +1090,7 @@ static int bond_option_primary_set(struct bonding *bond,
 	/* check to see if we are clearing primary */
 	if (!strlen(primary)) {
 		netdev_info(bond->dev, "Setting primary slave to None\n");
-		bond->primary_slave = NULL;
+		RCU_INIT_POINTER(bond->primary_slave, NULL);
 		memset(bond->params.primary, 0, sizeof(bond->params.primary));
 		bond_select_active_slave(bond);
 		goto out;
@@ -1100,16 +1100,16 @@ static int bond_option_primary_set(struct bonding *bond,
 		if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) {
 			netdev_info(bond->dev, "Setting %s as primary slave\n",
 				    slave->dev->name);
-			bond->primary_slave = slave;
+			rcu_assign_pointer(bond->primary_slave, slave);
 			strcpy(bond->params.primary, slave->dev->name);
 			bond_select_active_slave(bond);
 			goto out;
 		}
 	}
 
-	if (bond->primary_slave) {
+	if (rtnl_dereference(bond->primary_slave)) {
 		netdev_info(bond->dev, "Setting primary slave to None\n");
-		bond->primary_slave = NULL;
+		RCU_INIT_POINTER(bond->primary_slave, NULL);
 		bond_select_active_slave(bond);
 	}
 	strncpy(bond->params.primary, primary, IFNAMSIZ);
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index de62c0385dfb..1a9fe1ba4c60 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -66,7 +66,7 @@ static void bond_info_show_master(struct seq_file *seq)
 {
 	struct bonding *bond = seq->private;
 	const struct bond_opt_value *optval;
-	struct slave *curr;
+	struct slave *curr, *primary;
 	int i;
 
 	curr = rcu_dereference(bond->curr_active_slave);
@@ -92,10 +92,10 @@ static void bond_info_show_master(struct seq_file *seq)
 	}
 
 	if (bond_uses_primary(bond)) {
+		primary = rcu_dereference(bond->primary_slave);
 		seq_printf(seq, "Primary Slave: %s",
-			   (bond->primary_slave) ?
-			   bond->primary_slave->dev->name : "None");
-		if (bond->primary_slave) {
+			   primary ? primary->dev->name : "None");
+		if (primary) {
 			optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT,
 						  bond->params.primary_reselect);
 			seq_printf(seq, " (primary_reselect %s)",
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 98db8edd9c75..5555517284db 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -425,11 +425,15 @@ static ssize_t bonding_show_primary(struct device *d,
 				    struct device_attribute *attr,
 				    char *buf)
 {
-	int count = 0;
 	struct bonding *bond = to_bond(d);
+	struct slave *primary;
+	int count = 0;
 
-	if (bond->primary_slave)
-		count = sprintf(buf, "%s\n", bond->primary_slave->dev->name);
+	rcu_read_lock();
+	primary = rcu_dereference(bond->primary_slave);
+	if (primary)
+		count = sprintf(buf, "%s\n", primary->dev->name);
+	rcu_read_unlock();
 
 	return count;
 }
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index aace510d08d1..c798561a6f01 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -195,7 +195,7 @@ struct bonding {
 	struct   net_device *dev; /* first - useful for panic debug */
 	struct   slave __rcu *curr_active_slave;
 	struct   slave __rcu *current_arp_slave;
-	struct   slave *primary_slave;
+	struct   slave __rcu *primary_slave;
 	bool     force_primary;
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
-- 
cgit v1.2.3


From e9fe8efeeae11f19bb6fafd6153ec77deaeb4b83 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Tue, 9 Sep 2014 23:17:01 +0200
Subject: bonding: procfs: clean bond->lock usage and use RCU

Use RCU to protect against slave release. The proc show function syncs
with bond destruction through the proc locks and the fact that the bond
is released after NETDEV_UNREGISTER, which causes the bonding driver to
remove the proc entry.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_procfs.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 1a9fe1ba4c60..bb09d0442aa8 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -7,21 +7,18 @@
 
 static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
-	__acquires(&bond->lock)
 {
 	struct bonding *bond = seq->private;
 	struct list_head *iter;
 	struct slave *slave;
 	loff_t off = 0;
 
-	/* make sure the bond won't be taken away */
 	rcu_read_lock();
-	read_lock(&bond->lock);
 
 	if (*pos == 0)
 		return SEQ_START_TOKEN;
 
-	bond_for_each_slave(bond, slave, iter)
+	bond_for_each_slave_rcu(bond, slave, iter)
 		if (++off == *pos)
 			return slave;
 
@@ -37,12 +34,9 @@ static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
-		return bond_first_slave(bond);
+		return bond_first_slave_rcu(bond);
 
-	if (bond_is_last_slave(bond, v))
-		return NULL;
-
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (found)
 			return slave;
 		if (slave == v)
@@ -53,12 +47,8 @@ static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void bond_info_seq_stop(struct seq_file *seq, void *v)
-	__releases(&bond->lock)
 	__releases(RCU)
 {
-	struct bonding *bond = seq->private;
-
-	read_unlock(&bond->lock);
 	rcu_read_unlock();
 }
 
-- 
cgit v1.2.3


From 246df7b423aa61a8bd0b58404759a9abb4c15081 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Tue, 9 Sep 2014 23:17:02 +0200
Subject: bonding: options: remove bond->lock usage

We're safe to remove the bond->lock use from the arp targets because
arp_rcv_probe no longer acquires bond->lock, only rcu_read_lock.
Also setting the primary slave is safe because no one uses the bond->lock
as a syncing mechanism for that anymore.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_options.c | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 7c9e176baecc..534c0600484e 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -955,14 +955,7 @@ static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target)
 
 static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target)
 {
-	int ret;
-
-	/* not to race with bond_arp_rcv */
-	write_lock_bh(&bond->lock);
-	ret = _bond_option_arp_ip_target_add(bond, target);
-	write_unlock_bh(&bond->lock);
-
-	return ret;
+	return _bond_option_arp_ip_target_add(bond, target);
 }
 
 static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
@@ -991,9 +984,6 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
 
 	netdev_info(bond->dev, "Removing ARP target %pI4\n", &target);
 
-	/* not to race with bond_arp_rcv */
-	write_lock_bh(&bond->lock);
-
 	bond_for_each_slave(bond, slave, iter) {
 		targets_rx = slave->target_last_arp_rx;
 		for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++)
@@ -1004,8 +994,6 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
 		targets[i] = targets[i+1];
 	targets[i] = 0;
 
-	write_unlock_bh(&bond->lock);
-
 	return 0;
 }
 
@@ -1013,11 +1001,8 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond)
 {
 	int i;
 
-	/* not to race with bond_arp_rcv */
-	write_lock_bh(&bond->lock);
 	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
 		_bond_options_arp_ip_target_set(bond, i, 0, 0);
-	write_unlock_bh(&bond->lock);
 }
 
 static int bond_option_arp_ip_targets_set(struct bonding *bond,
@@ -1081,7 +1066,6 @@ static int bond_option_primary_set(struct bonding *bond,
 	struct slave *slave;
 
 	block_netpoll_tx();
-	read_lock(&bond->lock);
 	write_lock_bh(&bond->curr_slave_lock);
 
 	p = strchr(primary, '\n');
@@ -1120,7 +1104,6 @@ static int bond_option_primary_set(struct bonding *bond,
 
 out:
 	write_unlock_bh(&bond->curr_slave_lock);
-	read_unlock(&bond->lock);
 	unblock_netpoll_tx();
 
 	return 0;
-- 
cgit v1.2.3


From 87163ef9cda7617f8afdb549de191706641003c0 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Tue, 9 Sep 2014 23:17:03 +0200
Subject: bonding: remove last users of bond->lock and bond->lock itself

The usage of bond->lock in bond_main.c was completely unnecessary as it
didn't help to sync with anything; most of the spots already had RTNL.
Since there are no more users of bond->lock, remove it.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 39 ---------------------------------------
 drivers/net/bonding/bonding.h   |  8 ++------
 2 files changed, 2 insertions(+), 45 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 629037f79213..b43b2df9e5d1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3105,7 +3105,6 @@ static int bond_open(struct net_device *bond_dev)
 	struct slave *slave;
 
 	/* reset slave->backup and slave->inactive */
-	read_lock(&bond->lock);
 	if (bond_has_slaves(bond)) {
 		read_lock(&bond->curr_slave_lock);
 		bond_for_each_slave(bond, slave, iter) {
@@ -3120,7 +3119,6 @@ static int bond_open(struct net_device *bond_dev)
 		}
 		read_unlock(&bond->curr_slave_lock);
 	}
-	read_unlock(&bond->lock);
 
 	bond_work_init_all(bond);
 
@@ -3175,7 +3173,6 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 
 	memset(stats, 0, sizeof(*stats));
 
-	read_lock_bh(&bond->lock);
 	bond_for_each_slave(bond, slave, iter) {
 		const struct rtnl_link_stats64 *sstats =
 			dev_get_stats(slave->dev, &temp);
@@ -3206,7 +3203,6 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;
 		stats->tx_window_errors += sstats->tx_window_errors;
 	}
-	read_unlock_bh(&bond->lock);
 
 	return stats;
 }
@@ -3246,13 +3242,11 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
 
 		if (mii->reg_num == 1) {
 			mii->val_out = 0;
-			read_lock(&bond->lock);
 			read_lock(&bond->curr_slave_lock);
 			if (netif_carrier_ok(bond->dev))
 				mii->val_out = BMSR_LSTATUS;
 
 			read_unlock(&bond->curr_slave_lock);
-			read_unlock(&bond->lock);
 		}
 
 		return 0;
@@ -3428,21 +3422,6 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
 
 	netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu);
 
-	/* Can't hold bond->lock with bh disabled here since
-	 * some base drivers panic. On the other hand we can't
-	 * hold bond->lock without bh disabled because we'll
-	 * deadlock. The only solution is to rely on the fact
-	 * that we're under rtnl_lock here, and the slaves
-	 * list won't change. This doesn't solve the problem
-	 * of setting the slave's MTU while it is
-	 * transmitting, but the assumption is that the base
-	 * driver can handle that.
-	 *
-	 * TODO: figure out a way to safely iterate the slaves
-	 * list, but without holding a lock around the actual
-	 * call to the base driver.
-	 */
-
 	bond_for_each_slave(bond, slave, iter) {
 		netdev_dbg(bond_dev, "s %p c_m %p\n",
 			   slave, slave->dev->netdev_ops->ndo_change_mtu);
@@ -3517,21 +3496,6 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
 	if (!is_valid_ether_addr(sa->sa_data))
 		return -EADDRNOTAVAIL;
 
-	/* Can't hold bond->lock with bh disabled here since
-	 * some base drivers panic. On the other hand we can't
-	 * hold bond->lock without bh disabled because we'll
-	 * deadlock. The only solution is to rely on the fact
-	 * that we're under rtnl_lock here, and the slaves
-	 * list won't change. This doesn't solve the problem
-	 * of setting the slave's hw address while it is
-	 * transmitting, but the assumption is that the base
-	 * driver can handle that.
-	 *
-	 * TODO: figure out a way to safely iterate the slaves
-	 * list, but without holding a lock around the actual
-	 * call to the base driver.
-	 */
-
 	bond_for_each_slave(bond, slave, iter) {
 		netdev_dbg(bond_dev, "slave %p %s\n", slave, slave->dev->name);
 		res = dev_set_mac_address(slave->dev, addr);
@@ -3857,7 +3821,6 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev,
 	 * the true receive or transmit bandwidth (not all modes are symmetric)
 	 * this is an accurate maximum.
 	 */
-	read_lock(&bond->lock);
 	bond_for_each_slave(bond, slave, iter) {
 		if (bond_slave_can_tx(slave)) {
 			if (slave->speed != SPEED_UNKNOWN)
@@ -3868,7 +3831,6 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev,
 		}
 	}
 	ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN);
-	read_unlock(&bond->lock);
 
 	return 0;
 }
@@ -3931,7 +3893,6 @@ void bond_setup(struct net_device *bond_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 
 	/* initialize rwlocks */
-	rwlock_init(&bond->lock);
 	rwlock_init(&bond->curr_slave_lock);
 	bond->params = bonding_defaults;
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c798561a6f01..78c461abaa09 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -83,7 +83,7 @@
  * @pos:	current slave
  * @iter:	list_head * iterator
  *
- * Caller must hold bond->lock
+ * Caller must hold RTNL
  */
 #define bond_for_each_slave(bond, pos, iter) \
 	netdev_for_each_lower_private((bond)->dev, pos, iter)
@@ -185,11 +185,8 @@ struct slave {
 /*
  * Here are the locking policies for the two bonding locks:
  *
- * 1) Get bond->lock when reading/writing slave list.
+ * 1) Get rcu_read_lock when reading or RTNL when writing slave list.
  * 2) Get bond->curr_slave_lock when reading/writing bond->curr_active_slave.
- *    (It is unnecessary when the write-lock is put with bond->lock.)
- * 3) When we lock with bond->curr_slave_lock, we must lock with bond->lock
- *    beforehand.
  */
 struct bonding {
 	struct   net_device *dev; /* first - useful for panic debug */
@@ -200,7 +197,6 @@ struct bonding {
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
 			      struct slave *);
-	rwlock_t lock;
 	rwlock_t curr_slave_lock;
 	u8	 send_peer_notif;
 	u8       igmp_retrans;
-- 
cgit v1.2.3


From 86e749866d7c6b0ee1f9377cf7142f2690596a05 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Thu, 11 Sep 2014 22:49:22 +0200
Subject: bonding: 3ad: clean up curr_slave_lock usage

Remove the read_lock in bond_3ad_lacpdu_recv() since when the slave is
being released its rx_handler is removed before 3ad unbind, so even if
packets arrive, they won't see the slave in an inconsistent state.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 5d27a6207384..dfd3a7835d17 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2476,20 +2476,16 @@ err_free:
 int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
 			 struct slave *slave)
 {
-	int ret = RX_HANDLER_ANOTHER;
 	struct lacpdu *lacpdu, _lacpdu;
 
 	if (skb->protocol != PKT_TYPE_LACPDU)
-		return ret;
+		return RX_HANDLER_ANOTHER;
 
 	lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu);
 	if (!lacpdu)
-		return ret;
+		return RX_HANDLER_ANOTHER;
 
-	read_lock(&bond->curr_slave_lock);
-	ret = bond_3ad_rx_indication(lacpdu, slave, skb->len);
-	read_unlock(&bond->curr_slave_lock);
-	return ret;
+	return bond_3ad_rx_indication(lacpdu, slave, skb->len);
 }
 
 /**
-- 
cgit v1.2.3


From 62c5f5185397f4bd8e5defe6fcb86420deeb2b38 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Thu, 11 Sep 2014 22:49:23 +0200
Subject: bonding: alb: remove curr_slave_lock

First in rlb_teach_disabled_mac_on_primary() it's okay to remove
curr_slave_lock as all callers except bond_alb_monitor() already hold
RTNL, and in case bond_alb_monitor() is executing we can at most have a
period with bad throughput (very unlikely though).
In bond_alb_monitor() it's okay to remove the read_lock as the slave
list is walked with RCU, and the worst that could happen is another
transmitter running at the same time, and then only for a period which
currently is 10 seconds (bond_alb.h: BOND_ALB_LP_TICKS).
And bond_alb_handle_active_change() is okay because it's always called
with RTNL. Removed the ASSERT_RTNL() because it'll be inserted in the
parent function in a following patch.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 39 +++------------------------------------
 1 file changed, 3 insertions(+), 36 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 028496205f39..cf4ede8594ff 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -447,7 +447,7 @@ static struct slave *__rlb_next_rx_slave(struct bonding *bond)
 /* teach the switch the mac of a disabled slave
  * on the primary for fault tolerance
  *
- * Caller must hold bond->curr_slave_lock for write or bond lock for write
+ * Caller must hold RTNL
  */
 static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
 {
@@ -512,12 +512,8 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
 
 	_unlock_rx_hashtbl_bh(bond);
 
-	write_lock_bh(&bond->curr_slave_lock);
-
 	if (slave != bond_deref_active_protected(bond))
 		rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
-
-	write_unlock_bh(&bond->curr_slave_lock);
 }
 
 static void rlb_update_client(struct rlb_client_info *client_info)
@@ -1595,13 +1591,6 @@ void bond_alb_monitor(struct work_struct *work)
 	if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) {
 		bool strict_match;
 
-		/* change of curr_active_slave involves swapping of mac addresses.
-		 * in order to avoid this swapping from happening while
-		 * sending the learning packets, the curr_slave_lock must be held for
-		 * read.
-		 */
-		read_lock(&bond->curr_slave_lock);
-
 		bond_for_each_slave_rcu(bond, slave, iter) {
 			/* If updating current_active, use all currently
 			 * user mac addreses (!strict_match).  Otherwise, only
@@ -1613,17 +1602,11 @@ void bond_alb_monitor(struct work_struct *work)
 			alb_send_learning_packets(slave, slave->dev->dev_addr,
 						  strict_match);
 		}
-
-		read_unlock(&bond->curr_slave_lock);
-
 		bond_info->lp_counter = 0;
 	}
 
 	/* rebalance tx traffic */
 	if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {
-
-		read_lock(&bond->curr_slave_lock);
-
 		bond_for_each_slave_rcu(bond, slave, iter) {
 			tlb_clear_slave(bond, slave, 1);
 			if (slave == rcu_access_pointer(bond->curr_active_slave)) {
@@ -1633,9 +1616,6 @@ void bond_alb_monitor(struct work_struct *work)
 				bond_info->unbalanced_load = 0;
 			}
 		}
-
-		read_unlock(&bond->curr_slave_lock);
-
 		bond_info->tx_rebalance_counter = 0;
 	}
 
@@ -1775,21 +1755,14 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
  * Set the bond->curr_active_slave to @new_slave and handle
  * mac address swapping and promiscuity changes as needed.
  *
- * If new_slave is NULL, caller must hold curr_slave_lock for write
- *
- * If new_slave is not NULL, caller must hold RTNL, curr_slave_lock
- * for write.  Processing here may sleep, so no other locks may be held.
+ * Caller must hold RTNL
  */
 void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
-	__releases(&bond->curr_slave_lock)
-	__acquires(&bond->curr_slave_lock)
 {
 	struct slave *swap_slave;
 	struct slave *curr_active;
 
-	curr_active = rcu_dereference_protected(bond->curr_active_slave,
-						!new_slave ||
-						lockdep_is_held(&bond->curr_slave_lock));
+	curr_active = rtnl_dereference(bond->curr_active_slave);
 	if (curr_active == new_slave)
 		return;
 
@@ -1820,10 +1793,6 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
 		tlb_clear_slave(bond, swap_slave, 1);
 	tlb_clear_slave(bond, new_slave, 1);
 
-	write_unlock_bh(&bond->curr_slave_lock);
-
-	ASSERT_RTNL();
-
 	/* in TLB mode, the slave might flip down/up with the old dev_addr,
 	 * and thus filter bond->dev_addr's packets, so force bond's mac
 	 */
@@ -1852,8 +1821,6 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
 		alb_send_learning_packets(new_slave, bond->dev->dev_addr,
 					  false);
 	}
-
-	write_lock_bh(&bond->curr_slave_lock);
 }
 
 /* Called with RTNL */
-- 
cgit v1.2.3


From 1c72cfdc96e63bf975cab514c4ca4d8a661ba0e6 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Thu, 11 Sep 2014 22:49:24 +0200
Subject: bonding: clean curr_slave_lock use

Almost all users of curr_slave_lock already hold RTNL, as we've discussed
previously, so there's no point in using it. The one case where the lock
must stay is the 3ad code; in fact it's the only one.
It's okay to remove it from bond_do_fail_over_mac() as it's called with
RTNL and drops the curr_slave_lock anyway.
bond_change_active_slave() is one of the main places where
curr_slave_lock was used. It's okay to remove it as all callers take RTNL
these days before calling it, which is why we move the ASSERT_RTNL() to
the beginning to catch any potential offenders to this rule.
The RTNL argument actually applies to all of the places where
curr_slave_lock has been removed from in this patch.
Also remove the unnecessary bond_deref_active_protected() macro and use
rtnl_dereference() instead.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c     |  4 +--
 drivers/net/bonding/bond_main.c    | 62 ++++++--------------------------------
 drivers/net/bonding/bond_options.c | 10 +-----
 drivers/net/bonding/bonding.h      |  8 +----
 4 files changed, 14 insertions(+), 70 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index cf4ede8594ff..b755659ddfdc 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -451,7 +451,7 @@ static struct slave *__rlb_next_rx_slave(struct bonding *bond)
  */
 static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
 {
-	struct slave *curr_active = bond_deref_active_protected(bond);
+	struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);
 
 	if (!curr_active)
 		return;
@@ -512,7 +512,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
 
 	_unlock_rx_hashtbl_bh(bond);
 
-	if (slave != bond_deref_active_protected(bond))
+	if (slave != rtnl_dereference(bond->curr_active_slave))
 		rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
 }
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b43b2df9e5d1..3b06685260b8 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -637,13 +637,11 @@ static void bond_set_dev_addr(struct net_device *bond_dev,
  *
  * Perform special MAC address swapping for fail_over_mac settings
  *
- * Called with RTNL, curr_slave_lock for write_bh.
+ * Called with RTNL
  */
 static void bond_do_fail_over_mac(struct bonding *bond,
 				  struct slave *new_active,
 				  struct slave *old_active)
-	__releases(&bond->curr_slave_lock)
-	__acquires(&bond->curr_slave_lock)
 {
 	u8 tmp_mac[ETH_ALEN];
 	struct sockaddr saddr;
@@ -651,11 +649,8 @@ static void bond_do_fail_over_mac(struct bonding *bond,
 
 	switch (bond->params.fail_over_mac) {
 	case BOND_FOM_ACTIVE:
-		if (new_active) {
-			write_unlock_bh(&bond->curr_slave_lock);
+		if (new_active)
 			bond_set_dev_addr(bond->dev, new_active->dev);
-			write_lock_bh(&bond->curr_slave_lock);
-		}
 		break;
 	case BOND_FOM_FOLLOW:
 		/*
@@ -666,8 +661,6 @@ static void bond_do_fail_over_mac(struct bonding *bond,
 		if (!new_active)
 			return;
 
-		write_unlock_bh(&bond->curr_slave_lock);
-
 		if (old_active) {
 			ether_addr_copy(tmp_mac, new_active->dev->dev_addr);
 			ether_addr_copy(saddr.sa_data,
@@ -696,7 +689,6 @@ static void bond_do_fail_over_mac(struct bonding *bond,
 			netdev_err(bond->dev, "Error %d setting MAC of slave %s\n",
 				   -rv, new_active->dev->name);
 out:
-		write_lock_bh(&bond->curr_slave_lock);
 		break;
 	default:
 		netdev_err(bond->dev, "bond_do_fail_over_mac impossible: bad policy %d\n",
@@ -709,7 +701,7 @@ out:
 static bool bond_should_change_active(struct bonding *bond)
 {
 	struct slave *prim = rtnl_dereference(bond->primary_slave);
-	struct slave *curr = bond_deref_active_protected(bond);
+	struct slave *curr = rtnl_dereference(bond->curr_active_slave);
 
 	if (!prim || !curr || curr->link != BOND_LINK_UP)
 		return true;
@@ -785,15 +777,15 @@ static bool bond_should_notify_peers(struct bonding *bond)
  * because it is apparently the best available slave we have, even though its
  * updelay hasn't timed out yet.
  *
- * If new_active is not NULL, caller must hold curr_slave_lock for write_bh.
+ * Caller must hold RTNL.
  */
 void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 {
 	struct slave *old_active;
 
-	old_active = rcu_dereference_protected(bond->curr_active_slave,
-					       !new_active ||
-					       lockdep_is_held(&bond->curr_slave_lock));
+	ASSERT_RTNL();
+
+	old_active = rtnl_dereference(bond->curr_active_slave);
 
 	if (old_active == new_active)
 		return;
@@ -861,14 +853,10 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 					bond_should_notify_peers(bond);
 			}
 
-			write_unlock_bh(&bond->curr_slave_lock);
-
 			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
 			if (should_notify_peers)
 				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
 							 bond->dev);
-
-			write_lock_bh(&bond->curr_slave_lock);
 		}
 	}
 
@@ -893,7 +881,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
  * - The primary_slave has got its link back.
  * - A slave has got its link back and there's no old curr_active_slave.
  *
- * Caller must hold curr_slave_lock for write_bh.
+ * Caller must hold RTNL.
  */
 void bond_select_active_slave(struct bonding *bond)
 {
@@ -901,7 +889,7 @@ void bond_select_active_slave(struct bonding *bond)
 	int rv;
 
 	best_slave = bond_find_best_slave(bond);
-	if (best_slave != bond_deref_active_protected(bond)) {
+	if (best_slave != rtnl_dereference(bond->curr_active_slave)) {
 		bond_change_active_slave(bond, best_slave);
 		rv = bond_set_carrier(bond);
 		if (!rv)
@@ -1571,9 +1559,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	if (bond_uses_primary(bond)) {
 		block_netpoll_tx();
-		write_lock_bh(&bond->curr_slave_lock);
 		bond_select_active_slave(bond);
-		write_unlock_bh(&bond->curr_slave_lock);
 		unblock_netpoll_tx();
 	}
 
@@ -1601,10 +1587,8 @@ err_detach:
 		RCU_INIT_POINTER(bond->primary_slave, NULL);
 	if (rcu_access_pointer(bond->curr_active_slave) == new_slave) {
 		block_netpoll_tx();
-		write_lock_bh(&bond->curr_slave_lock);
 		bond_change_active_slave(bond, NULL);
 		bond_select_active_slave(bond);
-		write_unlock_bh(&bond->curr_slave_lock);
 		unblock_netpoll_tx();
 	}
 	/* either primary_slave or curr_active_slave might've changed */
@@ -1720,11 +1704,8 @@ static int __bond_release_one(struct net_device *bond_dev,
 	if (rtnl_dereference(bond->primary_slave) == slave)
 		RCU_INIT_POINTER(bond->primary_slave, NULL);
 
-	if (oldcurrent == slave) {
-		write_lock_bh(&bond->curr_slave_lock);
+	if (oldcurrent == slave)
 		bond_change_active_slave(bond, NULL);
-		write_unlock_bh(&bond->curr_slave_lock);
-	}
 
 	if (bond_is_lb(bond)) {
 		/* Must be called only after the slave has been
@@ -1743,11 +1724,7 @@ static int __bond_release_one(struct net_device *bond_dev,
 		 * is no concern that another slave add/remove event
 		 * will interfere.
 		 */
-		write_lock_bh(&bond->curr_slave_lock);
-
 		bond_select_active_slave(bond);
-
-		write_unlock_bh(&bond->curr_slave_lock);
 	}
 
 	if (!bond_has_slaves(bond)) {
@@ -2058,9 +2035,7 @@ static void bond_miimon_commit(struct bonding *bond)
 do_failover:
 		ASSERT_RTNL();
 		block_netpoll_tx();
-		write_lock_bh(&bond->curr_slave_lock);
 		bond_select_active_slave(bond);
-		write_unlock_bh(&bond->curr_slave_lock);
 		unblock_netpoll_tx();
 	}
 
@@ -2506,15 +2481,8 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
 		if (slave_state_changed) {
 			bond_slave_state_change(bond);
 		} else if (do_failover) {
-			/* the bond_select_active_slave must hold RTNL
-			 * and curr_slave_lock for write.
-			 */
 			block_netpoll_tx();
-			write_lock_bh(&bond->curr_slave_lock);
-
 			bond_select_active_slave(bond);
-
-			write_unlock_bh(&bond->curr_slave_lock);
 			unblock_netpoll_tx();
 		}
 		rtnl_unlock();
@@ -2670,9 +2638,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
 do_failover:
 		ASSERT_RTNL();
 		block_netpoll_tx();
-		write_lock_bh(&bond->curr_slave_lock);
 		bond_select_active_slave(bond);
-		write_unlock_bh(&bond->curr_slave_lock);
 		unblock_netpoll_tx();
 	}
 
@@ -2939,9 +2905,7 @@ static int bond_slave_netdev_event(unsigned long event,
 			    primary ? slave_dev->name : "none");
 
 		block_netpoll_tx();
-		write_lock_bh(&bond->curr_slave_lock);
 		bond_select_active_slave(bond);
-		write_unlock_bh(&bond->curr_slave_lock);
 		unblock_netpoll_tx();
 		break;
 	case NETDEV_FEAT_CHANGE:
@@ -3106,7 +3070,6 @@ static int bond_open(struct net_device *bond_dev)
 
 	/* reset slave->backup and slave->inactive */
 	if (bond_has_slaves(bond)) {
-		read_lock(&bond->curr_slave_lock);
 		bond_for_each_slave(bond, slave, iter) {
 			if (bond_uses_primary(bond) &&
 			    slave != rcu_access_pointer(bond->curr_active_slave)) {
@@ -3117,7 +3080,6 @@ static int bond_open(struct net_device *bond_dev)
 							    BOND_SLAVE_NOTIFY_NOW);
 			}
 		}
-		read_unlock(&bond->curr_slave_lock);
 	}
 
 	bond_work_init_all(bond);
@@ -3239,14 +3201,10 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
 		if (!mii)
 			return -EINVAL;
 
-
 		if (mii->reg_num == 1) {
 			mii->val_out = 0;
-			read_lock(&bond->curr_slave_lock);
 			if (netif_carrier_ok(bond->dev))
 				mii->val_out = BMSR_LSTATUS;
-
-			read_unlock(&bond->curr_slave_lock);
 		}
 
 		return 0;
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 534c0600484e..b62697f4a3de 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -734,15 +734,13 @@ static int bond_option_active_slave_set(struct bonding *bond,
 	}
 
 	block_netpoll_tx();
-	write_lock_bh(&bond->curr_slave_lock);
-
 	/* check to see if we are clearing active */
 	if (!slave_dev) {
 		netdev_info(bond->dev, "Clearing current active slave\n");
 		RCU_INIT_POINTER(bond->curr_active_slave, NULL);
 		bond_select_active_slave(bond);
 	} else {
-		struct slave *old_active = bond_deref_active_protected(bond);
+		struct slave *old_active = rtnl_dereference(bond->curr_active_slave);
 		struct slave *new_active = bond_slave_get_rtnl(slave_dev);
 
 		BUG_ON(!new_active);
@@ -765,8 +763,6 @@ static int bond_option_active_slave_set(struct bonding *bond,
 			}
 		}
 	}
-
-	write_unlock_bh(&bond->curr_slave_lock);
 	unblock_netpoll_tx();
 
 	return ret;
@@ -1066,7 +1062,6 @@ static int bond_option_primary_set(struct bonding *bond,
 	struct slave *slave;
 
 	block_netpoll_tx();
-	write_lock_bh(&bond->curr_slave_lock);
 
 	p = strchr(primary, '\n');
 	if (p)
@@ -1103,7 +1098,6 @@ static int bond_option_primary_set(struct bonding *bond,
 		    primary, bond->dev->name);
 
 out:
-	write_unlock_bh(&bond->curr_slave_lock);
 	unblock_netpoll_tx();
 
 	return 0;
@@ -1117,9 +1111,7 @@ static int bond_option_primary_reselect_set(struct bonding *bond,
 	bond->params.primary_reselect = newval->value;
 
 	block_netpoll_tx();
-	write_lock_bh(&bond->curr_slave_lock);
 	bond_select_active_slave(bond);
-	write_unlock_bh(&bond->curr_slave_lock);
 	unblock_netpoll_tx();
 
 	return 0;
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 78c461abaa09..02afdeb08765 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -184,9 +184,7 @@ struct slave {
 
 /*
  * Here are the locking policies for the two bonding locks:
- *
- * 1) Get rcu_read_lock when reading or RTNL when writing slave list.
- * 2) Get bond->curr_slave_lock when reading/writing bond->curr_active_slave.
+ * Get rcu_read_lock when reading or RTNL when writing slave list.
  */
 struct bonding {
 	struct   net_device *dev; /* first - useful for panic debug */
@@ -227,10 +225,6 @@ struct bonding {
 #define bond_slave_get_rtnl(dev) \
 	((struct slave *) rtnl_dereference(dev->rx_handler_data))
 
-#define bond_deref_active_protected(bond)				   \
-	rcu_dereference_protected(bond->curr_active_slave,		   \
-				  lockdep_is_held(&bond->curr_slave_lock))
-
 struct bond_vlan_tag {
 	__be16		vlan_proto;
 	unsigned short	vlan_id;
-- 
cgit v1.2.3


From b743562819bd97cc7c282e870896bae8016b64b5 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Thu, 11 Sep 2014 22:49:25 +0200
Subject: bonding: convert curr_slave_lock to a spinlock and rename it

curr_slave_lock is now a misleading name; a much better name is
mode_lock, as it'll be used for each mode's purposes. It's also no longer
necessary to use a rwlock; a simple spinlock is enough.

Suggested-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c  | 4 ++--
 drivers/net/bonding/bond_main.c | 7 +++----
 drivers/net/bonding/bonding.h   | 2 +-
 3 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index dfd3a7835d17..1824d1df4d09 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2057,7 +2057,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 	struct port *port;
 	bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
 
-	read_lock(&bond->curr_slave_lock);
+	spin_lock_bh(&bond->mode_lock);
 	rcu_read_lock();
 
 	/* check if there are any slaves */
@@ -2120,7 +2120,7 @@ re_arm:
 		}
 	}
 	rcu_read_unlock();
-	read_unlock(&bond->curr_slave_lock);
+	spin_unlock_bh(&bond->mode_lock);
 
 	if (should_notify_rtnl && rtnl_trylock()) {
 		bond_slave_state_notify(bond);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3b06685260b8..99d21c2fd44f 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1679,9 +1679,9 @@ static int __bond_release_one(struct net_device *bond_dev,
 		/* Sync against bond_3ad_rx_indication and
 		 * bond_3ad_state_machine_handler
 		 */
-		write_lock_bh(&bond->curr_slave_lock);
+		spin_lock_bh(&bond->mode_lock);
 		bond_3ad_unbind_slave(slave);
-		write_unlock_bh(&bond->curr_slave_lock);
+		spin_unlock_bh(&bond->mode_lock);
 	}
 
 	netdev_info(bond_dev, "Releasing %s interface %s\n",
@@ -3850,8 +3850,7 @@ void bond_setup(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
-	/* initialize rwlocks */
-	rwlock_init(&bond->curr_slave_lock);
+	spin_lock_init(&bond->mode_lock);
 	bond->params = bonding_defaults;
 
 	/* Initialize pointers */
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 02afdeb08765..0cda34b827f8 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -195,7 +195,7 @@ struct bonding {
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
 			      struct slave *);
-	rwlock_t curr_slave_lock;
+	spinlock_t mode_lock;
 	u8	 send_peer_notif;
 	u8       igmp_retrans;
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3


From 4bab16d7c97498e91564231b922d49f52efaf7d4 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Thu, 11 Sep 2014 22:49:26 +0200
Subject: bonding: alb: convert to bond->mode_lock

The ALB/TLB specific spinlocks are no longer necessary as we now have
bond->mode_lock for this purpose, so convert them and remove them from
struct alb_bond_info.
Also remove the unneeded lock/unlock functions and use spin_lock/unlock
directly.

Suggested-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c     | 108 ++++++++++++-------------------------
 drivers/net/bonding/bond_alb.h     |   2 -
 drivers/net/bonding/bond_debugfs.c |   4 +-
 drivers/net/bonding/bond_main.c    |  10 ----
 4 files changed, 35 insertions(+), 89 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index b755659ddfdc..876b97fb55e9 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -100,27 +100,6 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
 
 /*********************** tlb specific functions ***************************/
 
-static inline void _lock_tx_hashtbl_bh(struct bonding *bond)
-{
-	spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
-}
-
-static inline void _unlock_tx_hashtbl_bh(struct bonding *bond)
-{
-	spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
-}
-
-static inline void _lock_tx_hashtbl(struct bonding *bond)
-{
-	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
-}
-
-static inline void _unlock_tx_hashtbl(struct bonding *bond)
-{
-	spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
-}
-
-/* Caller must hold tx_hashtbl lock */
 static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
 {
 	if (save_load) {
@@ -167,9 +146,9 @@ static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
 static void tlb_clear_slave(struct bonding *bond, struct slave *slave,
 			 int save_load)
 {
-	_lock_tx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 	__tlb_clear_slave(bond, slave, save_load);
-	_unlock_tx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 }
 
 /* Must be called before starting the monitor timer */
@@ -184,14 +163,14 @@ static int tlb_initialize(struct bonding *bond)
 	if (!new_hashtbl)
 		return -1;
 
-	_lock_tx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	bond_info->tx_hashtbl = new_hashtbl;
 
 	for (i = 0; i < TLB_HASH_TABLE_SIZE; i++)
 		tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0);
 
-	_unlock_tx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 
 	return 0;
 }
@@ -202,12 +181,12 @@ static void tlb_deinitialize(struct bonding *bond)
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct tlb_up_slave *arr;
 
-	_lock_tx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	kfree(bond_info->tx_hashtbl);
 	bond_info->tx_hashtbl = NULL;
 
-	_unlock_tx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 
 	arr = rtnl_dereference(bond_info->slave_arr);
 	if (arr)
@@ -281,7 +260,6 @@ static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,
 	return assigned_slave;
 }
 
-/* Caller must hold bond lock for read */
 static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
 					u32 skb_len)
 {
@@ -291,32 +269,13 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
 	 * tlb_choose_channel() is only called by bond_alb_xmit()
 	 * which already has softirq disabled.
 	 */
-	_lock_tx_hashtbl(bond);
+	spin_lock(&bond->mode_lock);
 	tx_slave = __tlb_choose_channel(bond, hash_index, skb_len);
-	_unlock_tx_hashtbl(bond);
+	spin_unlock(&bond->mode_lock);
 	return tx_slave;
 }
 
 /*********************** rlb specific functions ***************************/
-static inline void _lock_rx_hashtbl_bh(struct bonding *bond)
-{
-	spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
-}
-
-static inline void _unlock_rx_hashtbl_bh(struct bonding *bond)
-{
-	spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
-}
-
-static inline void _lock_rx_hashtbl(struct bonding *bond)
-{
-	spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
-}
-
-static inline void _unlock_rx_hashtbl(struct bonding *bond)
-{
-	spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
-}
 
 /* when an ARP REPLY is received from a client update its info
  * in the rx_hashtbl
@@ -327,7 +286,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
 	struct rlb_client_info *client_info;
 	u32 hash_index;
 
-	_lock_rx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
 	client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -342,7 +301,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
 		bond_info->rx_ntt = 1;
 	}
 
-	_unlock_rx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 }
 
 static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
@@ -479,7 +438,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
 	u32 index, next_index;
 
 	/* clear slave from rx_hashtbl */
-	_lock_rx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	rx_hash_table = bond_info->rx_hashtbl;
 	index = bond_info->rx_hashtbl_used_head;
@@ -510,7 +469,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
 		}
 	}
 
-	_unlock_rx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 
 	if (slave != rtnl_dereference(bond->curr_active_slave))
 		rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
@@ -561,7 +520,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
 	struct rlb_client_info *client_info;
 	u32 hash_index;
 
-	_lock_rx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	hash_index = bond_info->rx_hashtbl_used_head;
 	for (; hash_index != RLB_NULL_INDEX;
@@ -579,7 +538,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
 	 */
 	bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
 
-	_unlock_rx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 }
 
 /* The slave was assigned a new mac address - update the clients */
@@ -590,7 +549,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
 	int ntt = 0;
 	u32 hash_index;
 
-	_lock_rx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	hash_index = bond_info->rx_hashtbl_used_head;
 	for (; hash_index != RLB_NULL_INDEX;
@@ -611,7 +570,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
 		bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
 	}
 
-	_unlock_rx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 }
 
 /* mark all clients using src_ip to be updated */
@@ -621,7 +580,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
 	struct rlb_client_info *client_info;
 	u32 hash_index;
 
-	_lock_rx_hashtbl(bond);
+	spin_lock(&bond->mode_lock);
 
 	hash_index = bond_info->rx_hashtbl_used_head;
 	for (; hash_index != RLB_NULL_INDEX;
@@ -645,10 +604,9 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
 		}
 	}
 
-	_unlock_rx_hashtbl(bond);
+	spin_unlock(&bond->mode_lock);
 }
 
-/* Caller must hold both bond and ptr locks for read */
 static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
@@ -657,7 +615,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 	struct rlb_client_info *client_info;
 	u32 hash_index = 0;
 
-	_lock_rx_hashtbl(bond);
+	spin_lock(&bond->mode_lock);
 
 	curr_active_slave = rcu_dereference(bond->curr_active_slave);
 
@@ -676,7 +634,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 
 			assigned_slave = client_info->slave;
 			if (assigned_slave) {
-				_unlock_rx_hashtbl(bond);
+				spin_unlock(&bond->mode_lock);
 				return assigned_slave;
 			}
 		} else {
@@ -738,7 +696,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 		}
 	}
 
-	_unlock_rx_hashtbl(bond);
+	spin_unlock(&bond->mode_lock);
 
 	return assigned_slave;
 }
@@ -800,7 +758,7 @@ static void rlb_rebalance(struct bonding *bond)
 	int ntt;
 	u32 hash_index;
 
-	_lock_rx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	ntt = 0;
 	hash_index = bond_info->rx_hashtbl_used_head;
@@ -818,7 +776,7 @@ static void rlb_rebalance(struct bonding *bond)
 	/* update the team's flag only after the whole iteration */
 	if (ntt)
 		bond_info->rx_ntt = 1;
-	_unlock_rx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 }
 
 /* Caller must hold rx_hashtbl lock */
@@ -917,7 +875,7 @@ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
 	u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
 	u32 index;
 
-	_lock_rx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	index = bond_info->rx_hashtbl[ip_src_hash].src_first;
 	while (index != RLB_NULL_INDEX) {
@@ -928,7 +886,7 @@ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
 				rlb_delete_table_entry(bond, index);
 		index = next_index;
 	}
-	_unlock_rx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 }
 
 static int rlb_initialize(struct bonding *bond)
@@ -942,7 +900,7 @@ static int rlb_initialize(struct bonding *bond)
 	if (!new_hashtbl)
 		return -1;
 
-	_lock_rx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	bond_info->rx_hashtbl = new_hashtbl;
 
@@ -951,7 +909,7 @@ static int rlb_initialize(struct bonding *bond)
 	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)
 		rlb_init_table_entry(bond_info->rx_hashtbl + i);
 
-	_unlock_rx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 
 	/* register to receive ARPs */
 	bond->recv_probe = rlb_arp_recv;
@@ -963,13 +921,13 @@ static void rlb_deinitialize(struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 
-	_lock_rx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	kfree(bond_info->rx_hashtbl);
 	bond_info->rx_hashtbl = NULL;
 	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
 
-	_unlock_rx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 }
 
 static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
@@ -977,7 +935,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	u32 curr_index;
 
-	_lock_rx_hashtbl_bh(bond);
+	spin_lock_bh(&bond->mode_lock);
 
 	curr_index = bond_info->rx_hashtbl_used_head;
 	while (curr_index != RLB_NULL_INDEX) {
@@ -990,7 +948,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
 		curr_index = next_index;
 	}
 
-	_unlock_rx_hashtbl_bh(bond);
+	spin_unlock_bh(&bond->mode_lock);
 }
 
 /*********************** tlb/rlb shared functions *********************/
@@ -1394,9 +1352,9 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
 	}
 
 	if (tx_slave && bond->params.tlb_dynamic_lb) {
-		_lock_tx_hashtbl(bond);
+		spin_lock(&bond->mode_lock);
 		__tlb_clear_slave(bond, tx_slave, 0);
-		_unlock_tx_hashtbl(bond);
+		spin_unlock(&bond->mode_lock);
 	}
 
 	/* no suitable interface, frame not sent */
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index aaeac61d03cf..3c6a7ff974d7 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -147,7 +147,6 @@ struct tlb_up_slave {
 
 struct alb_bond_info {
 	struct tlb_client_info	*tx_hashtbl; /* Dynamically allocated */
-	spinlock_t		tx_hashtbl_lock;
 	u32			unbalanced_load;
 	int			tx_rebalance_counter;
 	int			lp_counter;
@@ -156,7 +155,6 @@ struct alb_bond_info {
 	/* -------- rlb parameters -------- */
 	int rlb_enabled;
 	struct rlb_client_info	*rx_hashtbl;	/* Receive hash table */
-	spinlock_t		rx_hashtbl_lock;
 	u32			rx_hashtbl_used_head;
 	u8			rx_ntt;	/* flag - need to transmit
 					 * to all rx clients
diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 280971b227ea..652f6c5d1bf7 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -29,7 +29,7 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)
 	seq_printf(m, "SourceIP        DestinationIP   "
 			"Destination MAC   DEV\n");
 
-	spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+	spin_lock_bh(&bond->mode_lock);
 
 	hash_index = bond_info->rx_hashtbl_used_head;
 	for (; hash_index != RLB_NULL_INDEX;
@@ -42,7 +42,7 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)
 			client_info->slave->dev->name);
 	}
 
-	spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+	spin_unlock_bh(&bond->mode_lock);
 
 	return 0;
 }
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 99d21c2fd44f..e06251417a7d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4297,19 +4297,9 @@ static int bond_init(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);
-	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 
 	netdev_dbg(bond_dev, "Begin bond_init\n");
 
-	/*
-	 * Initialize locks that may be required during
-	 * en/deslave operations.  All of the bond_open work
-	 * (of which this is part) should really be moved to
-	 * a phase prior to dev_open
-	 */
-	spin_lock_init(&(bond_info->tx_hashtbl_lock));
-	spin_lock_init(&(bond_info->rx_hashtbl_lock));
-
 	bond->wq = create_singlethread_workqueue(bond_dev->name);
 	if (!bond->wq)
 		return -ENOMEM;
-- 
cgit v1.2.3


From e470259fa1bd7ce5a375b16c5ec97cc0e83b058d Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Thu, 11 Sep 2014 22:49:27 +0200
Subject: bonding: 3ad: convert to bond->mode_lock

Now that we have bond->mode_lock, we can remove the per-port
state_machine_lock and use mode_lock in its place. There are no fast
paths requiring the per-port spinlocks, so it should be okay to
consolidate them into mode_lock. Also move the locking into
bond_3ad_unbind_slave() itself, as we don't want to expose mode_lock
outside of the specific modes.

Suggested-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c  | 70 +++++++++++++----------------------------
 drivers/net/bonding/bond_3ad.h  |  1 -
 drivers/net/bonding/bond_main.c |  8 +----
 3 files changed, 22 insertions(+), 57 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 1824d1df4d09..2bb360f32a64 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -233,24 +233,6 @@ static inline int __check_agg_selection_timer(struct port *port)
 	return BOND_AD_INFO(bond).agg_select_timer ? 1 : 0;
 }
 
-/**
- * __get_state_machine_lock - lock the port's state machines
- * @port: the port we're looking at
- */
-static inline void __get_state_machine_lock(struct port *port)
-{
-	spin_lock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));
-}
-
-/**
- * __release_state_machine_lock - unlock the port's state machines
- * @port: the port we're looking at
- */
-static inline void __release_state_machine_lock(struct port *port)
-{
-	spin_unlock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));
-}
-
 /**
  * __get_link_speed - get a port's speed
  * @port: the port we're looking at
@@ -341,16 +323,6 @@ static u8 __get_duplex(struct port *port)
 	return retval;
 }
 
-/**
- * __initialize_port_locks - initialize a port's STATE machine spinlock
- * @port: the slave of the port we're looking at
- */
-static inline void __initialize_port_locks(struct slave *slave)
-{
-	/* make sure it isn't called twice */
-	spin_lock_init(&(SLAVE_AD_INFO(slave)->state_machine_lock));
-}
-
 /* Conversions */
 
 /**
@@ -1843,7 +1815,6 @@ void bond_3ad_bind_slave(struct slave *slave)
 
 		ad_initialize_port(port, bond->params.lacp_fast);
 
-		__initialize_port_locks(slave);
 		port->slave = slave;
 		port->actor_port_number = SLAVE_AD_INFO(slave)->id;
 		/* key is determined according to the link speed, duplex and user key(which
@@ -1899,6 +1870,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
 	struct slave *slave_iter;
 	struct list_head *iter;
 
+	/* Sync against bond_3ad_state_machine_handler() */
+	spin_lock_bh(&bond->mode_lock);
 	aggregator = &(SLAVE_AD_INFO(slave)->aggregator);
 	port = &(SLAVE_AD_INFO(slave)->port);
 
@@ -1906,7 +1879,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
 	if (!port->slave) {
 		netdev_warn(bond->dev, "Trying to unbind an uninitialized port on %s\n",
 			    slave->dev->name);
-		return;
+		goto out;
 	}
 
 	netdev_dbg(bond->dev, "Unbinding Link Aggregation Group %d\n",
@@ -2032,6 +2005,9 @@ void bond_3ad_unbind_slave(struct slave *slave)
 		}
 	}
 	port->slave = NULL;
+
+out:
+	spin_unlock_bh(&bond->mode_lock);
 }
 
 /**
@@ -2057,6 +2033,10 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 	struct port *port;
 	bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
 
+	/* Lock to protect data accessed by all (e.g., port->sm_vars) and
+	 * against running with bond_3ad_unbind_slave. ad_rx_machine may run
+	 * concurrently due to incoming LACPDU as well.
+	 */
 	spin_lock_bh(&bond->mode_lock);
 	rcu_read_lock();
 
@@ -2093,12 +2073,6 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 			goto re_arm;
 		}
 
-		/* Lock around state machines to protect data accessed
-		 * by all (e.g., port->sm_vars).  ad_rx_machine may run
-		 * concurrently due to incoming LACPDU.
-		 */
-		__get_state_machine_lock(port);
-
 		ad_rx_machine(NULL, port);
 		ad_periodic_machine(port);
 		ad_port_selection_logic(port);
@@ -2108,8 +2082,6 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 		/* turn off the BEGIN bit, since we already handled it */
 		if (port->sm_vars & AD_PORT_BEGIN)
 			port->sm_vars &= ~AD_PORT_BEGIN;
-
-		__release_state_machine_lock(port);
 	}
 
 re_arm:
@@ -2161,9 +2133,9 @@ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave,
 			netdev_dbg(slave->bond->dev, "Received LACPDU on port %d\n",
 				   port->actor_port_number);
 			/* Protect against concurrent state machines */
-			__get_state_machine_lock(port);
+			spin_lock(&slave->bond->mode_lock);
 			ad_rx_machine(lacpdu, port);
-			__release_state_machine_lock(port);
+			spin_unlock(&slave->bond->mode_lock);
 			break;
 
 		case AD_TYPE_MARKER:
@@ -2213,7 +2185,7 @@ void bond_3ad_adapter_speed_changed(struct slave *slave)
 		return;
 	}
 
-	__get_state_machine_lock(port);
+	spin_lock_bh(&slave->bond->mode_lock);
 
 	port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS;
 	port->actor_oper_port_key = port->actor_admin_port_key |=
@@ -2224,7 +2196,7 @@ void bond_3ad_adapter_speed_changed(struct slave *slave)
 	 */
 	port->sm_vars |= AD_PORT_BEGIN;
 
-	__release_state_machine_lock(port);
+	spin_unlock_bh(&slave->bond->mode_lock);
 }
 
 /**
@@ -2246,7 +2218,7 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave)
 		return;
 	}
 
-	__get_state_machine_lock(port);
+	spin_lock_bh(&slave->bond->mode_lock);
 
 	port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;
 	port->actor_oper_port_key = port->actor_admin_port_key |=
@@ -2257,7 +2229,7 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave)
 	 */
 	port->sm_vars |= AD_PORT_BEGIN;
 
-	__release_state_machine_lock(port);
+	spin_unlock_bh(&slave->bond->mode_lock);
 }
 
 /**
@@ -2280,7 +2252,7 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
 		return;
 	}
 
-	__get_state_machine_lock(port);
+	spin_lock_bh(&slave->bond->mode_lock);
 	/* on link down we are zeroing duplex and speed since
 	 * some of the adaptors(ce1000.lan) report full duplex/speed
 	 * instead of N/A(duplex) / 0(speed).
@@ -2311,7 +2283,7 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
 	 */
 	port->sm_vars |= AD_PORT_BEGIN;
 
-	__release_state_machine_lock(port);
+	spin_unlock_bh(&slave->bond->mode_lock);
 }
 
 /**
@@ -2495,7 +2467,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
  * When modify lacp_rate parameter via sysfs,
  * update actor_oper_port_state of each port.
  *
- * Hold slave->state_machine_lock,
+ * Hold bond->mode_lock,
  * so we can modify port->actor_oper_port_state,
  * no matter bond is up or down.
  */
@@ -2507,13 +2479,13 @@ void bond_3ad_update_lacp_rate(struct bonding *bond)
 	int lacp_fast;
 
 	lacp_fast = bond->params.lacp_fast;
+	spin_lock_bh(&bond->mode_lock);
 	bond_for_each_slave(bond, slave, iter) {
 		port = &(SLAVE_AD_INFO(slave)->port);
-		__get_state_machine_lock(port);
 		if (lacp_fast)
 			port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT;
 		else
 			port->actor_oper_port_state &= ~AD_STATE_LACP_TIMEOUT;
-		__release_state_machine_lock(port);
 	}
+	spin_unlock_bh(&bond->mode_lock);
 }
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index bb03b1df2f3e..c5f14ac63f3e 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -259,7 +259,6 @@ struct ad_bond_info {
 struct ad_slave_info {
 	struct aggregator aggregator;	/* 802.3ad aggregator structure */
 	struct port port;		/* 802.3ad port structure */
-	spinlock_t state_machine_lock;	/* mutex state machines vs. incoming LACPDU */
 	u16 id;
 };
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e06251417a7d..116cf6965bc5 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1675,14 +1675,8 @@ static int __bond_release_one(struct net_device *bond_dev,
 	 */
 	netdev_rx_handler_unregister(slave_dev);
 
-	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-		/* Sync against bond_3ad_rx_indication and
-		 * bond_3ad_state_machine_handler
-		 */
-		spin_lock_bh(&bond->mode_lock);
+	if (BOND_MODE(bond) == BOND_MODE_8023AD)
 		bond_3ad_unbind_slave(slave);
-		spin_unlock_bh(&bond->mode_lock);
-	}
 
 	netdev_info(bond_dev, "Releasing %s interface %s\n",
 		    bond_is_active_slave(slave) ? "active" : "backup",
-- 
cgit v1.2.3


From 8c0bc550288d81e9ad8a2ed9136a72140b9ef507 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Thu, 11 Sep 2014 22:49:28 +0200
Subject: bonding: adjust locking comments

Now that locks have been removed, remove some unnecessary comments and
adjust others to reflect reality. Also add a comment to "mode_lock"
describing its current users, with a brief summary of why they need it.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c  | 8 +-------
 drivers/net/bonding/bond_main.c | 6 +++---
 drivers/net/bonding/bonding.h   | 6 ++++++
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 876b97fb55e9..85af961f1317 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -119,7 +119,6 @@ static inline void tlb_init_slave(struct slave *slave)
 	SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;
 }
 
-/* Caller must hold bond lock for read, BH disabled */
 static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
 			 int save_load)
 {
@@ -142,7 +141,6 @@ static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
 	tlb_init_slave(slave);
 }
 
-/* Caller must hold bond lock for read */
 static void tlb_clear_slave(struct bonding *bond, struct slave *slave,
 			 int save_load)
 {
@@ -199,7 +197,6 @@ static long long compute_gap(struct slave *slave)
 	       (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */
 }
 
-/* Caller must hold bond lock for read */
 static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
 {
 	struct slave *slave, *least_loaded;
@@ -337,7 +334,6 @@ out:
 	return RX_HANDLER_ANOTHER;
 }
 
-/* Caller must hold bond lock for read */
 static struct slave *rlb_next_rx_slave(struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
@@ -370,7 +366,7 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond)
 	return rx_slave;
 }
 
-/* Caller must hold rcu_read_lock() for read */
+/* Caller must hold rcu_read_lock() */
 static struct slave *__rlb_next_rx_slave(struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
@@ -749,7 +745,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 	return tx_slave;
 }
 
-/* Caller must hold bond lock for read */
 static void rlb_rebalance(struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
@@ -1677,7 +1672,6 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
 
 }
 
-/* Caller must hold bond lock for read */
 void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 116cf6965bc5..2d90a8b7f62e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1629,7 +1629,7 @@ err_undo_flags:
 /*
  * Try to release the slave device <slave> from the bond device <master>
  * It is legal to access curr_active_slave without a lock because all the function
- * is write-locked. If "all" is true it means that the function is being called
+ * is RTNL-locked. If "all" is true it means that the function is being called
  * while destroying a bond interface and all slaves are being released.
  *
  * The rules for slave state should be:
@@ -2494,7 +2494,7 @@ re_arm:
  * place for the slave.  Returns 0 if no changes are found, >0 if changes
  * to link states must be committed.
  *
- * Called with rcu_read_lock hold.
+ * Called with rcu_read_lock held.
  */
 static int bond_ab_arp_inspect(struct bonding *bond)
 {
@@ -2642,7 +2642,7 @@ do_failover:
 /*
  * Send ARP probes for active-backup mode ARP monitor.
  *
- * Called with rcu_read_lock hold.
+ * Called with rcu_read_lock held.
  */
 static bool bond_ab_arp_probe(struct bonding *bond)
 {
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 0cda34b827f8..3aff1a815e89 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -195,6 +195,12 @@ struct bonding {
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
 			      struct slave *);
+	/* mode_lock is used for mode-specific locking needs, currently used by:
+	 * 3ad mode (4) - protect against running bond_3ad_unbind_slave() and
+	 *                bond_3ad_state_machine_handler() concurrently.
+	 * TLB mode (5) - to sync the use and modifications of its hash table
+	 * ALB mode (6) - to sync the use and modifications of its hash table
+	 */
 	spinlock_t mode_lock;
 	u8	 send_peer_notif;
 	u8       igmp_retrans;
-- 
cgit v1.2.3


From 56924c3811ae843527f7a1090ead73a0acf704a4 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Mon, 15 Sep 2014 17:19:33 +0200
Subject: bonding: consolidate the two rlb_next_rx_slave functions into one

__rlb_next_rx_slave() is a copy of rlb_next_rx_slave() with the
difference that it uses rcu primitives to walk the slave list. We don't
need two copies of the function, so make rlb_next_rx_slave() a thin
wrapper around __rlb_next_rx_slave() for callers which hold RTNL.
Add a comment and ASSERT_RTNL() to make the intended usage explicit.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 38 ++++++++++----------------------------
 1 file changed, 10 insertions(+), 28 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 85af961f1317..4efdeb67ab7c 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -334,14 +334,15 @@ out:
 	return RX_HANDLER_ANOTHER;
 }
 
-static struct slave *rlb_next_rx_slave(struct bonding *bond)
+/* Caller must hold rcu_read_lock() */
+static struct slave *__rlb_next_rx_slave(struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct slave *before = NULL, *rx_slave = NULL, *slave;
 	struct list_head *iter;
 	bool found = false;
 
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (!bond_slave_can_tx(slave))
 			continue;
 		if (!found) {
@@ -366,35 +367,16 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond)
 	return rx_slave;
 }
 
-/* Caller must hold rcu_read_lock() */
-static struct slave *__rlb_next_rx_slave(struct bonding *bond)
+/* Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers */
+static struct slave *rlb_next_rx_slave(struct bonding *bond)
 {
-	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct slave *before = NULL, *rx_slave = NULL, *slave;
-	struct list_head *iter;
-	bool found = false;
+	struct slave *rx_slave;
 
-	bond_for_each_slave_rcu(bond, slave, iter) {
-		if (!bond_slave_can_tx(slave))
-			continue;
-		if (!found) {
-			if (!before || before->speed < slave->speed)
-				before = slave;
-		} else {
-			if (!rx_slave || rx_slave->speed < slave->speed)
-				rx_slave = slave;
-		}
-		if (slave == bond_info->rx_slave)
-			found = true;
-	}
-	/* we didn't find anything after the current or we have something
-	 * better before and up to the current slave
-	 */
-	if (!rx_slave || (before && rx_slave->speed < before->speed))
-		rx_slave = before;
+	ASSERT_RTNL();
 
-	if (rx_slave)
-		bond_info->rx_slave = rx_slave;
+	rcu_read_lock();
+	rx_slave = __rlb_next_rx_slave(bond);
+	rcu_read_unlock();
 
 	return rx_slave;
 }
-- 
cgit v1.2.3


From 547942cace50e536dcda9ce8397792bc992291d6 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Mon, 15 Sep 2014 17:19:34 +0200
Subject: bonding: trivial: style and comment fixes

First adjust a couple of locking comments that were left inaccurate.
Then convert comments to the netdev style, remove extra blank lines
where necessary, and add a couple of blank lines between declarations
and code. These are all trivial styling changes with no functional change.
Also remove a couple of outdated or obvious comments.
This patch is by no means a complete fix of all netdev style violations,
but it gets the bonding driver closer.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c     |   5 +-
 drivers/net/bonding/bond_alb.c     |  48 ++++-----
 drivers/net/bonding/bond_debugfs.c |   4 +-
 drivers/net/bonding/bond_main.c    | 199 +++++++++++++------------------------
 drivers/net/bonding/bond_sysfs.c   |   1 -
 drivers/net/bonding/bonding.h      |   3 +-
 6 files changed, 93 insertions(+), 167 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 2bb360f32a64..7e9e522fd476 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -297,15 +297,14 @@ static u16 __get_link_speed(struct port *port)
 static u8 __get_duplex(struct port *port)
 {
 	struct slave *slave = port->slave;
-
 	u8 retval;
 
 	/* handling a special case: when the configuration starts with
 	 * link down, it sets the duplex to 0.
 	 */
-	if (slave->link != BOND_LINK_UP)
+	if (slave->link != BOND_LINK_UP) {
 		retval = 0x0;
-	else {
+	} else {
 		switch (slave->duplex) {
 		case DUPLEX_FULL:
 			retval = 0x1;
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 4efdeb67ab7c..615f3bebd019 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -261,14 +261,15 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
 					u32 skb_len)
 {
 	struct slave *tx_slave;
-	/*
-	 * We don't need to disable softirq here, becase
+
+	/* We don't need to disable softirq here, becase
 	 * tlb_choose_channel() is only called by bond_alb_xmit()
 	 * which already has softirq disabled.
 	 */
 	spin_lock(&bond->mode_lock);
 	tx_slave = __tlb_choose_channel(bond, hash_index, skb_len);
 	spin_unlock(&bond->mode_lock);
+
 	return tx_slave;
 }
 
@@ -569,7 +570,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
 			netdev_err(bond->dev, "found a client with no channel in the client's hash table\n");
 			continue;
 		}
-		/*update all clients using this src_ip, that are not assigned
+		/* update all clients using this src_ip, that are not assigned
 		 * to the team's address (curr_active_slave) and have a known
 		 * unicast mac address.
 		 */
@@ -695,9 +696,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 		return NULL;
 
 	if (arp->op_code == htons(ARPOP_REPLY)) {
-		/* the arp must be sent on the selected
-		* rx channel
-		*/
+		/* the arp must be sent on the selected rx channel */
 		tx_slave = rlb_choose_channel(skb, bond);
 		if (tx_slave)
 			ether_addr_copy(arp->mac_src, tx_slave->dev->dev_addr);
@@ -756,7 +755,7 @@ static void rlb_rebalance(struct bonding *bond)
 	spin_unlock_bh(&bond->mode_lock);
 }
 
-/* Caller must hold rx_hashtbl lock */
+/* Caller must hold mode_lock */
 static void rlb_init_table_entry_dst(struct rlb_client_info *entry)
 {
 	entry->used_next = RLB_NULL_INDEX;
@@ -844,8 +843,9 @@ static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
 	bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash;
 }
 
-/* deletes all rx_hashtbl entries with  arp->ip_src if their mac_src does
- * not match arp->mac_src */
+/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does
+ * not match arp->mac_src
+ */
 static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
@@ -1022,8 +1022,9 @@ static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])
 		return 0;
 	}
 
-	/* for rlb each slave must have a unique hw mac addresses so that */
-	/* each slave will receive packets destined to a different mac */
+	/* for rlb each slave must have a unique hw mac addresses so that
+	 * each slave will receive packets destined to a different mac
+	 */
 	memcpy(s_addr.sa_data, addr, dev->addr_len);
 	s_addr.sa_family = dev->type;
 	if (dev_set_mac_address(dev, &s_addr)) {
@@ -1034,13 +1035,10 @@ static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])
 	return 0;
 }
 
-/*
- * Swap MAC addresses between two slaves.
+/* Swap MAC addresses between two slaves.
  *
  * Called with RTNL held, and no other locks.
- *
  */
-
 static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2)
 {
 	u8 tmp_mac_addr[ETH_ALEN];
@@ -1051,8 +1049,7 @@ static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2)
 
 }
 
-/*
- * Send learning packets after MAC address swap.
+/* Send learning packets after MAC address swap.
  *
  * Called with RTNL and no other locks
  */
@@ -1125,7 +1122,6 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla
 		found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr);
 
 		if (found_slave) {
-			/* locking: needs RTNL and nothing else */
 			alb_swap_mac_addr(slave, found_slave);
 			alb_fasten_mac_swap(bond, slave, found_slave);
 		}
@@ -1174,7 +1170,8 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav
 			return 0;
 
 		/* Try setting slave mac to bond address and fall-through
-		   to code handling that situation below... */
+		 * to code handling that situation below...
+		 */
 		alb_set_slave_mac_addr(slave, bond->dev->dev_addr);
 	}
 
@@ -1282,7 +1279,6 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
 
 	if (rlb_enabled) {
 		bond->alb_info.rlb_enabled = 1;
-		/* initialize rlb */
 		res = rlb_initialize(bond);
 		if (res) {
 			tlb_deinitialize(bond);
@@ -1306,7 +1302,7 @@ void bond_alb_deinitialize(struct bonding *bond)
 }
 
 static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
-		struct slave *tx_slave)
+			    struct slave *tx_slave)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct ethhdr *eth_data = eth_hdr(skb);
@@ -1554,13 +1550,11 @@ void bond_alb_monitor(struct work_struct *work)
 		bond_info->tx_rebalance_counter = 0;
 	}
 
-	/* handle rlb stuff */
 	if (bond_info->rlb_enabled) {
 		if (bond_info->primary_is_promisc &&
 		    (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) {
 
-			/*
-			 * dev_set_promiscuity requires rtnl and
+			/* dev_set_promiscuity requires rtnl and
 			 * nothing else.  Avoid race with bond_close.
 			 */
 			rcu_read_unlock();
@@ -1630,8 +1624,7 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave)
 	return 0;
 }
 
-/*
- * Remove slave from tlb and rlb hash tables, and fix up MAC addresses
+/* Remove slave from tlb and rlb hash tables, and fix up MAC addresses
  * if necessary.
  *
  * Caller must hold RTNL and no other locks
@@ -1718,8 +1711,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
 	if (!swap_slave)
 		swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr);
 
-	/*
-	 * Arrange for swap_slave and new_slave to temporarily be
+	/* Arrange for swap_slave and new_slave to temporarily be
 	 * ignored so we can mess with their MAC addresses without
 	 * fear of interference from transmit activity.
 	 */
diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 652f6c5d1bf7..8f99082f90eb 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -13,9 +13,7 @@
 
 static struct dentry *bonding_debug_root;
 
-/*
- *  Show RLB hash table
- */
+/* Show RLB hash table */
 static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)
 {
 	struct bonding *bond = m->private;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2d90a8b7f62e..943a899fbfde 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -253,8 +253,7 @@ void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 		dev_queue_xmit(skb);
 }
 
-/*
- * In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
+/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
  * We don't protect the slave list iteration with a lock because:
  * a. This operation is performed in IOCTL context,
  * b. The operation is protected by the RTNL semaphore in the 8021q code,
@@ -326,8 +325,7 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
 
 /*------------------------------- Link status -------------------------------*/
 
-/*
- * Set the carrier state for the master according to the state of its
+/* Set the carrier state for the master according to the state of its
  * slaves.  If any slaves are up, the master is up.  In 802.3ad mode,
  * do special 802.3ad magic.
  *
@@ -362,8 +360,7 @@ down:
 	return 0;
 }
 
-/*
- * Get link speed and duplex from the slave's base driver
+/* Get link speed and duplex from the slave's base driver
  * using ethtool. If for some reason the call fails or the
  * values are invalid, set speed and duplex to -1,
  * and return.
@@ -416,8 +413,7 @@ const char *bond_slave_link_status(s8 link)
 	}
 }
 
-/*
- * if <dev> supports MII link status reporting, check its link status.
+/* if <dev> supports MII link status reporting, check its link status.
  *
  * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
  * depending upon the setting of the use_carrier parameter.
@@ -454,14 +450,14 @@ static int bond_check_dev_link(struct bonding *bond,
 	/* Ethtool can't be used, fallback to MII ioctls. */
 	ioctl = slave_ops->ndo_do_ioctl;
 	if (ioctl) {
-		/* TODO: set pointer to correct ioctl on a per team member */
-		/*       bases to make this more efficient. that is, once  */
-		/*       we determine the correct ioctl, we will always    */
-		/*       call it and not the others for that team          */
-		/*       member.                                           */
-
-		/*
-		 * We cannot assume that SIOCGMIIPHY will also read a
+		/* TODO: set pointer to correct ioctl on a per team member
+		 *       bases to make this more efficient. that is, once
+		 *       we determine the correct ioctl, we will always
+		 *       call it and not the others for that team
+		 *       member.
+		 */
+
+		/* We cannot assume that SIOCGMIIPHY will also read a
 		 * register; not all network drivers (e.g., e100)
 		 * support that.
 		 */
@@ -476,8 +472,7 @@ static int bond_check_dev_link(struct bonding *bond,
 		}
 	}
 
-	/*
-	 * If reporting, report that either there's no dev->do_ioctl,
+	/* If reporting, report that either there's no dev->do_ioctl,
 	 * or both SIOCGMIIREG and get_link failed (meaning that we
 	 * cannot report link status).  If not reporting, pretend
 	 * we're ok.
@@ -487,9 +482,7 @@ static int bond_check_dev_link(struct bonding *bond,
 
 /*----------------------------- Multicast list ------------------------------*/
 
-/*
- * Push the promiscuity flag down to appropriate slaves
- */
+/* Push the promiscuity flag down to appropriate slaves */
 static int bond_set_promiscuity(struct bonding *bond, int inc)
 {
 	struct list_head *iter;
@@ -512,9 +505,7 @@ static int bond_set_promiscuity(struct bonding *bond, int inc)
 	return err;
 }
 
-/*
- * Push the allmulti flag down to all slaves
- */
+/* Push the allmulti flag down to all slaves */
 static int bond_set_allmulti(struct bonding *bond, int inc)
 {
 	struct list_head *iter;
@@ -537,8 +528,7 @@ static int bond_set_allmulti(struct bonding *bond, int inc)
 	return err;
 }
 
-/*
- * Retrieve the list of registered multicast addresses for the bonding
+/* Retrieve the list of registered multicast addresses for the bonding
  * device and retransmit an IGMP JOIN request to the current active
  * slave.
  */
@@ -560,8 +550,7 @@ static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
 	rtnl_unlock();
 }
 
-/* Flush bond's hardware addresses from slave
- */
+/* Flush bond's hardware addresses from slave */
 static void bond_hw_addr_flush(struct net_device *bond_dev,
 			       struct net_device *slave_dev)
 {
@@ -632,8 +621,7 @@ static void bond_set_dev_addr(struct net_device *bond_dev,
 	call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev);
 }
 
-/*
- * bond_do_fail_over_mac
+/* bond_do_fail_over_mac
  *
  * Perform special MAC address swapping for fail_over_mac settings
  *
@@ -653,8 +641,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,
 			bond_set_dev_addr(bond->dev, new_active->dev);
 		break;
 	case BOND_FOM_FOLLOW:
-		/*
-		 * if new_active && old_active, swap them
+		/* if new_active && old_active, swap them
 		 * if just old_active, do nothing (going to no active slave)
 		 * if just new_active, set new_active to bond's MAC
 		 */
@@ -863,7 +850,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 	/* resend IGMP joins since active slave has changed or
 	 * all were sent on curr_active_slave.
 	 * resend only if bond is brought up with the affected
-	 * bonding modes and the retransmission is enabled */
+	 * bonding modes and the retransmission is enabled
+	 */
 	if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) &&
 	    ((bond_uses_primary(bond) && new_active) ||
 	     BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {
@@ -1229,8 +1217,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 			   slave_dev->name);
 	}
 
-	/*
-	 * Old ifenslave binaries are no longer supported.  These can
+	/* Old ifenslave binaries are no longer supported.  These can
 	 * be identified with moderate accuracy by the state of the slave:
 	 * the current ifenslave will set the interface down prior to
 	 * enslaving it; the old ifenslave will not.
@@ -1302,7 +1289,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	call_netdevice_notifiers(NETDEV_JOIN, slave_dev);
 
 	/* If this is the first slave, then we need to set the master's hardware
-	 * address to be the same as the slave's. */
+	 * address to be the same as the slave's.
+	 */
 	if (!bond_has_slaves(bond) &&
 	    bond->dev->addr_assign_type == NET_ADDR_RANDOM)
 		bond_set_dev_addr(bond->dev, slave_dev);
@@ -1315,8 +1303,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	new_slave->bond = bond;
 	new_slave->dev = slave_dev;
-	/*
-	 * Set the new_slave's queue_id to be zero.  Queue ID mapping
+	/* Set the new_slave's queue_id to be zero.  Queue ID mapping
 	 * is set via sysfs or module option if desired.
 	 */
 	new_slave->queue_id = 0;
@@ -1329,8 +1316,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		goto err_free;
 	}
 
-	/*
-	 * Save slave's original ("permanent") mac address for modes
+	/* Save slave's original ("permanent") mac address for modes
 	 * that need it, and for restoring it upon release, and then
 	 * set it to the master's address
 	 */
@@ -1338,8 +1324,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 	if (!bond->params.fail_over_mac ||
 	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
-		/*
-		 * Set slave to master's mac address.  The application already
+		/* Set slave to master's mac address.  The application already
 		 * set the master's mac address to that of the first slave
 		 */
 		memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len);
@@ -1425,8 +1410,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		link_reporting = bond_check_dev_link(bond, slave_dev, 1);
 
 		if ((link_reporting == -1) && !bond->params.arp_interval) {
-			/*
-			 * miimon is set but a bonded network driver
+			/* miimon is set but a bonded network driver
 			 * does not support ETHTOOL/MII and
 			 * arp_interval is not set.  Note: if
 			 * use_carrier is enabled, we will never go
@@ -1626,8 +1610,7 @@ err_undo_flags:
 	return res;
 }
 
-/*
- * Try to release the slave device <slave> from the bond device <master>
+/* Try to release the slave device <slave> from the bond device <master>
  * It is legal to access curr_active_slave without a lock because all the function
  * is RTNL-locked. If "all" is true it means that the function is being called
  * while destroying a bond interface and all slaves are being released.
@@ -1713,8 +1696,7 @@ static int __bond_release_one(struct net_device *bond_dev,
 	if (all) {
 		RCU_INIT_POINTER(bond->curr_active_slave, NULL);
 	} else if (oldcurrent == slave) {
-		/*
-		 * Note that we hold RTNL over this sequence, so there
+		/* Note that we hold RTNL over this sequence, so there
 		 * is no concern that another slave add/remove event
 		 * will interfere.
 		 */
@@ -1741,10 +1723,9 @@ static int __bond_release_one(struct net_device *bond_dev,
 		netdev_info(bond_dev, "last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n",
 			    slave_dev->name, bond_dev->name);
 
-	/* must do this from outside any spinlocks */
 	vlan_vids_del_by_dev(slave_dev, bond_dev);
 
-	/* If the mode uses primary, then this cases was handled above by
+	/* If the mode uses primary, then this case was handled above by
 	 * bond_change_active_slave(..., NULL)
 	 */
 	if (!bond_uses_primary(bond)) {
@@ -1784,7 +1765,7 @@ static int __bond_release_one(struct net_device *bond_dev,
 
 	bond_free_slave(slave);
 
-	return 0;  /* deletion OK */
+	return 0;
 }
 
 /* A wrapper used because of ndo_del_link */
@@ -1793,10 +1774,9 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	return __bond_release_one(bond_dev, slave_dev, false);
 }
 
-/*
-* First release a slave and then destroy the bond if no more slaves are left.
-* Must be under rtnl_lock when this function is called.
-*/
+/* First release a slave and then destroy the bond if no more slaves are left.
+ * Must be under rtnl_lock when this function is called.
+ */
 static int  bond_release_and_destroy(struct net_device *bond_dev,
 				     struct net_device *slave_dev)
 {
@@ -1819,7 +1799,6 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
 
 	info->bond_mode = BOND_MODE(bond);
 	info->miimon = bond->params.miimon;
-
 	info->num_slaves = bond->slave_cnt;
 
 	return 0;
@@ -1882,9 +1861,7 @@ static int bond_miimon_inspect(struct bonding *bond)
 			/*FALLTHRU*/
 		case BOND_LINK_FAIL:
 			if (link_state) {
-				/*
-				 * recovered before downdelay expired
-				 */
+				/* recovered before downdelay expired */
 				slave->link = BOND_LINK_UP;
 				slave->last_link_up = jiffies;
 				netdev_info(bond->dev, "link status up again after %d ms for interface %s\n",
@@ -2036,8 +2013,7 @@ do_failover:
 	bond_set_carrier(bond);
 }
 
-/*
- * bond_mii_monitor
+/* bond_mii_monitor
  *
  * Really a wrapper that splits the mii monitor into two phases: an
  * inspection, then (if inspection indicates something needs to be done)
@@ -2109,8 +2085,7 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
 	return ret;
 }
 
-/*
- * We go to the (large) trouble of VLAN tagging ARP frames because
+/* We go to the (large) trouble of VLAN tagging ARP frames because
  * switches in VLAN mode (especially if ports are configured as
  * "native" to a VLAN) might not pass non-tagged frames.
  */
@@ -2337,8 +2312,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
 
 	curr_active_slave = rcu_dereference(bond->curr_active_slave);
 
-	/*
-	 * Backup slaves won't see the ARP reply, but do come through
+	/* Backup slaves won't see the ARP reply, but do come through
 	 * here for each ARP probe (so we swap the sip/tip to validate
 	 * the probe).  In a "redundant switch, common router" type of
 	 * configuration, the ARP probe will (hopefully) travel from
@@ -2378,8 +2352,7 @@ static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
 			     last_act + mod * delta_in_ticks + delta_in_ticks/2);
 }
 
-/*
- * this function is called regularly to monitor each slave's link
+/* This function is called regularly to monitor each slave's link
  * ensuring that traffic is being sent and received when arp monitoring
  * is used in load-balancing mode. if the adapter has been dormant, then an
  * arp is transmitted to generate traffic. see activebackup_arp_monitor for
@@ -2488,8 +2461,7 @@ re_arm:
 				   msecs_to_jiffies(bond->params.arp_interval));
 }
 
-/*
- * Called to inspect slaves for active-backup mode ARP monitor link state
+/* Called to inspect slaves for active-backup mode ARP monitor link state
  * changes.  Sets new_link in slaves to specify what action should take
  * place for the slave.  Returns 0 if no changes are found, >0 if changes
  * to link states must be committed.
@@ -2515,16 +2487,14 @@ static int bond_ab_arp_inspect(struct bonding *bond)
 			continue;
 		}
 
-		/*
-		 * Give slaves 2*delta after being enslaved or made
+		/* Give slaves 2*delta after being enslaved or made
 		 * active.  This avoids bouncing, as the last receive
 		 * times need a full ARP monitor cycle to be updated.
 		 */
 		if (bond_time_in_interval(bond, slave->last_link_up, 2))
 			continue;
 
-		/*
-		 * Backup slave is down if:
+		/* Backup slave is down if:
 		 * - No current_arp_slave AND
 		 * - more than 3*delta since last receive AND
 		 * - the bond has an IP address
@@ -2543,8 +2513,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
 			commit++;
 		}
 
-		/*
-		 * Active slave is down if:
+		/* Active slave is down if:
 		 * - more than 2*delta since transmitting OR
 		 * - (more than 2*delta since receive AND
 		 *    the bond has an IP address)
@@ -2561,8 +2530,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
 	return commit;
 }
 
-/*
- * Called to commit link state changes noted by inspection step of
+/* Called to commit link state changes noted by inspection step of
  * active-backup mode ARP monitor.
  *
  * Called with RTNL hold.
@@ -2639,8 +2607,7 @@ do_failover:
 	bond_set_carrier(bond);
 }
 
-/*
- * Send ARP probes for active-backup mode ARP monitor.
+/* Send ARP probes for active-backup mode ARP monitor.
  *
  * Called with rcu_read_lock held.
  */
@@ -2782,9 +2749,7 @@ re_arm:
 
 /*-------------------------- netdev event handling --------------------------*/
 
-/*
- * Change device name
- */
+/* Change device name */
 static int bond_event_changename(struct bonding *bond)
 {
 	bond_remove_proc_entry(bond);
@@ -2861,13 +2826,9 @@ static int bond_slave_netdev_event(unsigned long event,
 		}
 		break;
 	case NETDEV_DOWN:
-		/*
-		 * ... Or is it this?
-		 */
 		break;
 	case NETDEV_CHANGEMTU:
-		/*
-		 * TODO: Should slaves be allowed to
+		/* TODO: Should slaves be allowed to
 		 * independently alter their MTU?  For
 		 * an active-backup bond, slaves need
 		 * not be the same type of device, so
@@ -2916,8 +2877,7 @@ static int bond_slave_netdev_event(unsigned long event,
 	return NOTIFY_DONE;
 }
 
-/*
- * bond_netdev_event: handle netdev notifier chain events.
+/* bond_netdev_event: handle netdev notifier chain events.
  *
  * This function receives events for the netdev chain.  The caller (an
  * ioctl handler calling blocking_notifier_call_chain) holds the necessary
@@ -3187,8 +3147,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
 		mii->phy_id = 0;
 		/* Fall Through */
 	case SIOCGMIIREG:
-		/*
-		 * We do this again just in case we were called by SIOCGMIIREG
+		/* We do this again just in case we were called by SIOCGMIIREG
 		 * instead of SIOCGMIIPHY.
 		 */
 		mii = if_mii(ifr);
@@ -3229,7 +3188,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
 
 		return res;
 	default:
-		/* Go on */
 		break;
 	}
 
@@ -3291,7 +3249,6 @@ static void bond_set_rx_mode(struct net_device *bond_dev)
 	struct list_head *iter;
 	struct slave *slave;
 
-
 	rcu_read_lock();
 	if (bond_uses_primary(bond)) {
 		slave = rcu_dereference(bond->curr_active_slave);
@@ -3329,8 +3286,7 @@ static int bond_neigh_init(struct neighbour *n)
 	if (ret)
 		return ret;
 
-	/*
-	 * Assign slave's neigh_cleanup to neighbour in case cleanup is called
+	/* Assign slave's neigh_cleanup to neighbour in case cleanup is called
 	 * after the last slave has been detached.  Assumes that all slaves
 	 * utilize the same neigh_cleanup (true at this writing as only user
 	 * is ipoib).
@@ -3343,8 +3299,7 @@ static int bond_neigh_init(struct neighbour *n)
 	return parms.neigh_setup(n);
 }
 
-/*
- * The bonding ndo_neigh_setup is called at init time beofre any
+/* The bonding ndo_neigh_setup is called at init time beofre any
  * slave exists. So we must declare proxy setup function which will
  * be used at run time to resolve the actual slave neigh param setup.
  *
@@ -3362,9 +3317,7 @@ static int bond_neigh_setup(struct net_device *dev,
 	return 0;
 }
 
-/*
- * Change the MTU of all of a master's slaves to match the master
- */
+/* Change the MTU of all of a master's slaves to match the master */
 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
@@ -3417,8 +3370,7 @@ unwind:
 	return res;
 }
 
-/*
- * Change HW address
+/* Change HW address
  *
  * Note that many devices must be down to change the HW address, and
  * downing the master releases all slaves.  We can make bonds full of
@@ -3588,8 +3540,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
 	return NETDEV_TX_OK;
 }
 
-/*
- * in active-backup mode, we know that bond->curr_active_slave is always valid if
+/* In active-backup mode, we know that bond->curr_active_slave is always valid if
  * the bond has a usable interface.
  */
 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)
@@ -3651,9 +3602,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
 
 /*------------------------- Device initialization ---------------------------*/
 
-/*
- * Lookup the slave that corresponds to a qid
- */
+/* Lookup the slave that corresponds to a qid */
 static inline int bond_slave_override(struct bonding *bond,
 				      struct sk_buff *skb)
 {
@@ -3682,17 +3631,14 @@ static inline int bond_slave_override(struct bonding *bond,
 static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
 			     void *accel_priv, select_queue_fallback_t fallback)
 {
-	/*
-	 * This helper function exists to help dev_pick_tx get the correct
+	/* This helper function exists to help dev_pick_tx get the correct
 	 * destination queue.  Using a helper function skips a call to
 	 * skb_tx_hash and will put the skbs in the queue we expect on their
 	 * way down to the bonding driver.
 	 */
 	u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
 
-	/*
-	 * Save the original txq to restore before passing to the driver
-	 */
+	/* Save the original txq to restore before passing to the driver */
 	qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
 
 	if (unlikely(txq >= dev->real_num_tx_queues)) {
@@ -3740,8 +3686,7 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct bonding *bond = netdev_priv(dev);
 	netdev_tx_t ret = NETDEV_TX_OK;
 
-	/*
-	 * If we risk deadlock from transmitting this in the
+	/* If we risk deadlock from transmitting this in the
 	 * netpoll path, tell netpoll to queue the frame for later tx
 	 */
 	if (unlikely(is_netpoll_tx_blocked(dev)))
@@ -3865,8 +3810,7 @@ void bond_setup(struct net_device *bond_dev)
 	bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT;
 	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
 
-	/* don't acquire bond device's netif_tx_lock when
-	 * transmitting */
+	/* don't acquire bond device's netif_tx_lock when transmitting */
 	bond_dev->features |= NETIF_F_LLTX;
 
 	/* By default, we declare the bond to be fully
@@ -3889,10 +3833,9 @@ void bond_setup(struct net_device *bond_dev)
 	bond_dev->features |= bond_dev->hw_features;
 }
 
-/*
-* Destroy a bonding device.
-* Must be under rtnl_lock when this function is called.
-*/
+/* Destroy a bonding device.
+ * Must be under rtnl_lock when this function is called.
+ */
 static void bond_uninit(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
@@ -3920,9 +3863,7 @@ static int bond_check_params(struct bond_params *params)
 	const struct bond_opt_value *valptr;
 	int arp_all_targets_value;
 
-	/*
-	 * Convert string parameters.
-	 */
+	/* Convert string parameters. */
 	if (mode) {
 		bond_opt_initstr(&newval, mode);
 		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval);
@@ -4099,9 +4040,9 @@ static int bond_check_params(struct bond_params *params)
 
 	for (arp_ip_count = 0, i = 0;
 	     (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) {
-		/* not complete check, but should be good enough to
-		   catch mistakes */
 		__be32 ip;
+
+		/* not a complete check, but good enough to catch mistakes */
 		if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) ||
 		    !bond_is_ip_target_ok(ip)) {
 			pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n",
@@ -4284,9 +4225,7 @@ static void bond_set_lockdep_class(struct net_device *dev)
 	dev->qdisc_tx_busylock = &bonding_tx_busylock_key;
 }
 
-/*
- * Called from registration process
- */
+/* Called from registration process */
 static int bond_init(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
@@ -4440,9 +4379,7 @@ static void __exit bonding_exit(void)
 	unregister_pernet_subsys(&bond_net_ops);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-	/*
-	 * Make sure we don't have an imbalance on our netpoll blocking
-	 */
+	/* Make sure we don't have an imbalance on our netpoll blocking */
 	WARN_ON(atomic_read(&netpoll_block_tx));
 #endif
 }
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 5555517284db..8ffbafd500fd 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -91,7 +91,6 @@ static struct net_device *bond_get_by_name(struct bond_net *bn, const char *ifna
  * creates and deletes entire bonds.
  *
  * The class parameter is ignored.
- *
  */
 static ssize_t bonding_store_bonds(struct class *cls,
 				   struct class_attribute *attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 3aff1a815e89..6140bf0264a4 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -197,7 +197,8 @@ struct bonding {
 			      struct slave *);
 	/* mode_lock is used for mode-specific locking needs, currently used by:
 	 * 3ad mode (4) - protect against running bond_3ad_unbind_slave() and
-	 *                bond_3ad_state_machine_handler() concurrently.
+	 *                bond_3ad_state_machine_handler() concurrently and also
+	 *                the access to the state machine shared variables.
 	 * TLB mode (5) - to sync the use and modifications of its hash table
 	 * ALB mode (6) - to sync the use and modifications of its hash table
 	 */
-- 
cgit v1.2.3


From e0974585e74cc16446bc0690f0545b72aa2a3485 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@redhat.com>
Date: Mon, 15 Sep 2014 17:19:35 +0200
Subject: bonding: consolidate ASSERT_RTNL()s and remove the unnecessary

Consolidate the calls to ASSERT_RTNL() before bond_select_active_slave()
inside bond_select_active_slave() itself and remove the ASSERT_RTNL()
from bond_hw_addr_swap() as it's not exported and its only caller -
bond_change_active_slave() already has an ASSERT_RTNL().

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 943a899fbfde..5e7987bba583 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -577,8 +577,6 @@ static void bond_hw_addr_flush(struct net_device *bond_dev,
 static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
 			      struct slave *old_active)
 {
-	ASSERT_RTNL();
-
 	if (old_active) {
 		if (bond->dev->flags & IFF_PROMISC)
 			dev_set_promiscuity(old_active->dev, -1);
@@ -876,6 +874,8 @@ void bond_select_active_slave(struct bonding *bond)
 	struct slave *best_slave;
 	int rv;
 
+	ASSERT_RTNL();
+
 	best_slave = bond_find_best_slave(bond);
 	if (best_slave != rtnl_dereference(bond->curr_active_slave)) {
 		bond_change_active_slave(bond, best_slave);
@@ -2004,7 +2004,6 @@ static void bond_miimon_commit(struct bonding *bond)
 		}
 
 do_failover:
-		ASSERT_RTNL();
 		block_netpoll_tx();
 		bond_select_active_slave(bond);
 		unblock_netpoll_tx();
@@ -2598,7 +2597,6 @@ static void bond_ab_arp_commit(struct bonding *bond)
 		}
 
 do_failover:
-		ASSERT_RTNL();
 		block_netpoll_tx();
 		bond_select_active_slave(bond);
 		unblock_netpoll_tx();
-- 
cgit v1.2.3


From a64d044e3907b717ae3d1e3711226064b42c83f4 Mon Sep 17 00:00:00 2001
From: dingtianhong <dingtianhong@huawei.com>
Date: Fri, 19 Sep 2014 21:04:57 +0800
Subject: bonding: slight optimization for bond_xmit_roundrobin()

When the slave is the curr_active_slave, no need to check
whether the slave is active or not, it is always active.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5e7987bba583..88b1ab04ce4b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3526,7 +3526,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
 	 */
 	if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) {
 		slave = rcu_dereference(bond->curr_active_slave);
-		if (slave && bond_slave_can_tx(slave))
+		if (slave)
 			bond_dev_queue_xmit(bond, skb, slave->dev);
 		else
 			bond_xmit_slave_id(bond, skb, 0);
-- 
cgit v1.2.3


From 37ab7ddf3f81cec9175f53f17c357bb0d27a343e Mon Sep 17 00:00:00 2001
From: dingtianhong <dingtianhong@huawei.com>
Date: Fri, 19 Sep 2014 21:05:01 +0800
Subject: bonding: remove the unnecessary notes for bond_xmit_broadcast()

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 88b1ab04ce4b..382a389b9bba 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3586,7 +3586,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
 						    bond_dev->name, __func__);
 				continue;
 			}
-			/* bond_dev_queue_xmit always returns 0 */
 			bond_dev_queue_xmit(bond, skb2, slave->dev);
 		}
 	}
-- 
cgit v1.2.3


From 5f0c5f73e5efaee2928c4cabcf48b03f6ba99fc8 Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <gospo@cumulusnetworks.com>
Date: Sun, 28 Sep 2014 22:34:37 -0400
Subject: bonding: make global bonding stats more reliable

As the code stands today, bonding stats are based simply on the stats
from the member interfaces.  If a member was to be removed from a bond,
the stats would instantly drop.  This would be confusing to an admin
who would suddenly see interface stats drop while traffic is still
flowing.

In addition to preventing the stats drops mentioned above, new members
will now be added to the bond and only traffic received after the member
was added to the bond will be counted as part of bonding stats.  Bonding
counters will also be updated when any slaves are dropped to make sure
the reported stats are reliable.

v2: Changes suggested by Nik to properly allocate/free stats memory.
v3: Properly destroy workqueue and fix netlink configuration path.
v4: Moved cached stats into bonding and slave structs as there does not
seem to be a complexity/performance benefit to using alloc'd memory vs
in-struct memory.

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 68 ++++++++++++++++++++++++-----------------
 drivers/net/bonding/bonding.h   |  3 ++
 2 files changed, 43 insertions(+), 28 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 53904758d693..c2adc2755ff6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -208,6 +208,8 @@ static int lacp_fast;
 
 static int bond_init(struct net_device *bond_dev);
 static void bond_uninit(struct net_device *bond_dev);
+static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
+						struct rtnl_link_stats64 *stats);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -1344,6 +1346,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	slave_dev->priv_flags |= IFF_BONDING;
+	/* initialize slave stats */
+	dev_get_stats(new_slave->dev, &new_slave->slave_stats);
 
 	if (bond_is_lb(bond)) {
 		/* bond_alb_init_slave() must be called before all other stages since
@@ -1652,6 +1656,9 @@ static int __bond_release_one(struct net_device *bond_dev,
 
 	bond_sysfs_slave_del(slave);
 
+	/* recompute stats just before removing the slave */
+	bond_get_stats(bond->dev, &bond->bond_stats);
+
 	bond_upper_dev_unlink(bond_dev, slave_dev);
 	/* unregister rx_handler early so bond_handle_frame wouldn't be called
 	 * for this slave anymore.
@@ -3085,38 +3092,43 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 	struct list_head *iter;
 	struct slave *slave;
 
-	memset(stats, 0, sizeof(*stats));
+	memcpy(stats, &bond->bond_stats, sizeof(*stats));
 
 	bond_for_each_slave(bond, slave, iter) {
 		const struct rtnl_link_stats64 *sstats =
 			dev_get_stats(slave->dev, &temp);
-
-		stats->rx_packets += sstats->rx_packets;
-		stats->rx_bytes += sstats->rx_bytes;
-		stats->rx_errors += sstats->rx_errors;
-		stats->rx_dropped += sstats->rx_dropped;
-
-		stats->tx_packets += sstats->tx_packets;
-		stats->tx_bytes += sstats->tx_bytes;
-		stats->tx_errors += sstats->tx_errors;
-		stats->tx_dropped += sstats->tx_dropped;
-
-		stats->multicast += sstats->multicast;
-		stats->collisions += sstats->collisions;
-
-		stats->rx_length_errors += sstats->rx_length_errors;
-		stats->rx_over_errors += sstats->rx_over_errors;
-		stats->rx_crc_errors += sstats->rx_crc_errors;
-		stats->rx_frame_errors += sstats->rx_frame_errors;
-		stats->rx_fifo_errors += sstats->rx_fifo_errors;
-		stats->rx_missed_errors += sstats->rx_missed_errors;
-
-		stats->tx_aborted_errors += sstats->tx_aborted_errors;
-		stats->tx_carrier_errors += sstats->tx_carrier_errors;
-		stats->tx_fifo_errors += sstats->tx_fifo_errors;
-		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;
-		stats->tx_window_errors += sstats->tx_window_errors;
-	}
+		struct rtnl_link_stats64 *pstats = &slave->slave_stats;
+
+		stats->rx_packets +=  sstats->rx_packets - pstats->rx_packets;
+		stats->rx_bytes += sstats->rx_bytes - pstats->rx_bytes;
+		stats->rx_errors += sstats->rx_errors - pstats->rx_errors;
+		stats->rx_dropped += sstats->rx_dropped - pstats->rx_dropped;
+
+		stats->tx_packets += sstats->tx_packets - pstats->tx_packets;;
+		stats->tx_bytes += sstats->tx_bytes - pstats->tx_bytes;
+		stats->tx_errors += sstats->tx_errors - pstats->tx_errors;
+		stats->tx_dropped += sstats->tx_dropped - pstats->tx_dropped;
+
+		stats->multicast += sstats->multicast - pstats->multicast;
+		stats->collisions += sstats->collisions - pstats->collisions;
+
+		stats->rx_length_errors += sstats->rx_length_errors - pstats->rx_length_errors;
+		stats->rx_over_errors += sstats->rx_over_errors - pstats->rx_over_errors;
+		stats->rx_crc_errors += sstats->rx_crc_errors - pstats->rx_crc_errors;
+		stats->rx_frame_errors += sstats->rx_frame_errors - pstats->rx_frame_errors;
+		stats->rx_fifo_errors += sstats->rx_fifo_errors - pstats->rx_fifo_errors;
+		stats->rx_missed_errors += sstats->rx_missed_errors - pstats->rx_missed_errors;
+
+		stats->tx_aborted_errors += sstats->tx_aborted_errors - pstats->tx_aborted_errors;
+		stats->tx_carrier_errors += sstats->tx_carrier_errors - pstats->tx_carrier_errors;
+		stats->tx_fifo_errors += sstats->tx_fifo_errors - pstats->tx_fifo_errors;
+		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors - pstats->tx_heartbeat_errors;
+		stats->tx_window_errors += sstats->tx_window_errors - pstats->tx_window_errors;
+
+		/* save off the slave stats for the next run */
+		memcpy(pstats, sstats, sizeof(*sstats));
+	}
+	memcpy(&bond->bond_stats, stats, sizeof(*stats));
 
 	return stats;
 }
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 6140bf0264a4..57917e63b4e6 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -24,6 +24,7 @@
 #include <linux/inetdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/reciprocal_div.h>
+#include <linux/if_link.h>
 
 #include "bond_3ad.h"
 #include "bond_alb.h"
@@ -175,6 +176,7 @@ struct slave {
 	struct netpoll *np;
 #endif
 	struct kobject kobj;
+	struct rtnl_link_stats64 slave_stats;
 };
 
 /*
@@ -224,6 +226,7 @@ struct bonding {
 	/* debugging support via debugfs */
 	struct	 dentry *debug_dir;
 #endif /* CONFIG_DEBUG_FS */
+	struct rtnl_link_stats64 bond_stats;
 };
 
 #define bond_slave_get_rcu(dev) \
-- 
cgit v1.2.3


From d7021325a2ea5aaf4458097341c988f9dc93491f Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Sat, 4 Oct 2014 17:44:44 -0700
Subject: bonding: display xmit_hash_policy for non-dynamic-tlb mode

It's a trivial fix to display xmit_hash_policy for this new TLB mode
since it uses transmit-hash-policy as part of bonding-master info
(/proc/net/bonding/<bonding-interface>).

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_procfs.c | 3 +--
 drivers/net/bonding/bonding.h     | 7 +++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index bb09d0442aa8..a3948f8d1e53 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -73,8 +73,7 @@ static void bond_info_show_master(struct seq_file *seq)
 
 	seq_printf(seq, "\n");
 
-	if (BOND_MODE(bond) == BOND_MODE_XOR ||
-		BOND_MODE(bond) == BOND_MODE_8023AD) {
+	if (bond_mode_uses_xmit_hash(bond)) {
 		optval = bond_opt_get_val(BOND_OPT_XMIT_HASH,
 					  bond->params.xmit_policy);
 		seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 57917e63b4e6..5b022da9cad2 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -274,6 +274,13 @@ static inline bool bond_is_nondyn_tlb(const struct bonding *bond)
 	       (bond->params.tlb_dynamic_lb == 0);
 }
 
+static inline bool bond_mode_uses_xmit_hash(const struct bonding *bond)
+{
+	return (BOND_MODE(bond) == BOND_MODE_8023AD ||
+		BOND_MODE(bond) == BOND_MODE_XOR ||
+		bond_is_nondyn_tlb(bond));
+}
+
 static inline bool bond_mode_uses_arp(int mode)
 {
 	return mode != BOND_MODE_8023AD && mode != BOND_MODE_TLB &&
-- 
cgit v1.2.3


From ee6377147409a00c071b2da853059a7d59979fbc Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Sat, 4 Oct 2014 17:45:01 -0700
Subject: bonding: Simplify the xmit function for modes that use xmit_hash

An earlier change to use the usable-slave array for TLB mode had an
additional performance advantage, so extend the same logic to all other
modes that use xmit-hash for slave selection (viz. 802.3ad and XOR modes).
Also consolidate this with the earlier TLB change.

The main idea is to build the usable slaves array in the control path
and use that array for slave selection during xmit operation.

Measured performance in a setup with a bond of 4x1G NICs with 200
instances of netperf for the modes involved (3ad, xor, tlb)
cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5

Mode        TPS-Before   TPS-After

802.3ad   : 468,694      493,101
TLB (lb=0): 392,583      392,965
XOR       : 475,696      484,517

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c  | 140 +++++++++++------------------
 drivers/net/bonding/bond_alb.c  |  51 ++---------
 drivers/net/bonding/bond_alb.h  |   8 --
 drivers/net/bonding/bond_main.c | 192 +++++++++++++++++++++++++++++++++++++---
 drivers/net/bonding/bonding.h   |  10 +++
 5 files changed, 249 insertions(+), 152 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 7e9e522fd476..2110215f3528 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -102,17 +102,20 @@ static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
 /* ================= main 802.3ad protocol functions ================== */
 static int ad_lacpdu_send(struct port *port);
 static int ad_marker_send(struct port *port, struct bond_marker *marker);
-static void ad_mux_machine(struct port *port);
+static void ad_mux_machine(struct port *port, bool *update_slave_arr);
 static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
 static void ad_tx_machine(struct port *port);
 static void ad_periodic_machine(struct port *port);
-static void ad_port_selection_logic(struct port *port);
-static void ad_agg_selection_logic(struct aggregator *aggregator);
+static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
+static void ad_agg_selection_logic(struct aggregator *aggregator,
+				   bool *update_slave_arr);
 static void ad_clear_agg(struct aggregator *aggregator);
 static void ad_initialize_agg(struct aggregator *aggregator);
 static void ad_initialize_port(struct port *port, int lacp_fast);
-static void ad_enable_collecting_distributing(struct port *port);
-static void ad_disable_collecting_distributing(struct port *port);
+static void ad_enable_collecting_distributing(struct port *port,
+					      bool *update_slave_arr);
+static void ad_disable_collecting_distributing(struct port *port,
+					       bool *update_slave_arr);
 static void ad_marker_info_received(struct bond_marker *marker_info,
 				    struct port *port);
 static void ad_marker_response_received(struct bond_marker *marker,
@@ -796,8 +799,9 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
 /**
  * ad_mux_machine - handle a port's mux state machine
  * @port: the port we're looking at
+ * @update_slave_arr: Does slave array need update?
  */
-static void ad_mux_machine(struct port *port)
+static void ad_mux_machine(struct port *port, bool *update_slave_arr)
 {
 	mux_states_t last_state;
 
@@ -901,7 +905,8 @@ static void ad_mux_machine(struct port *port)
 		switch (port->sm_mux_state) {
 		case AD_MUX_DETACHED:
 			port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
-			ad_disable_collecting_distributing(port);
+			ad_disable_collecting_distributing(port,
+							   update_slave_arr);
 			port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
 			port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
 			port->ntt = true;
@@ -913,13 +918,15 @@ static void ad_mux_machine(struct port *port)
 			port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
 			port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
 			port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
-			ad_disable_collecting_distributing(port);
+			ad_disable_collecting_distributing(port,
+							   update_slave_arr);
 			port->ntt = true;
 			break;
 		case AD_MUX_COLLECTING_DISTRIBUTING:
 			port->actor_oper_port_state |= AD_STATE_COLLECTING;
 			port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
-			ad_enable_collecting_distributing(port);
+			ad_enable_collecting_distributing(port,
+							  update_slave_arr);
 			port->ntt = true;
 			break;
 		default:
@@ -1187,12 +1194,13 @@ static void ad_periodic_machine(struct port *port)
 /**
  * ad_port_selection_logic - select aggregation groups
  * @port: the port we're looking at
+ * @update_slave_arr: Does slave array need update?
  *
  * Select aggregation groups, and assign each port for it's aggregetor. The
  * selection logic is called in the inititalization (after all the handshkes),
  * and after every lacpdu receive (if selected is off).
  */
-static void ad_port_selection_logic(struct port *port)
+static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
 {
 	struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator;
 	struct port *last_port = NULL, *curr_port;
@@ -1347,7 +1355,7 @@ static void ad_port_selection_logic(struct port *port)
 			      __agg_ports_are_ready(port->aggregator));
 
 	aggregator = __get_first_agg(port);
-	ad_agg_selection_logic(aggregator);
+	ad_agg_selection_logic(aggregator, update_slave_arr);
 }
 
 /* Decide if "agg" is a better choice for the new active aggregator that
@@ -1435,6 +1443,7 @@ static int agg_device_up(const struct aggregator *agg)
 /**
  * ad_agg_selection_logic - select an aggregation group for a team
  * @aggregator: the aggregator we're looking at
+ * @update_slave_arr: Does slave array need update?
  *
  * It is assumed that only one aggregator may be selected for a team.
  *
@@ -1457,7 +1466,8 @@ static int agg_device_up(const struct aggregator *agg)
  * __get_active_agg() won't work correctly. This function should be better
  * called with the bond itself, and retrieve the first agg from it.
  */
-static void ad_agg_selection_logic(struct aggregator *agg)
+static void ad_agg_selection_logic(struct aggregator *agg,
+				   bool *update_slave_arr)
 {
 	struct aggregator *best, *active, *origin;
 	struct bonding *bond = agg->slave->bond;
@@ -1550,6 +1560,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
 				__disable_port(port);
 			}
 		}
+		/* Slave array needs update. */
+		*update_slave_arr = true;
 	}
 
 	/* if the selected aggregator is of join individuals
@@ -1678,24 +1690,30 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
 /**
  * ad_enable_collecting_distributing - enable a port's transmit/receive
  * @port: the port we're looking at
+ * @update_slave_arr: Does slave array need update?
  *
  * Enable @port if it's in an active aggregator
  */
-static void ad_enable_collecting_distributing(struct port *port)
+static void ad_enable_collecting_distributing(struct port *port,
+					      bool *update_slave_arr)
 {
 	if (port->aggregator->is_active) {
 		pr_debug("Enabling port %d(LAG %d)\n",
 			 port->actor_port_number,
 			 port->aggregator->aggregator_identifier);
 		__enable_port(port);
+		/* Slave array needs update */
+		*update_slave_arr = true;
 	}
 }
 
 /**
  * ad_disable_collecting_distributing - disable a port's transmit/receive
  * @port: the port we're looking at
+ * @update_slave_arr: Does slave array need update?
  */
-static void ad_disable_collecting_distributing(struct port *port)
+static void ad_disable_collecting_distributing(struct port *port,
+					       bool *update_slave_arr)
 {
 	if (port->aggregator &&
 	    !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system),
@@ -1704,6 +1722,8 @@ static void ad_disable_collecting_distributing(struct port *port)
 			 port->actor_port_number,
 			 port->aggregator->aggregator_identifier);
 		__disable_port(port);
+		/* Slave array needs an update */
+		*update_slave_arr = true;
 	}
 }
 
@@ -1868,6 +1888,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
 	struct bonding *bond = slave->bond;
 	struct slave *slave_iter;
 	struct list_head *iter;
+	bool dummy_slave_update; /* Ignore this value as caller updates array */
 
 	/* Sync against bond_3ad_state_machine_handler() */
 	spin_lock_bh(&bond->mode_lock);
@@ -1951,7 +1972,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
 				ad_clear_agg(aggregator);
 
 				if (select_new_active_agg)
-					ad_agg_selection_logic(__get_first_agg(port));
+					ad_agg_selection_logic(__get_first_agg(port),
+							       &dummy_slave_update);
 			} else {
 				netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n");
 			}
@@ -1966,7 +1988,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
 				/* select new active aggregator */
 				temp_aggregator = __get_first_agg(port);
 				if (temp_aggregator)
-					ad_agg_selection_logic(temp_aggregator);
+					ad_agg_selection_logic(temp_aggregator,
+							       &dummy_slave_update);
 			}
 		}
 	}
@@ -1996,7 +2019,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
 					if (select_new_active_agg) {
 						netdev_info(bond->dev, "Removing an active aggregator\n");
 						/* select new active aggregator */
-						ad_agg_selection_logic(__get_first_agg(port));
+						ad_agg_selection_logic(__get_first_agg(port),
+							               &dummy_slave_update);
 					}
 				}
 				break;
@@ -2031,6 +2055,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 	struct slave *slave;
 	struct port *port;
 	bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
+	bool update_slave_arr = false;
 
 	/* Lock to protect data accessed by all (e.g., port->sm_vars) and
 	 * against running with bond_3ad_unbind_slave. ad_rx_machine may run
@@ -2058,7 +2083,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 			}
 
 			aggregator = __get_first_agg(port);
-			ad_agg_selection_logic(aggregator);
+			ad_agg_selection_logic(aggregator, &update_slave_arr);
 		}
 		bond_3ad_set_carrier(bond);
 	}
@@ -2074,8 +2099,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 
 		ad_rx_machine(NULL, port);
 		ad_periodic_machine(port);
-		ad_port_selection_logic(port);
-		ad_mux_machine(port);
+		ad_port_selection_logic(port, &update_slave_arr);
+		ad_mux_machine(port, &update_slave_arr);
 		ad_tx_machine(port);
 
 		/* turn off the BEGIN bit, since we already handled it */
@@ -2093,6 +2118,9 @@ re_arm:
 	rcu_read_unlock();
 	spin_unlock_bh(&bond->mode_lock);
 
+	if (update_slave_arr)
+		bond_slave_arr_work_rearm(bond, 0);
+
 	if (should_notify_rtnl && rtnl_trylock()) {
 		bond_slave_state_notify(bond);
 		rtnl_unlock();
@@ -2283,6 +2311,11 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
 	port->sm_vars |= AD_PORT_BEGIN;
 
 	spin_unlock_bh(&slave->bond->mode_lock);
+
+	/* RTNL is held and mode_lock is released so it's safe
+	 * to update slave_array here.
+	 */
+	bond_update_slave_arr(slave->bond, NULL);
 }
 
 /**
@@ -2377,73 +2410,6 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
 	return ret;
 }
 
-int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
-{
-	struct bonding *bond = netdev_priv(dev);
-	struct slave *slave, *first_ok_slave;
-	struct aggregator *agg;
-	struct ad_info ad_info;
-	struct list_head *iter;
-	int slaves_in_agg;
-	int slave_agg_no;
-	int agg_id;
-
-	if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
-		netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n");
-		goto err_free;
-	}
-
-	slaves_in_agg = ad_info.ports;
-	agg_id = ad_info.aggregator_id;
-
-	if (slaves_in_agg == 0) {
-		netdev_dbg(dev, "active aggregator is empty\n");
-		goto err_free;
-	}
-
-	slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
-	first_ok_slave = NULL;
-
-	bond_for_each_slave_rcu(bond, slave, iter) {
-		agg = SLAVE_AD_INFO(slave)->port.aggregator;
-		if (!agg || agg->aggregator_identifier != agg_id)
-			continue;
-
-		if (slave_agg_no >= 0) {
-			if (!first_ok_slave && bond_slave_can_tx(slave))
-				first_ok_slave = slave;
-			slave_agg_no--;
-			continue;
-		}
-
-		if (bond_slave_can_tx(slave)) {
-			bond_dev_queue_xmit(bond, skb, slave->dev);
-			goto out;
-		}
-	}
-
-	if (slave_agg_no >= 0) {
-		netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n",
-			   agg_id);
-		goto err_free;
-	}
-
-	/* we couldn't find any suitable slave after the agg_no, so use the
-	 * first suitable found, if found.
-	 */
-	if (first_ok_slave)
-		bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
-	else
-		goto err_free;
-
-out:
-	return NETDEV_TX_OK;
-err_free:
-	/* no suitable interface, frame not sent */
-	dev_kfree_skb_any(skb);
-	goto out;
-}
-
 int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
 			 struct slave *slave)
 {
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 615f3bebd019..d2eadab787c5 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -177,7 +177,6 @@ static int tlb_initialize(struct bonding *bond)
 static void tlb_deinitialize(struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct tlb_up_slave *arr;
 
 	spin_lock_bh(&bond->mode_lock);
 
@@ -185,10 +184,6 @@ static void tlb_deinitialize(struct bonding *bond)
 	bond_info->tx_hashtbl = NULL;
 
 	spin_unlock_bh(&bond->mode_lock);
-
-	arr = rtnl_dereference(bond_info->slave_arr);
-	if (arr)
-		kfree_rcu(arr, rcu);
 }
 
 static long long compute_gap(struct slave *slave)
@@ -1336,39 +1331,9 @@ out:
 	return NETDEV_TX_OK;
 }
 
-static int bond_tlb_update_slave_arr(struct bonding *bond,
-				     struct slave *skipslave)
-{
-	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct slave *tx_slave;
-	struct list_head *iter;
-	struct tlb_up_slave *new_arr, *old_arr;
-
-	new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]),
-			  GFP_ATOMIC);
-	if (!new_arr)
-		return -ENOMEM;
-
-	bond_for_each_slave(bond, tx_slave, iter) {
-		if (!bond_slave_can_tx(tx_slave))
-			continue;
-		if (skipslave == tx_slave)
-			continue;
-		new_arr->arr[new_arr->count++] = tx_slave;
-	}
-
-	old_arr = rtnl_dereference(bond_info->slave_arr);
-	rcu_assign_pointer(bond_info->slave_arr, new_arr);
-	if (old_arr)
-		kfree_rcu(old_arr, rcu);
-
-	return 0;
-}
-
 int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct ethhdr *eth_data;
 	struct slave *tx_slave = NULL;
 	u32 hash_index;
@@ -1389,12 +1354,14 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 							      hash_index & 0xFF,
 							      skb->len);
 			} else {
-				struct tlb_up_slave *slaves;
+				struct bond_up_slave *slaves;
+				unsigned int count;
 
-				slaves = rcu_dereference(bond_info->slave_arr);
-				if (slaves && slaves->count)
+				slaves = rcu_dereference(bond->slave_arr);
+				count = slaves ? ACCESS_ONCE(slaves->count) : 0;
+				if (likely(count))
 					tx_slave = slaves->arr[hash_index %
-							       slaves->count];
+							       count];
 			}
 			break;
 		}
@@ -1641,10 +1608,6 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
 		rlb_clear_slave(bond, slave);
 	}
 
-	if (bond_is_nondyn_tlb(bond))
-		if (bond_tlb_update_slave_arr(bond, slave))
-			pr_err("Failed to build slave-array for TLB mode.\n");
-
 }
 
 void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
@@ -1669,7 +1632,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
 	}
 
 	if (bond_is_nondyn_tlb(bond)) {
-		if (bond_tlb_update_slave_arr(bond, NULL))
+		if (bond_update_slave_arr(bond, NULL))
 			pr_err("Failed to build slave-array for TLB mode.\n");
 	}
 }
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index 3c6a7ff974d7..1ad473b4ade5 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -139,19 +139,11 @@ struct tlb_slave_info {
 			 */
 };
 
-struct tlb_up_slave {
-	unsigned int	count;
-	struct rcu_head rcu;
-	struct slave	*arr[0];
-};
-
 struct alb_bond_info {
 	struct tlb_client_info	*tx_hashtbl; /* Dynamically allocated */
 	u32			unbalanced_load;
 	int			tx_rebalance_counter;
 	int			lp_counter;
-	/* -------- non-dynamic tlb mode only ---------*/
-	struct tlb_up_slave __rcu *slave_arr;	  /* Up slaves */
 	/* -------- rlb parameters -------- */
 	int rlb_enabled;
 	struct rlb_client_info	*rx_hashtbl;	/* Receive hash table */
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c2adc2755ff6..3ad5413d4f57 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -210,6 +210,7 @@ static int bond_init(struct net_device *bond_dev);
 static void bond_uninit(struct net_device *bond_dev);
 static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 						struct rtnl_link_stats64 *stats);
+static void bond_slave_arr_handler(struct work_struct *work);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -1551,6 +1552,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		unblock_netpoll_tx();
 	}
 
+	if (bond_mode_uses_xmit_hash(bond))
+		bond_update_slave_arr(bond, NULL);
+
 	netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
 		    slave_dev->name,
 		    bond_is_active_slave(new_slave) ? "an active" : "a backup",
@@ -1668,6 +1672,9 @@ static int __bond_release_one(struct net_device *bond_dev,
 	if (BOND_MODE(bond) == BOND_MODE_8023AD)
 		bond_3ad_unbind_slave(slave);
 
+	if (bond_mode_uses_xmit_hash(bond))
+		bond_update_slave_arr(bond, slave);
+
 	netdev_info(bond_dev, "Releasing %s interface %s\n",
 		    bond_is_active_slave(slave) ? "active" : "backup",
 		    slave_dev->name);
@@ -1970,6 +1977,9 @@ static void bond_miimon_commit(struct bonding *bond)
 				bond_alb_handle_link_change(bond, slave,
 							    BOND_LINK_UP);
 
+			if (BOND_MODE(bond) == BOND_MODE_XOR)
+				bond_update_slave_arr(bond, NULL);
+
 			if (!bond->curr_active_slave || slave == primary)
 				goto do_failover;
 
@@ -1997,6 +2007,9 @@ static void bond_miimon_commit(struct bonding *bond)
 				bond_alb_handle_link_change(bond, slave,
 							    BOND_LINK_DOWN);
 
+			if (BOND_MODE(bond) == BOND_MODE_XOR)
+				bond_update_slave_arr(bond, NULL);
+
 			if (slave == rcu_access_pointer(bond->curr_active_slave))
 				goto do_failover;
 
@@ -2453,6 +2466,8 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
 
 		if (slave_state_changed) {
 			bond_slave_state_change(bond);
+			if (BOND_MODE(bond) == BOND_MODE_XOR)
+				bond_update_slave_arr(bond, NULL);
 		} else if (do_failover) {
 			block_netpoll_tx();
 			bond_select_active_slave(bond);
@@ -2829,8 +2844,20 @@ static int bond_slave_netdev_event(unsigned long event,
 			if (old_duplex != slave->duplex)
 				bond_3ad_adapter_duplex_changed(slave);
 		}
+		/* Refresh slave-array if applicable!
+		 * If the setup does not use miimon or arpmon (mode-specific!),
+		 * then these events will not cause the slave-array to be
+		 * refreshed. This will cause xmit to use a slave that is not
+		 * usable. Avoid such situation by refreshing the array at these
+		 * events. If these (miimon/arpmon) parameters are configured
+		 * then array gets refreshed twice and that should be fine!
+		 */
+		if (bond_mode_uses_xmit_hash(bond))
+			bond_update_slave_arr(bond, NULL);
 		break;
 	case NETDEV_DOWN:
+		if (bond_mode_uses_xmit_hash(bond))
+			bond_update_slave_arr(bond, NULL);
 		break;
 	case NETDEV_CHANGEMTU:
 		/* TODO: Should slaves be allowed to
@@ -3010,6 +3037,7 @@ static void bond_work_init_all(struct bonding *bond)
 	else
 		INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
 	INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
+	INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
 }
 
 static void bond_work_cancel_all(struct bonding *bond)
@@ -3019,6 +3047,7 @@ static void bond_work_cancel_all(struct bonding *bond)
 	cancel_delayed_work_sync(&bond->alb_work);
 	cancel_delayed_work_sync(&bond->ad_work);
 	cancel_delayed_work_sync(&bond->mcast_work);
+	cancel_delayed_work_sync(&bond->slave_arr_work);
 }
 
 static int bond_open(struct net_device *bond_dev)
@@ -3068,6 +3097,9 @@ static int bond_open(struct net_device *bond_dev)
 		bond_3ad_initiate_agg_selection(bond, 1);
 	}
 
+	if (bond_mode_uses_xmit_hash(bond))
+		bond_update_slave_arr(bond, NULL);
+
 	return 0;
 }
 
@@ -3573,20 +3605,148 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 	return NETDEV_TX_OK;
 }
 
-/* In bond_xmit_xor() , we determine the output device by using a pre-
- * determined xmit_hash_policy(), If the selected device is not enabled,
- * find the next active slave.
+/* Use this to update slave_array when (a) it's not appropriate to update
+ * slave_array right away (note that bond_update_slave_arr() may sleep)
+ * and / or (b) RTNL is not held.
  */
-static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
+void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay)
 {
-	struct bonding *bond = netdev_priv(bond_dev);
-	int slave_cnt = ACCESS_ONCE(bond->slave_cnt);
+	queue_delayed_work(bond->wq, &bond->slave_arr_work, delay);
+}
 
-	if (likely(slave_cnt))
-		bond_xmit_slave_id(bond, skb,
-				   bond_xmit_hash(bond, skb) % slave_cnt);
-	else
+/* Slave array work handler. Holds only RTNL */
+static void bond_slave_arr_handler(struct work_struct *work)
+{
+	struct bonding *bond = container_of(work, struct bonding,
+					    slave_arr_work.work);
+	int ret;
+
+	if (!rtnl_trylock())
+		goto err;
+
+	ret = bond_update_slave_arr(bond, NULL);
+	rtnl_unlock();
+	if (ret) {
+		pr_warn_ratelimited("Failed to update slave array from WT\n");
+		goto err;
+	}
+	return;
+
+err:
+	bond_slave_arr_work_rearm(bond, 1);
+}
+
+/* Build the usable slaves array in control path for modes that use xmit-hash
+ * to determine the slave interface -
+ * (a) BOND_MODE_8023AD
+ * (b) BOND_MODE_XOR
+ * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
+ *
+ * The caller is expected to hold RTNL only and NO other lock!
+ */
+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
+{
+	struct slave *slave;
+	struct list_head *iter;
+	struct bond_up_slave *new_arr, *old_arr;
+	int slaves_in_agg;
+	int agg_id = 0;
+	int ret = 0;
+
+#ifdef CONFIG_LOCKDEP
+	WARN_ON(lockdep_is_held(&bond->mode_lock));
+#endif
+
+	new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
+			  GFP_KERNEL);
+	if (!new_arr) {
+		ret = -ENOMEM;
+		pr_err("Failed to build slave-array.\n");
+		goto out;
+	}
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+		struct ad_info ad_info;
+
+		if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
+			pr_debug("bond_3ad_get_active_agg_info failed\n");
+			kfree_rcu(new_arr, rcu);
+			/* No active aggregator means it's not safe to use
+			 * the previous array.
+			 */
+			old_arr = rtnl_dereference(bond->slave_arr);
+			if (old_arr) {
+				RCU_INIT_POINTER(bond->slave_arr, NULL);
+				kfree_rcu(old_arr, rcu);
+			}
+			goto out;
+		}
+		slaves_in_agg = ad_info.ports;
+		agg_id = ad_info.aggregator_id;
+	}
+	bond_for_each_slave(bond, slave, iter) {
+		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+			struct aggregator *agg;
+
+			agg = SLAVE_AD_INFO(slave)->port.aggregator;
+			if (!agg || agg->aggregator_identifier != agg_id)
+				continue;
+		}
+		if (!bond_slave_can_tx(slave))
+			continue;
+		if (skipslave == slave)
+			continue;
+		new_arr->arr[new_arr->count++] = slave;
+	}
+
+	old_arr = rtnl_dereference(bond->slave_arr);
+	rcu_assign_pointer(bond->slave_arr, new_arr);
+	if (old_arr)
+		kfree_rcu(old_arr, rcu);
+out:
+	if (ret != 0 && skipslave) {
+		int idx;
+
+		/* Rare situation where caller has asked to skip a specific
+		 * slave but allocation failed (most likely!). BTW this is
+		 * only possible when the call is initiated from
+		 * __bond_release_one(). In this situation; overwrite the
+		 * skipslave entry in the array with the last entry from the
+		 * array to avoid a situation where the xmit path may choose
+		 * this to-be-skipped slave to send a packet out.
+		 */
+		old_arr = rtnl_dereference(bond->slave_arr);
+		for (idx = 0; idx < old_arr->count; idx++) {
+			if (skipslave == old_arr->arr[idx]) {
+				old_arr->arr[idx] =
+				    old_arr->arr[old_arr->count-1];
+				old_arr->count--;
+				break;
+			}
+		}
+	}
+	return ret;
+}
+
+/* Use this Xmit function for 3AD as well as XOR modes. The current
+ * usable slave array is formed in the control path. The xmit function
+ * just calculates hash and sends the packet out.
+ */
+int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct bonding *bond = netdev_priv(dev);
+	struct slave *slave;
+	struct bond_up_slave *slaves;
+	unsigned int count;
+
+	slaves = rcu_dereference(bond->slave_arr);
+	count = slaves ? ACCESS_ONCE(slaves->count) : 0;
+	if (likely(count)) {
+		slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
+		bond_dev_queue_xmit(bond, skb, slave->dev);
+	} else {
 		dev_kfree_skb_any(skb);
+		atomic_long_inc(&dev->tx_dropped);
+	}
 
 	return NETDEV_TX_OK;
 }
@@ -3682,12 +3842,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
 		return bond_xmit_roundrobin(skb, dev);
 	case BOND_MODE_ACTIVEBACKUP:
 		return bond_xmit_activebackup(skb, dev);
+	case BOND_MODE_8023AD:
 	case BOND_MODE_XOR:
-		return bond_xmit_xor(skb, dev);
+		return bond_3ad_xor_xmit(skb, dev);
 	case BOND_MODE_BROADCAST:
 		return bond_xmit_broadcast(skb, dev);
-	case BOND_MODE_8023AD:
-		return bond_3ad_xmit_xor(skb, dev);
 	case BOND_MODE_ALB:
 		return bond_alb_xmit(skb, dev);
 	case BOND_MODE_TLB:
@@ -3861,6 +4020,7 @@ static void bond_uninit(struct net_device *bond_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct list_head *iter;
 	struct slave *slave;
+	struct bond_up_slave *arr;
 
 	bond_netpoll_cleanup(bond_dev);
 
@@ -3869,6 +4029,12 @@ static void bond_uninit(struct net_device *bond_dev)
 		__bond_release_one(bond_dev, slave->dev, true);
 	netdev_info(bond_dev, "Released all slaves\n");
 
+	arr = rtnl_dereference(bond->slave_arr);
+	if (arr) {
+		RCU_INIT_POINTER(bond->slave_arr, NULL);
+		kfree_rcu(arr, rcu);
+	}
+
 	list_del(&bond->bond_list);
 
 	bond_debug_unregister(bond);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 5b022da9cad2..10920f0686e2 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -179,6 +179,12 @@ struct slave {
 	struct rtnl_link_stats64 slave_stats;
 };
 
+struct bond_up_slave {
+	unsigned int	count;
+	struct rcu_head rcu;
+	struct slave	*arr[0];
+};
+
 /*
  * Link pseudo-state only used internally by monitors
  */
@@ -193,6 +199,7 @@ struct bonding {
 	struct   slave __rcu *curr_active_slave;
 	struct   slave __rcu *current_arp_slave;
 	struct   slave __rcu *primary_slave;
+	struct   bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
 	bool     force_primary;
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
@@ -222,6 +229,7 @@ struct bonding {
 	struct   delayed_work alb_work;
 	struct   delayed_work ad_work;
 	struct   delayed_work mcast_work;
+	struct   delayed_work slave_arr_work;
 #ifdef CONFIG_DEBUG_FS
 	/* debugging support via debugfs */
 	struct	 dentry *debug_dir;
@@ -534,6 +542,8 @@ const char *bond_slave_link_status(s8 link);
 struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
 					      struct net_device *end_dev,
 					      int level);
+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
+void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
 
 #ifdef CONFIG_PROC_FS
 void bond_create_proc_entry(struct bonding *bond);
-- 
cgit v1.2.3


From 0287587884b15041203b3a362d485e1ab1f24445 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Oct 2014 18:38:35 -0700
Subject: net: better IFF_XMIT_DST_RELEASE support

Testing xmit_more support with netperf and connected UDP sockets,
I found strange dst refcount false sharing.

Current handling of IFF_XMIT_DST_RELEASE is not optimal.

Dropping dst in validate_xmit_skb() is certainly too late in case the
packet was queued by cpu X but dequeued by cpu Y.

The logical point to take care of drop/force is in __dev_queue_xmit()
before even taking qdisc lock.

As Julian Anastasov pointed out, need for skb_dst() might come from some
packet schedulers or classifiers.

This patch adds new helper to cleanly express needs of various drivers
or qdiscs/classifiers.

Drivers that need skb_dst() in their ndo_start_xmit() should call the
following helper in their setup instead of the prior idiom:

	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
->
	netif_keep_dst(dev);

Instead of using a single bit, we use two bits. One of them
(IFF_XMIT_DST_RELEASE) may be rebuilt in bonding/team drivers.

The other one (IFF_XMIT_DST_RELEASE_PERM) is permanent and blocks
IFF_XMIT_DST_RELEASE from being rebuilt in bonding/team. We could add
something smarter later.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  2 +-
 drivers/net/appletalk/ipddp.c             |  2 +-
 drivers/net/bonding/bond_main.c           |  9 ++++++---
 drivers/net/eql.c                         |  2 +-
 drivers/net/ifb.c                         |  3 ++-
 drivers/net/loopback.c                    |  2 +-
 drivers/net/macvlan.c                     |  3 ++-
 drivers/net/ppp/ppp_generic.c             |  2 +-
 drivers/net/team/team.c                   |  8 +++++---
 drivers/net/vxlan.c                       |  2 +-
 drivers/net/wan/hdlc_fr.c                 |  2 +-
 drivers/s390/net/qeth_l3_main.c           |  2 +-
 include/linux/netdevice.h                 |  8 ++++++++
 net/8021q/vlan_dev.c                      |  3 ++-
 net/atm/clip.c                            |  2 +-
 net/core/dev.c                            | 19 +++++++++----------
 net/ipv4/ip_gre.c                         |  2 +-
 net/ipv4/ip_vti.c                         |  2 +-
 net/ipv4/ipip.c                           |  2 +-
 net/ipv6/ip6_gre.c                        |  2 +-
 net/ipv6/ip6_tunnel.c                     |  2 +-
 net/ipv6/ip6_vti.c                        |  2 +-
 net/ipv6/sit.c                            |  2 +-
 net/sched/cls_flow.c                      |  2 ++
 net/sched/cls_route.c                     |  1 +
 net/sched/sch_generic.c                   |  3 ---
 net/sched/sch_teql.c                      |  2 +-
 27 files changed, 54 insertions(+), 39 deletions(-)

(limited to 'drivers/net/bonding')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 13e6e0431592..58b5aa3b6f2d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1364,7 +1364,7 @@ void ipoib_setup(struct net_device *dev)
 	dev->tx_queue_len	 = ipoib_sendq_size * 2;
 	dev->features		 = (NETIF_F_VLAN_CHALLENGED	|
 				    NETIF_F_HIGHDMA);
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 
 	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
 
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 10d0dba572c2..e90c6a7333d7 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -74,7 +74,7 @@ static struct net_device * __init ipddp_init(void)
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	strcpy(dev->name, "ipddp%d");
 
 	if (version_printed++ == 0)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3ad5413d4f57..c9ac06cfe6b7 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1002,7 +1002,8 @@ static netdev_features_t bond_fix_features(struct net_device *dev,
 
 static void bond_compute_features(struct bonding *bond)
 {
-	unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
+	unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
+					IFF_XMIT_DST_RELEASE_PERM;
 	netdev_features_t vlan_features = BOND_VLAN_FEATURES;
 	netdev_features_t enc_features  = BOND_ENC_FEATURES;
 	struct net_device *bond_dev = bond->dev;
@@ -1038,8 +1039,10 @@ done:
 	bond_dev->gso_max_segs = gso_max_segs;
 	netif_set_gso_max_size(bond_dev, gso_max_size);
 
-	flags = bond_dev->priv_flags & ~IFF_XMIT_DST_RELEASE;
-	bond_dev->priv_flags = flags | dst_release_flag;
+	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) &&
+	    dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
+		bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE;
 
 	netdev_change_features(bond_dev);
 }
diff --git a/drivers/net/eql.c b/drivers/net/eql.c
index 957e5c0cede3..a10ad74cc8d2 100644
--- a/drivers/net/eql.c
+++ b/drivers/net/eql.c
@@ -199,7 +199,7 @@ static void __init eql_setup(struct net_device *dev)
 
 	dev->type       	= ARPHRD_SLIP;
 	dev->tx_queue_len 	= 5;		/* Hands them off fast */
-	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static int eql_open(struct net_device *dev)
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index d2d4a3d2237f..34f846b4bd05 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -185,7 +185,8 @@ static void ifb_setup(struct net_device *dev)
 
 	dev->flags |= IFF_NOARP;
 	dev->flags &= ~IFF_MULTICAST;
-	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	netif_keep_dst(dev);
 	eth_hw_addr_random(dev);
 }
 
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 8f2262540561..c76283c2f84a 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -169,7 +169,7 @@ static void loopback_setup(struct net_device *dev)
 	dev->type		= ARPHRD_LOOPBACK;	/* 0x0001*/
 	dev->flags		= IFF_LOOPBACK;
 	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
-	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	dev->hw_features	= NETIF_F_ALL_TSO | NETIF_F_UFO;
 	dev->features 		= NETIF_F_SG | NETIF_F_FRAGLIST
 		| NETIF_F_ALL_TSO
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e8a453f1b458..38b4fae61f04 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1025,7 +1025,8 @@ void macvlan_common_setup(struct net_device *dev)
 {
 	ether_setup(dev);
 
-	dev->priv_flags	       &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+	dev->priv_flags	       &= ~IFF_TX_SKB_SHARING;
+	netif_keep_dst(dev);
 	dev->priv_flags	       |= IFF_UNICAST_FLT;
 	dev->netdev_ops		= &macvlan_netdev_ops;
 	dev->destructor		= free_netdev;
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index fa0d71727894..80e6f3430f65 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1103,7 +1103,7 @@ static void ppp_setup(struct net_device *dev)
 	dev->type = ARPHRD_PPP;
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
 	dev->features |= NETIF_F_NETNS_LOCAL;
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 /*
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 2277c3679a51..a94a9df3e6bd 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -970,7 +970,8 @@ static void __team_compute_features(struct team *team)
 	struct team_port *port;
 	u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
 	unsigned short max_hard_header_len = ETH_HLEN;
-	unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;
+	unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
+					IFF_XMIT_DST_RELEASE_PERM;
 
 	list_for_each_entry(port, &team->port_list, list) {
 		vlan_features = netdev_increment_features(vlan_features,
@@ -985,8 +986,9 @@ static void __team_compute_features(struct team *team)
 	team->dev->vlan_features = vlan_features;
 	team->dev->hard_header_len = max_hard_header_len;
 
-	flags = team->dev->priv_flags & ~IFF_XMIT_DST_RELEASE;
-	team->dev->priv_flags = flags | dst_release_flag;
+	team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	if (dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
+		team->dev->priv_flags |= IFF_XMIT_DST_RELEASE;
 
 	netdev_change_features(team->dev);
 }
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 2af795d6ba05..2a51e6e48e1e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2193,7 +2193,7 @@ static void vxlan_setup(struct net_device *dev)
 	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
-	dev->priv_flags	&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
 	INIT_LIST_HEAD(&vxlan->next);
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index e5c7e6165a4b..3ebed1c40abb 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1047,7 +1047,7 @@ static void pvc_setup(struct net_device *dev)
 	dev->flags = IFF_POINTOPOINT;
 	dev->hard_header_len = 10;
 	dev->addr_len = 2;
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static const struct net_device_ops pvc_ops = {
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index f8427a2c4840..afebb9709763 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3306,7 +3306,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 	card->dev->features |=	NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;
-	card->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(card->dev);
 	card->dev->gso_max_size = 15 * PAGE_SIZE;
 
 	SET_NETDEV_DEV(card->dev, &card->gdev->dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2df86f50261c..3a4315b39d20 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1206,6 +1206,7 @@ enum netdev_priv_flags {
 	IFF_SUPP_NOFCS			= 1<<19,
 	IFF_LIVE_ADDR_CHANGE		= 1<<20,
 	IFF_MACVLAN			= 1<<21,
+	IFF_XMIT_DST_RELEASE_PERM	= 1<<22,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1230,6 +1231,7 @@ enum netdev_priv_flags {
 #define IFF_SUPP_NOFCS			IFF_SUPP_NOFCS
 #define IFF_LIVE_ADDR_CHANGE		IFF_LIVE_ADDR_CHANGE
 #define IFF_MACVLAN			IFF_MACVLAN
+#define IFF_XMIT_DST_RELEASE_PERM	IFF_XMIT_DST_RELEASE_PERM
 
 /**
  *	struct net_device - The DEVICE structure.
@@ -3588,6 +3590,12 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
 	return dev->priv_flags & IFF_SUPP_NOFCS;
 }
 
+/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
+static inline void netif_keep_dst(struct net_device *dev)
+{
+	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
+}
+
 extern struct pernet_operations __net_initdata loopback_net_ops;
 
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 35a6b6b15e8a..0d441ec8763e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -799,7 +799,8 @@ void vlan_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->priv_flags		|= IFF_802_1Q_VLAN;
-	dev->priv_flags		&= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+	dev->priv_flags		&= ~IFF_TX_SKB_SHARING;
+	netif_keep_dst(dev);
 	dev->tx_queue_len	= 0;
 
 	dev->netdev_ops		= &vlan_netdev_ops;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 1d9eaa4f041a..17e55dfecbe2 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -501,7 +501,7 @@ static void clip_setup(struct net_device *dev)
 	/* without any more elaborate queuing. 100 is a reasonable */
 	/* compromise between decent burst-tolerance and protection */
 	/* against memory hogs. */
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static int clip_create(int number)
diff --git a/net/core/dev.c b/net/core/dev.c
index a63b8c43c1b6..3c5bdaa44486 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2665,12 +2665,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	if (skb->next)
 		return skb;
 
-	/* If device doesn't need skb->dst, release it right now while
-	 * its hot in this cpu cache
-	 */
-	if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
-		skb_dst_drop(skb);
-
 	features = netif_skb_features(skb);
 	skb = validate_xmit_vlan(skb, features);
 	if (unlikely(!skb))
@@ -2811,8 +2805,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		 * waiting to be sent out; and the qdisc is not running -
 		 * xmit the skb directly.
 		 */
-		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
-			skb_dst_force(skb);
 
 		qdisc_bstats_update(q, skb);
 
@@ -2827,7 +2819,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
-		skb_dst_force(skb);
 		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
 		if (qdisc_run_begin(q)) {
 			if (unlikely(contended)) {
@@ -2924,6 +2915,14 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 
 	skb_update_prio(skb);
 
+	/* If device/qdisc don't need skb->dst, release it right now while
+	 * its hot in this cpu cache.
+	 */
+	if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+		skb_dst_drop(skb);
+	else
+		skb_dst_force(skb);
+
 	txq = netdev_pick_tx(dev, skb, accel_priv);
 	q = rcu_dereference_bh(txq->qdisc);
 
@@ -6674,7 +6673,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->adj_list.lower);
 	INIT_LIST_HEAD(&dev->all_adj_list.upper);
 	INIT_LIST_HEAD(&dev->all_adj_list.lower);
-	dev->priv_flags = IFF_XMIT_DST_RELEASE;
+	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
 
 	dev->num_tx_queues = txqs;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0485ef18d254..12055fdbe716 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -510,7 +510,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	memcpy(dev->broadcast, &iph->daddr, 4);
 
 	dev->flags		= IFF_NOARP;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	dev->addr_len		= 4;
 
 	if (iph->daddr) {
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index e453cb724a95..3e861011e4a3 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -364,7 +364,7 @@ static int vti_tunnel_init(struct net_device *dev)
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 
 	return ip_tunnel_init(dev);
 }
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ea88ab3102a8..37096d64730e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -289,7 +289,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 
 	dev->features		|= IPIP_FEATURES;
 	dev->hw_features	|= IPIP_FEATURES;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 74b677916a70..de3b1c86b8d3 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1242,7 +1242,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
 	dev->flags |= IFF_NOARP;
 	dev->iflink = 0;
 	dev->addr_len = sizeof(struct in6_addr);
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static int ip6gre_tunnel_init(struct net_device *dev)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index d3e8888ad611..9409887fb664 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1493,7 +1493,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
 		dev->mtu -= 8;
 	dev->flags |= IFF_NOARP;
 	dev->addr_len = sizeof(struct in6_addr);
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	/* This perm addr will be used as interface identifier by IPv6 */
 	dev->addr_assign_type = NET_ADDR_RANDOM;
 	eth_random_addr(dev->perm_addr);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 5833a2244467..d440bb585524 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -807,7 +807,7 @@ static void vti6_dev_setup(struct net_device *dev)
 	dev->mtu = ETH_DATA_LEN;
 	dev->flags |= IFF_NOARP;
 	dev->addr_len = sizeof(struct in6_addr);
-	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 /**
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0d4e27466f82..6eab37cf5345 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1364,7 +1364,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
 	dev->hard_header_len	= LL_MAX_HEADER + t_hlen;
 	dev->mtu		= ETH_DATA_LEN - t_hlen;
 	dev->flags		= IFF_NOARP;
-	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_LLTX;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index a5d2b20db560..4ac515f2a6ce 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -493,6 +493,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	tcf_exts_change(tp, &fnew->exts, &e);
 	tcf_em_tree_change(tp, &fnew->ematches, &t);
 
+	netif_keep_dst(qdisc_dev(tp->q));
+
 	if (tb[TCA_FLOW_KEYS]) {
 		fnew->keymask = keymask;
 		fnew->nkeys   = nkeys;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 6f22baae0afa..109a329b7198 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -524,6 +524,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 		if (f->handle < f1->handle)
 			break;
 
+	netif_keep_dst(qdisc_dev(tp->q));
 	rcu_assign_pointer(f->next, f1);
 	rcu_assign_pointer(*fp, f);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2b349a4de3c8..38d58e6cef07 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -47,7 +47,6 @@ EXPORT_SYMBOL(default_qdisc_ops);
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	skb_dst_force(skb);
 	q->gso_skb = skb;
 	q->qstats.requeues++;
 	q->q.qlen++;	/* it's still part of the queue */
@@ -218,8 +217,6 @@ static inline int qdisc_restart(struct Qdisc *q)
 	if (unlikely(!skb))
 		return 0;
 
-	WARN_ON_ONCE(skb_dst_is_noref(skb));
-
 	root_lock = qdisc_lock(q);
 	dev = qdisc_dev(q);
 	txq = skb_get_tx_queue(dev, skb);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 5cd291bd00e4..6ada42396a24 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -470,7 +470,7 @@ static __init void teql_master_setup(struct net_device *dev)
 	dev->tx_queue_len	= 100;
 	dev->flags		= IFF_NOARP;
 	dev->hard_header_len	= LL_MAX_HEADER;
-	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+	netif_keep_dst(dev);
 }
 
 static LIST_HEAD(master_dev_list);
-- 
cgit v1.2.3