diff options
author | David S. Miller <davem@davemloft.net> | 2016-10-18 11:45:01 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-10-18 11:45:01 -0400 |
commit | 5bb61cb5fd115bed1814f6b97417e0f397da3c79 (patch) | |
tree | d5f99f69bedcc17d837ceb1a7704d897253ca623 | |
parent | 5921a0fcfd27582f016134dcf13302362cdf2c3c (diff) | |
parent | 67b62f98a1de962277b60d77c0c208b76867dbae (diff) | |
download | linux-5bb61cb5fd115bed1814f6b97417e0f397da3c79.tar.bz2 |
Merge branch 'netdev-adjacency'
David Ahern says:
====================
net: Fix netdev adjacency tracking
The netdev adjacency tracking is failing to create proper dependencies
for some topologies. For example this topology
    +--------+
    | myvrf  |
    +--------+
      |    |
      |  +---------+
      |  | macvlan |
      |  +---------+
      |    |
  +----------+
  |  bridge  |
  +----------+
      |
  +--------+
  | bond1  |
  +--------+
      |
  +--------+
  |  eth3  |
  +--------+
hits 1 of 2 problems depending on the order of enslavement. The base set of
commands for both cases:
ip link add bond1 type bond
ip link set bond1 up
ip link set eth3 down
ip link set eth3 master bond1
ip link set eth3 up
ip link add bridge type bridge
ip link set bridge up
ip link add macvlan link bridge type macvlan
ip link set macvlan up
ip link add myvrf type vrf table 1234
ip link set myvrf up
ip link set bridge master myvrf
Case 1: enslave macvlan to the vrf before enslaving the bond to the bridge:
ip link set macvlan master myvrf
ip link set bond1 master bridge
Attempts to delete the VRF:
ip link delete myvrf
trigger the BUG in __netdev_adjacent_dev_remove:
[ 587.405260] tried to remove device eth3 from myvrf
[ 587.407269] ------------[ cut here ]------------
[ 587.408918] kernel BUG at /home/dsa/kernel.git/net/core/dev.c:5661!
[ 587.411113] invalid opcode: 0000 [#1] SMP
[ 587.412454] Modules linked in: macvlan bridge stp llc bonding vrf
[ 587.414765] CPU: 0 PID: 726 Comm: ip Not tainted 4.8.0+ #109
[ 587.416766] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
[ 587.420241] task: ffff88013ab6eec0 task.stack: ffffc90000628000
[ 587.422163] RIP: 0010:[<ffffffff813cef03>] [<ffffffff813cef03>] __netdev_adjacent_dev_remove+0x40/0x12c
...
[ 587.446053] Call Trace:
[ 587.446424] [<ffffffff813d1542>] __netdev_adjacent_dev_unlink+0x20/0x3c
[ 587.447390] [<ffffffff813d16a3>] netdev_upper_dev_unlink+0xfa/0x15e
[ 587.448297] [<ffffffffa00003a3>] vrf_del_slave+0x13/0x2a [vrf]
[ 587.449153] [<ffffffffa00004a4>] vrf_dev_uninit+0xea/0x114 [vrf]
[ 587.450036] [<ffffffff813d19b0>] rollback_registered_many+0x22b/0x2da
[ 587.450974] [<ffffffff813d1aac>] unregister_netdevice_many+0x17/0x48
[ 587.451903] [<ffffffff813de444>] rtnl_delete_link+0x3c/0x43
[ 587.452719] [<ffffffff813dedcd>] rtnl_dellink+0x180/0x194
When the BUG is converted to a WARN_ON it shows 4 missing adjacencies:
eth3 - myvrf, myvrf - eth3, bond1 - myvrf and myvrf - bond1
All of those are because the __netdev_upper_dev_link function does not
properly link macvlan lower devices to myvrf when it is enslaved.
The second case just flips the ordering of the enslavements:
ip link set bond1 master bridge
ip link set macvlan master myvrf
Then run:
ip link delete bond1
ip link delete myvrf
The vrf delete command hangs because myvrf has a reference that has not
been released. In this case the removal code does not account for 2 paths
between eth3 and myvrf - one from bridge to vrf and the other through the
macvlan.
Rather than try to maintain a linked list of all upper and lower devices
per netdevice, only track the direct neighbors. The remaining stack can
be determined by recursively walking the neighbors.
The existing netdev_for_each_all_upper_dev_rcu,
netdev_for_each_all_lower_dev and netdev_for_each_all_lower_dev_rcu macros
are replaced with APIs that walk the upper and lower device lists. The
new APIs take a callback function and a data arg that is passed to the
callback for each device in the list. Drivers using the old macros are
converted in separate patches to make it easier on reviewers. It is an
API conversion only; no functional change is intended.
v3
- address Stephen's comment to simplify logic and remove typecasts
v2
- fixed bond0 references in cover-letter
- fixed definition of netdev_next_lower_dev_rcu to mirror the upper_dev
version.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/infiniband/core/core_priv.h | 9 | ||||
-rw-r--r-- | drivers/infiniband/core/roce_gid_mgmt.c | 42 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 37 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 82 | ||||
-rw-r--r-- | drivers/net/bonding/bond_main.c | 17 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 132 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 37 | ||||
-rw-r--r-- | drivers/net/ethernet/rocker/rocker_main.c | 31 | ||||
-rw-r--r-- | include/linux/netdevice.h | 38 | ||||
-rw-r--r-- | net/core/dev.c | 350 |
10 files changed, 423 insertions, 352 deletions
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 19d499dcab76..0c0bea091de8 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -127,14 +127,7 @@ void ib_cache_release_one(struct ib_device *device); static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, struct net_device *upper) { - struct net_device *_upper = NULL; - struct list_head *iter; - - netdev_for_each_all_upper_dev_rcu(dev, _upper, iter) - if (_upper == upper) - break; - - return _upper == upper; + return netdev_has_upper_dev_all_rcu(dev, upper); } int addr_init(void); diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 06556c34606d..3a64a0881882 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -437,6 +437,28 @@ static void callback_for_addr_gid_device_scan(struct ib_device *device, &parsed->gid_attr); } +struct upper_list { + struct list_head list; + struct net_device *upper; +}; + +static int netdev_upper_walk(struct net_device *upper, void *data) +{ + struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + struct list_head *upper_list = data; + + if (!entry) { + pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n"); + return 0; + } + + list_add_tail(&entry->list, upper_list); + dev_hold(upper); + entry->upper = upper; + + return 0; +} + static void handle_netdev_upper(struct ib_device *ib_dev, u8 port, void *cookie, void (*handle_netdev)(struct ib_device *ib_dev, @@ -444,30 +466,12 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port, struct net_device *ndev)) { struct net_device *ndev = (struct net_device *)cookie; - struct upper_list { - struct list_head list; - struct net_device *upper; - }; - struct net_device *upper; - struct list_head *iter; struct upper_list *upper_iter; struct upper_list *upper_temp; LIST_HEAD(upper_list); rcu_read_lock(); - 
netdev_for_each_all_upper_dev_rcu(ndev, upper, iter) { - struct upper_list *entry = kmalloc(sizeof(*entry), - GFP_ATOMIC); - - if (!entry) { - pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n"); - continue; - } - - list_add_tail(&entry->list, &upper_list); - dev_hold(upper); - entry->upper = upper; - } + netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list); rcu_read_unlock(); handle_netdev(ib_dev, port, ndev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 5636fc3da6b8..cc059218c962 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -292,6 +292,25 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) return dev; } +struct ipoib_walk_data { + const struct sockaddr *addr; + struct net_device *result; +}; + +static int ipoib_upper_walk(struct net_device *upper, void *_data) +{ + struct ipoib_walk_data *data = _data; + int ret = 0; + + if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) { + dev_hold(upper); + data->result = upper; + ret = 1; + } + + return ret; +} + /** * Find a net_device matching the given address, which is an upper device of * the given net_device. 
@@ -304,27 +323,21 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) static struct net_device *ipoib_get_net_dev_match_addr( const struct sockaddr *addr, struct net_device *dev) { - struct net_device *upper, - *result = NULL; - struct list_head *iter; + struct ipoib_walk_data data = { + .addr = addr, + }; rcu_read_lock(); if (ipoib_is_dev_match_addr_rcu(addr, dev)) { dev_hold(dev); - result = dev; + data.result = dev; goto out; } - netdev_for_each_all_upper_dev_rcu(dev, upper, iter) { - if (ipoib_is_dev_match_addr_rcu(addr, upper)) { - dev_hold(upper); - result = upper; - break; - } - } + netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data); out: rcu_read_unlock(); - return result; + return data.result; } /* returns the number of IPoIB netdevs on top a given ipoib device matching a diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 551f0f8dead3..c80b023092dd 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -950,13 +950,61 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], dev_queue_xmit(skb); } +struct alb_walk_data { + struct bonding *bond; + struct slave *slave; + u8 *mac_addr; + bool strict_match; +}; + +static int alb_upper_dev_walk(struct net_device *upper, void *_data) +{ + struct alb_walk_data *data = _data; + bool strict_match = data->strict_match; + struct bonding *bond = data->bond; + struct slave *slave = data->slave; + u8 *mac_addr = data->mac_addr; + struct bond_vlan_tag *tags; + + if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { + if (strict_match && + ether_addr_equal_64bits(mac_addr, + upper->dev_addr)) { + alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } else if (!strict_match) { + alb_send_lp_vid(slave, upper->dev_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } + } + + /* If this is a macvlan device, then only send updates + * when strict_match is turned 
off. + */ + if (netif_is_macvlan(upper) && !strict_match) { + tags = bond_verify_device_path(bond->dev, upper, 0); + if (IS_ERR_OR_NULL(tags)) + BUG(); + alb_send_lp_vid(slave, upper->dev_addr, + tags[0].vlan_proto, tags[0].vlan_id); + kfree(tags); + } + + return 0; +} + static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match) { struct bonding *bond = bond_get_bond_by_slave(slave); - struct net_device *upper; - struct list_head *iter; - struct bond_vlan_tag *tags; + struct alb_walk_data data = { + .strict_match = strict_match, + .mac_addr = mac_addr, + .slave = slave, + .bond = bond, + }; /* send untagged */ alb_send_lp_vid(slave, mac_addr, 0, 0); @@ -965,33 +1013,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], * for that device. */ rcu_read_lock(); - netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { - if (strict_match && - ether_addr_equal_64bits(mac_addr, - upper->dev_addr)) { - alb_send_lp_vid(slave, mac_addr, - vlan_dev_vlan_proto(upper), - vlan_dev_vlan_id(upper)); - } else if (!strict_match) { - alb_send_lp_vid(slave, upper->dev_addr, - vlan_dev_vlan_proto(upper), - vlan_dev_vlan_id(upper)); - } - } - - /* If this is a macvlan device, then only send updates - * when strict_match is turned off. 
- */ - if (netif_is_macvlan(upper) && !strict_match) { - tags = bond_verify_device_path(bond->dev, upper, 0); - if (IS_ERR_OR_NULL(tags)) - BUG(); - alb_send_lp_vid(slave, upper->dev_addr, - tags[0].vlan_proto, tags[0].vlan_id); - kfree(tags); - } - } + netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &data); rcu_read_unlock(); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5fa36ebc0640..c9944d86d045 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2270,22 +2270,23 @@ re_arm: } } +static int bond_upper_dev_walk(struct net_device *upper, void *data) +{ + __be32 ip = *((__be32 *)data); + + return ip == bond_confirm_addr(upper, 0, ip); +} + static bool bond_has_this_ip(struct bonding *bond, __be32 ip) { - struct net_device *upper; - struct list_head *iter; bool ret = false; if (ip == bond_confirm_addr(bond->dev, 0, ip)) return true; rcu_read_lock(); - netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (ip == bond_confirm_addr(upper, 0, ip)) { - ret = true; - break; - } - } + if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &ip)) + ret = true; rcu_read_unlock(); return ret; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7dfde209b0e0..cbd2cfa1b154 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5012,24 +5012,23 @@ fwd_queue_err: return err; } -static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter) +static int ixgbe_upper_dev_walk(struct net_device *upper, void *data) { - struct net_device *upper; - struct list_head *iter; - int err; - - netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *dfwd = netdev_priv(upper); - struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; + if (netif_is_macvlan(upper)) { + struct macvlan_dev *dfwd = 
netdev_priv(upper); + struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; - if (dfwd->fwd_priv) { - err = ixgbe_fwd_ring_up(upper, vadapter); - if (err) - continue; - } - } + if (dfwd->fwd_priv) + ixgbe_fwd_ring_up(upper, vadapter); } + + return 0; +} + +static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter) +{ + netdev_walk_all_upper_dev_rcu(adapter->netdev, + ixgbe_upper_dev_walk, NULL); } static void ixgbe_configure(struct ixgbe_adapter *adapter) @@ -5448,12 +5447,25 @@ static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter) spin_unlock(&adapter->fdir_perfect_lock); } +static int ixgbe_disable_macvlan(struct net_device *upper, void *data) +{ + if (netif_is_macvlan(upper)) { + struct macvlan_dev *vlan = netdev_priv(upper); + + if (vlan->fwd_priv) { + netif_tx_stop_all_queues(upper); + netif_carrier_off(upper); + netif_tx_disable(upper); + } + } + + return 0; +} + void ixgbe_down(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct ixgbe_hw *hw = &adapter->hw; - struct net_device *upper; - struct list_head *iter; int i; /* signal that we are down to the interrupt handler */ @@ -5477,17 +5489,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) netif_tx_disable(netdev); /* disable any upper devices */ - netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *vlan = netdev_priv(upper); - - if (vlan->fwd_priv) { - netif_tx_stop_all_queues(upper); - netif_carrier_off(upper); - netif_tx_disable(upper); - } - } - } + netdev_walk_all_upper_dev_rcu(adapter->netdev, + ixgbe_disable_macvlan, NULL); ixgbe_irq_disable(adapter); @@ -6723,6 +6726,18 @@ static void ixgbe_update_default_up(struct ixgbe_adapter *adapter) #endif } +static int ixgbe_enable_macvlan(struct net_device *upper, void *data) +{ + if (netif_is_macvlan(upper)) { + struct macvlan_dev *vlan = netdev_priv(upper); + + if (vlan->fwd_priv) + netif_tx_wake_all_queues(upper); + } + + return 0; +} + /** 
* ixgbe_watchdog_link_is_up - update netif_carrier status and * print link up message @@ -6732,8 +6747,6 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct ixgbe_hw *hw = &adapter->hw; - struct net_device *upper; - struct list_head *iter; u32 link_speed = adapter->link_speed; const char *speed_str; bool flow_rx, flow_tx; @@ -6804,14 +6817,8 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) /* enable any upper devices */ rtnl_lock(); - netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *vlan = netdev_priv(upper); - - if (vlan->fwd_priv) - netif_tx_wake_all_queues(upper); - } - } + netdev_walk_all_upper_dev_rcu(adapter->netdev, + ixgbe_enable_macvlan, NULL); rtnl_unlock(); /* update the default user priority for VFs */ @@ -8345,12 +8352,38 @@ static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter, } #ifdef CONFIG_NET_CLS_ACT +struct upper_walk_data { + struct ixgbe_adapter *adapter; + u64 action; + int ifindex; + u8 queue; +}; + +static int get_macvlan_queue(struct net_device *upper, void *_data) +{ + if (netif_is_macvlan(upper)) { + struct macvlan_dev *dfwd = netdev_priv(upper); + struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; + struct upper_walk_data *data = _data; + struct ixgbe_adapter *adapter = data->adapter; + int ifindex = data->ifindex; + + if (vadapter && vadapter->netdev->ifindex == ifindex) { + data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx; + data->action = data->queue; + return 1; + } + } + + return 0; +} + static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, u8 *queue, u64 *action) { unsigned int num_vfs = adapter->num_vfs, vf; + struct upper_walk_data data; struct net_device *upper; - struct list_head *iter; /* redirect to a SRIOV VF */ for (vf = 0; vf < num_vfs; ++vf) { @@ -8368,17 +8401,16 @@ static int 
handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, } /* redirect to a offloaded macvlan netdev */ - netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *dfwd = netdev_priv(upper); - struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; - - if (vadapter && vadapter->netdev->ifindex == ifindex) { - *queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx; - *action = *queue; - return 0; - } - } + data.adapter = adapter; + data.ifindex = ifindex; + data.action = 0; + data.queue = 0; + if (netdev_walk_all_upper_dev_rcu(adapter->netdev, + get_macvlan_queue, &data)) { + *action = data.action; + *queue = data.queue; + + return 0; } return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 43a5eddc2c11..99805fd3d110 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3092,19 +3092,30 @@ static bool mlxsw_sp_port_dev_check(const struct net_device *dev) return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; } +static int mlxsw_lower_dev_walk(struct net_device *lower_dev, void *data) +{ + struct mlxsw_sp_port **port = data; + int ret = 0; + + if (mlxsw_sp_port_dev_check(lower_dev)) { + *port = netdev_priv(lower_dev); + ret = 1; + } + + return ret; +} + static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) { - struct net_device *lower_dev; - struct list_head *iter; + struct mlxsw_sp_port *port; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - netdev_for_each_all_lower_dev(dev, lower_dev, iter) { - if (mlxsw_sp_port_dev_check(lower_dev)) - return netdev_priv(lower_dev); - } - return NULL; + port = NULL; + netdev_walk_all_lower_dev(dev, mlxsw_lower_dev_walk, &port); + + return port; } static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) @@ -3117,17 +3128,15 @@ static struct mlxsw_sp 
*mlxsw_sp_lower_get(struct net_device *dev) static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) { - struct net_device *lower_dev; - struct list_head *iter; + struct mlxsw_sp_port *port; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - netdev_for_each_all_lower_dev_rcu(dev, lower_dev, iter) { - if (mlxsw_sp_port_dev_check(lower_dev)) - return netdev_priv(lower_dev); - } - return NULL; + port = NULL; + netdev_walk_all_lower_dev_rcu(dev, mlxsw_lower_dev_walk, &port); + + return port; } struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 9b64f369076d..2e81b702a927 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2839,20 +2839,37 @@ static bool rocker_port_dev_check_under(const struct net_device *dev, return true; } +struct rocker_walk_data { + struct rocker *rocker; + struct rocker_port *port; +}; + +static int rocker_lower_dev_walk(struct net_device *lower_dev, void *_data) +{ + struct rocker_walk_data *data = _data; + int ret = 0; + + if (rocker_port_dev_check_under(lower_dev, data->rocker)) { + data->port = netdev_priv(lower_dev); + ret = 1; + } + + return ret; +} + struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, struct rocker *rocker) { - struct net_device *lower_dev; - struct list_head *iter; + struct rocker_walk_data data; if (rocker_port_dev_check_under(dev, rocker)) return netdev_priv(dev); - netdev_for_each_all_lower_dev(dev, lower_dev, iter) { - if (rocker_port_dev_check_under(lower_dev, rocker)) - return netdev_priv(lower_dev); - } - return NULL; + data.rocker = rocker; + data.port = NULL; + netdev_walk_all_lower_dev(dev, rocker_lower_dev_walk, &data); + + return data.port; } static int rocker_netdevice_event(struct notifier_block *unused, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h 
index bf341b65ca5e..458c87631e7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,7 +1456,6 @@ enum netdev_priv_flags { * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding - * @all_adj_list: All linked devices, *including* neighbours * @features: Currently active device features * @hw_features: User-changeable features * @@ -1675,11 +1674,6 @@ struct net_device { struct list_head lower; } adj_list; - struct { - struct list_head upper; - struct list_head lower; - } all_adj_list; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; @@ -3771,12 +3765,13 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, updev; \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) -/* iterate through upper list, must be called under RCU read lock */ -#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ - for (iter = &(dev)->all_adj_list.upper, \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \ - updev; \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *upper_dev, + void *data), + void *data); + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev); void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); @@ -3809,17 +3804,14 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, struct list_head **iter); -#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - ldev = netdev_all_lower_get_next(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next(dev, &(iter))) - -#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - 
ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 352e98129601..f55fb4536016 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5137,6 +5137,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } +static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) +{ + struct net_device *dev = data; + + return upper_dev == dev; +} + /** * netdev_has_upper_dev - Check if device is linked to an upper device * @dev: device @@ -5151,11 +5158,30 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); + return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev); /** + * netdev_has_upper_dev_all - Check if device is linked to an upper device + * @dev: device + * @upper_dev: upper device to check + * + * Find out if a device is linked to specified upper device and return true + * in case it is. Note that this checks the entire upper device chain. + * The caller must hold rcu lock. 
+ */ + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev) +{ + return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + upper_dev); +} +EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); + +/** * netdev_has_any_upper_dev - Check if device is linked to some device * @dev: device * @@ -5166,7 +5192,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); - return !list_empty(&dev->all_adj_list.upper); + return !list_empty(&dev->adj_list.upper); } /** @@ -5193,6 +5219,20 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); +/** + * netdev_has_any_lower_dev - Check if device is linked to some device + * @dev: device + * + * Find out if a device is linked to a lower device and return true in case + * it is. The caller must hold the RTNL lock. + */ +static bool netdev_has_any_lower_dev(struct net_device *dev) +{ + ASSERT_RTNL(); + + return !list_empty(&dev->adj_list.lower); +} + void *netdev_adjacent_get_private(struct list_head *adj_list) { struct netdev_adjacent *adj; @@ -5229,16 +5269,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); -/** - * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next device from the dev's upper list, starting from iter - * position. The caller must hold RCU read lock. 
- */ -struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter) +static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *upper; @@ -5246,14 +5278,41 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - if (&upper->list == &dev->all_adj_list.upper) + if (&upper->list == &dev->adj_list.upper) return NULL; *iter = &upper->list; return upper->dev; } -EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); + +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *udev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.upper, + udev = netdev_next_upper_dev_rcu(dev, &iter); + udev; + udev = netdev_next_upper_dev_rcu(dev, &iter)) { + /* first is the upper device itself */ + ret = fn(udev, data); + if (ret) + return ret; + + /* then look at all of its upper devices */ + ret = netdev_walk_all_upper_dev_rcu(udev, fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); /** * netdev_lower_get_next_private - Get the next ->private from the @@ -5336,51 +5395,90 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) } EXPORT_SYMBOL(netdev_lower_get_next); -/** - * netdev_all_lower_get_next - Get the next device from all lower neighbour list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RTNL lock or - * its own locking that guarantees that the neighbour all lower - * list will remain unchanged. 
- */ -struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter) +static struct net_device *netdev_next_lower_dev(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; lower = list_entry(*iter, struct netdev_adjacent, list); - if (&lower->list == &dev->all_adj_list.lower) + if (&lower->list == &dev->adj_list.lower) return NULL; *iter = lower->list.next; return lower->dev; } -EXPORT_SYMBOL(netdev_all_lower_get_next); -/** - * netdev_all_lower_get_next_rcu - Get the next device from all - * lower neighbour list, RCU variant - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RCU read lock. - */ -struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, - struct list_head **iter) +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.lower, + ldev = netdev_next_lower_dev(dev, &iter); + ldev; + ldev = netdev_next_lower_dev(dev, &iter)) { + /* first is the lower device itself */ + ret = fn(ldev, data); + if (ret) + return ret; + + /* then look at all of its lower devices */ + ret = netdev_walk_all_lower_dev(ldev, fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); + +static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; - lower = list_first_or_null_rcu(&dev->all_adj_list.lower, - struct netdev_adjacent, list); + lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + + return lower->dev; +} + +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int 
(*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.lower, + ldev = netdev_next_lower_dev_rcu(dev, &iter); + ldev; + ldev = netdev_next_lower_dev_rcu(dev, &iter)) { + /* first is the lower device itself */ + ret = fn(ldev, data); + if (ret) + return ret; + + /* then look at all of its lower devices */ + ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data); + if (ret) + return ret; + } - return lower ? lower->dev : NULL; + return 0; } -EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu); /** * netdev_lower_get_first_private_rcu - Get the first ->private from the @@ -5453,7 +5551,6 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, - u16 ref_nr, struct list_head *dev_list, void *private, bool master) { @@ -5463,7 +5560,10 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { - adj->ref_nr += ref_nr; + adj->ref_nr += 1; + pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n", + dev->name, adj_dev->name, adj->ref_nr); + return 0; } @@ -5473,12 +5573,12 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->ref_nr = ref_nr; + adj->ref_nr = 1; adj->private = private; dev_hold(adj_dev); - pr_debug("dev_hold for %s, because of link added from %s to %s\n", - adj_dev->name, dev->name, adj_dev->name); + pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", + dev->name, adj_dev->name, adj->ref_nr, adj_dev->name); if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) { ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); @@ -5517,17 +5617,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, { struct netdev_adjacent *adj; + 
pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n", + dev->name, adj_dev->name, ref_nr); + adj = __netdev_find_adj(adj_dev, dev_list); if (!adj) { - pr_err("tried to remove device %s from %s\n", + pr_err("Adjacency does not exist for device %s from %s\n", dev->name, adj_dev->name); - BUG(); + WARN_ON(1); + return; } if (adj->ref_nr > ref_nr) { - pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, - ref_nr, adj->ref_nr-ref_nr); + pr_debug("adjacency: %s to %s ref_nr - %d = %d\n", + dev->name, adj_dev->name, ref_nr, + adj->ref_nr - ref_nr); adj->ref_nr -= ref_nr; return; } @@ -5539,7 +5644,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); - pr_debug("dev_put for %s, because link removed from %s to %s\n", + pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); dev_put(adj_dev); kfree_rcu(adj, rcu); @@ -5547,38 +5652,27 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, - u16 ref_nr, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, + ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, + ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list); return ret; } return 0; } -static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev, - u16 ref_nr) -{ - return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, - &dev->all_adj_list.upper, - 
&upper_dev->all_adj_list.lower, - NULL, false); -} - static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, u16 ref_nr, @@ -5589,40 +5683,19 @@ static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } -static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev, - u16 ref_nr) -{ - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower); -} - static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); - - if (ret) - return ret; - - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, - &dev->adj_list.upper, - &upper_dev->adj_list.lower, - private, master); - if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev, 1); - return ret; - } - - return 0; + return __netdev_adjacent_dev_link_lists(dev, upper_dev, + &dev->adj_list.upper, + &upper_dev->adj_list.lower, + private, master); } static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev, 1); __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); @@ -5633,7 +5706,6 @@ static int __netdev_upper_dev_link(struct net_device *dev, void *upper_priv, void *upper_info) { struct netdev_notifier_changeupper_info changeupper_info; - struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; ASSERT_RTNL(); @@ -5642,10 +5714,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. 
*/ - if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) + if (netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; - if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) + if (netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) @@ -5667,80 +5739,15 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - /* Now that we linked these devs, make all the upper_dev's - * all_adj_list.upper visible to every dev's all_adj_list.lower an - * versa, and don't forget the devices itself. All of these - * links are non-neighbours. - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { - pr_debug("Interlinking %s with %s, non-neighbour\n", - i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr); - if (ret) - goto rollback_mesh; - } - } - - /* add dev to every upper_dev's upper device */ - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { - pr_debug("linking %s's upper device %s with %s\n", - upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr); - if (ret) - goto rollback_upper_mesh; - } - - /* add upper_dev to every dev's lower device */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - pr_debug("linking %s's lower device %s with %s\n", dev->name, - i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr); - if (ret) - goto rollback_lower_mesh; - } - ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) - goto rollback_lower_mesh; + goto rollback; return 0; -rollback_lower_mesh: - to_i = i; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - if (i == to_i) - break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); - } - - i = NULL; - -rollback_upper_mesh: - to_i = i; - 
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { - if (i == to_i) - break; - __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); - } - - i = j = NULL; - -rollback_mesh: - to_i = i; - to_j = j; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { - if (i == to_i && j == to_j) - break; - __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); - } - if (i == to_i) - break; - } - +rollback: __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); return ret; @@ -5797,7 +5804,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { struct netdev_notifier_changeupper_info changeupper_info; - struct netdev_adjacent *i, *j; ASSERT_RTNL(); changeupper_info.upper_dev = upper_dev; @@ -5809,23 +5815,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); - /* Here is the tricky part. We must remove all dev's lower - * devices from all upper_dev's upper devices and vice - * versa, to maintain the graph relationship. - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); - - /* remove also the devices itself from lower/upper device - * list - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); - - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); } @@ -6649,6 +6638,7 @@ static void rollback_registered_many(struct list_head *head) /* Notifier chain MUST detach us all upper devices. 
*/ WARN_ON(netdev_has_any_upper_dev(dev)); + WARN_ON(netdev_has_any_lower_dev(dev)); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); @@ -7527,8 +7517,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->link_watch_list); INIT_LIST_HEAD(&dev->adj_list.upper); INIT_LIST_HEAD(&dev->adj_list.lower); - INIT_LIST_HEAD(&dev->all_adj_list.upper); - INIT_LIST_HEAD(&dev->all_adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); #ifdef CONFIG_NET_SCHED |