Skip to content

Commit 5bb61cb

Browse files
committed
Merge branch 'netdev-adjacency'
David Ahern says: ==================== net: Fix netdev adjacency tracking The netdev adjacency tracking is failing to create proper dependencies for some topologies. For example this topology +--------+ | myvrf | +--------+ | | | +---------+ | | macvlan | | +---------+ | | +----------+ | bridge | +----------+ | +--------+ | bond1 | +--------+ | +--------+ | eth3 | +--------+ hits 1 of 2 problems depending on the order of enslavement. The base set of commands for both cases: ip link add bond1 type bond ip link set bond1 up ip link set eth3 down ip link set eth3 master bond1 ip link set eth3 up ip link add bridge type bridge ip link set bridge up ip link add macvlan link bridge type macvlan ip link set macvlan up ip link add myvrf type vrf table 1234 ip link set myvrf up ip link set bridge master myvrf Case 1 enslave macvlan to the vrf before enslaving the bond to the bridge: ip link set macvlan master myvrf ip link set bond1 master bridge Attempts to delete the VRF: ip link delete myvrf trigger the BUG in __netdev_adjacent_dev_remove: [ 587.405260] tried to remove device eth3 from myvrf [ 587.407269] ------------[ cut here ]------------ [ 587.408918] kernel BUG at /home/dsa/kernel.git/net/core/dev.c:5661! [ 587.411113] invalid opcode: 0000 [#1] SMP [ 587.412454] Modules linked in: macvlan bridge stp llc bonding vrf [ 587.414765] CPU: 0 PID: 726 Comm: ip Not tainted 4.8.0+ #109 [ 587.416766] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 587.420241] task: ffff88013ab6eec0 task.stack: ffffc90000628000 [ 587.422163] RIP: 0010:[<ffffffff813cef03>] [<ffffffff813cef03>] __netdev_adjacent_dev_remove+0x40/0x12c ... [ 587.446053] Call Trace: [ 587.446424] [<ffffffff813d1542>] __netdev_adjacent_dev_unlink+0x20/0x3c [ 587.447390] [<ffffffff813d16a3>] netdev_upper_dev_unlink+0xfa/0x15e [ 587.448297] [<ffffffffa00003a3>] vrf_del_slave+0x13/0x2a [vrf] [ 587.449153] [<ffffffffa00004a4>] vrf_dev_uninit+0xea/0x114 [vrf] [ 587.450036] [<ffffffff813d19b0>] rollback_registered_many+0x22b/0x2da [ 587.450974] [<ffffffff813d1aac>] unregister_netdevice_many+0x17/0x48 [ 587.451903] [<ffffffff813de444>] rtnl_delete_link+0x3c/0x43 [ 587.452719] [<ffffffff813dedcd>] rtnl_dellink+0x180/0x194 When the BUG is converted to a WARN_ON it shows 4 missing adjacencies: eth3 - myvrf, mvrf - eth3, bond1 - myvrf and myvrf - bond1 All of those are because the __netdev_upper_dev_link function does not properly link macvlan lower devices to myvrf when it is enslaved. The second case just flips the ordering of the enslavements: ip link set bond1 master bridge ip link set macvlan master myvrf Then run: ip link delete bond1 ip link delete myvrf The vrf delete command hangs because myvrf has a reference that has not been released. In this case the removal code does not account for 2 paths between eth3 and myvrf - one from bridge to vrf and the other through the macvlan. Rather than try to maintain a linked list of all upper and lower devices per netdevice, only track the direct neighbors. The remaining stack can be determined by recursively walking the neighbors. The existing netdev_for_each_all_upper_dev_rcu, netdev_for_each_all_lower_dev and netdev_for_each_all_lower_dev_rcu macros are replaced with APIs that walk the upper and lower device lists. The new APIs take a callback function and a data arg that is passed to the callback for each device in the list. Drivers using the old macros are converted in separate patches to make it easier on reviewers. It is an API conversion only; no functional change is intended. v3 - address Stephen's comment to simplify logic and remove typecasts v2 - fixed bond0 references in cover-letter - fixed definition of netdev_next_lower_dev_rcu to mirror the upper_dev version. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 5921a0f + 67b62f9 commit 5bb61cb

File tree

10 files changed

+423
-352
lines changed

10 files changed

+423
-352
lines changed

drivers/infiniband/core/core_priv.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -127,14 +127,7 @@ void ib_cache_release_one(struct ib_device *device);
127127
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
128128
struct net_device *upper)
129129
{
130-
struct net_device *_upper = NULL;
131-
struct list_head *iter;
132-
133-
netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
134-
if (_upper == upper)
135-
break;
136-
137-
return _upper == upper;
130+
return netdev_has_upper_dev_all_rcu(dev, upper);
138131
}
139132

140133
int addr_init(void);

drivers/infiniband/core/roce_gid_mgmt.c

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -437,37 +437,41 @@ static void callback_for_addr_gid_device_scan(struct ib_device *device,
437437
&parsed->gid_attr);
438438
}
439439

440+
struct upper_list {
441+
struct list_head list;
442+
struct net_device *upper;
443+
};
444+
445+
static int netdev_upper_walk(struct net_device *upper, void *data)
446+
{
447+
struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
448+
struct list_head *upper_list = data;
449+
450+
if (!entry) {
451+
pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n");
452+
return 0;
453+
}
454+
455+
list_add_tail(&entry->list, upper_list);
456+
dev_hold(upper);
457+
entry->upper = upper;
458+
459+
return 0;
460+
}
461+
440462
static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
441463
void *cookie,
442464
void (*handle_netdev)(struct ib_device *ib_dev,
443465
u8 port,
444466
struct net_device *ndev))
445467
{
446468
struct net_device *ndev = (struct net_device *)cookie;
447-
struct upper_list {
448-
struct list_head list;
449-
struct net_device *upper;
450-
};
451-
struct net_device *upper;
452-
struct list_head *iter;
453469
struct upper_list *upper_iter;
454470
struct upper_list *upper_temp;
455471
LIST_HEAD(upper_list);
456472

457473
rcu_read_lock();
458-
netdev_for_each_all_upper_dev_rcu(ndev, upper, iter) {
459-
struct upper_list *entry = kmalloc(sizeof(*entry),
460-
GFP_ATOMIC);
461-
462-
if (!entry) {
463-
pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n");
464-
continue;
465-
}
466-
467-
list_add_tail(&entry->list, &upper_list);
468-
dev_hold(upper);
469-
entry->upper = upper;
470-
}
474+
netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
471475
rcu_read_unlock();
472476

473477
handle_netdev(ib_dev, port, ndev);

drivers/infiniband/ulp/ipoib/ipoib_main.c

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,25 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev)
292292
return dev;
293293
}
294294

295+
struct ipoib_walk_data {
296+
const struct sockaddr *addr;
297+
struct net_device *result;
298+
};
299+
300+
static int ipoib_upper_walk(struct net_device *upper, void *_data)
301+
{
302+
struct ipoib_walk_data *data = _data;
303+
int ret = 0;
304+
305+
if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) {
306+
dev_hold(upper);
307+
data->result = upper;
308+
ret = 1;
309+
}
310+
311+
return ret;
312+
}
313+
295314
/**
296315
* Find a net_device matching the given address, which is an upper device of
297316
* the given net_device.
@@ -304,27 +323,21 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev)
304323
static struct net_device *ipoib_get_net_dev_match_addr(
305324
const struct sockaddr *addr, struct net_device *dev)
306325
{
307-
struct net_device *upper,
308-
*result = NULL;
309-
struct list_head *iter;
326+
struct ipoib_walk_data data = {
327+
.addr = addr,
328+
};
310329

311330
rcu_read_lock();
312331
if (ipoib_is_dev_match_addr_rcu(addr, dev)) {
313332
dev_hold(dev);
314-
result = dev;
333+
data.result = dev;
315334
goto out;
316335
}
317336

318-
netdev_for_each_all_upper_dev_rcu(dev, upper, iter) {
319-
if (ipoib_is_dev_match_addr_rcu(addr, upper)) {
320-
dev_hold(upper);
321-
result = upper;
322-
break;
323-
}
324-
}
337+
netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data);
325338
out:
326339
rcu_read_unlock();
327-
return result;
340+
return data.result;
328341
}
329342

330343
/* returns the number of IPoIB netdevs on top a given ipoib device matching a

drivers/net/bonding/bond_alb.c

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -950,13 +950,61 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],
950950
dev_queue_xmit(skb);
951951
}
952952

953+
struct alb_walk_data {
954+
struct bonding *bond;
955+
struct slave *slave;
956+
u8 *mac_addr;
957+
bool strict_match;
958+
};
959+
960+
static int alb_upper_dev_walk(struct net_device *upper, void *_data)
961+
{
962+
struct alb_walk_data *data = _data;
963+
bool strict_match = data->strict_match;
964+
struct bonding *bond = data->bond;
965+
struct slave *slave = data->slave;
966+
u8 *mac_addr = data->mac_addr;
967+
struct bond_vlan_tag *tags;
968+
969+
if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) {
970+
if (strict_match &&
971+
ether_addr_equal_64bits(mac_addr,
972+
upper->dev_addr)) {
973+
alb_send_lp_vid(slave, mac_addr,
974+
vlan_dev_vlan_proto(upper),
975+
vlan_dev_vlan_id(upper));
976+
} else if (!strict_match) {
977+
alb_send_lp_vid(slave, upper->dev_addr,
978+
vlan_dev_vlan_proto(upper),
979+
vlan_dev_vlan_id(upper));
980+
}
981+
}
982+
983+
/* If this is a macvlan device, then only send updates
984+
* when strict_match is turned off.
985+
*/
986+
if (netif_is_macvlan(upper) && !strict_match) {
987+
tags = bond_verify_device_path(bond->dev, upper, 0);
988+
if (IS_ERR_OR_NULL(tags))
989+
BUG();
990+
alb_send_lp_vid(slave, upper->dev_addr,
991+
tags[0].vlan_proto, tags[0].vlan_id);
992+
kfree(tags);
993+
}
994+
995+
return 0;
996+
}
997+
953998
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
954999
bool strict_match)
9551000
{
9561001
struct bonding *bond = bond_get_bond_by_slave(slave);
957-
struct net_device *upper;
958-
struct list_head *iter;
959-
struct bond_vlan_tag *tags;
1002+
struct alb_walk_data data = {
1003+
.strict_match = strict_match,
1004+
.mac_addr = mac_addr,
1005+
.slave = slave,
1006+
.bond = bond,
1007+
};
9601008

9611009
/* send untagged */
9621010
alb_send_lp_vid(slave, mac_addr, 0, 0);
@@ -965,33 +1013,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
9651013
* for that device.
9661014
*/
9671015
rcu_read_lock();
968-
netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
969-
if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) {
970-
if (strict_match &&
971-
ether_addr_equal_64bits(mac_addr,
972-
upper->dev_addr)) {
973-
alb_send_lp_vid(slave, mac_addr,
974-
vlan_dev_vlan_proto(upper),
975-
vlan_dev_vlan_id(upper));
976-
} else if (!strict_match) {
977-
alb_send_lp_vid(slave, upper->dev_addr,
978-
vlan_dev_vlan_proto(upper),
979-
vlan_dev_vlan_id(upper));
980-
}
981-
}
982-
983-
/* If this is a macvlan device, then only send updates
984-
* when strict_match is turned off.
985-
*/
986-
if (netif_is_macvlan(upper) && !strict_match) {
987-
tags = bond_verify_device_path(bond->dev, upper, 0);
988-
if (IS_ERR_OR_NULL(tags))
989-
BUG();
990-
alb_send_lp_vid(slave, upper->dev_addr,
991-
tags[0].vlan_proto, tags[0].vlan_id);
992-
kfree(tags);
993-
}
994-
}
1016+
netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &data);
9951017
rcu_read_unlock();
9961018
}
9971019

drivers/net/bonding/bond_main.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2270,22 +2270,23 @@ static void bond_mii_monitor(struct work_struct *work)
22702270
}
22712271
}
22722272

2273+
static int bond_upper_dev_walk(struct net_device *upper, void *data)
2274+
{
2275+
__be32 ip = *((__be32 *)data);
2276+
2277+
return ip == bond_confirm_addr(upper, 0, ip);
2278+
}
2279+
22732280
static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
22742281
{
2275-
struct net_device *upper;
2276-
struct list_head *iter;
22772282
bool ret = false;
22782283

22792284
if (ip == bond_confirm_addr(bond->dev, 0, ip))
22802285
return true;
22812286

22822287
rcu_read_lock();
2283-
netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
2284-
if (ip == bond_confirm_addr(upper, 0, ip)) {
2285-
ret = true;
2286-
break;
2287-
}
2288-
}
2288+
if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &ip))
2289+
ret = true;
22892290
rcu_read_unlock();
22902291

22912292
return ret;

0 commit comments

Comments
 (0)