Skip to content

Commit e5f80fc

Browse files
Eric Dumazet authored and davem330 committed
ipv6: give an IPv6 dev to blackhole_netdev
The IPv6 addrconf notifiers want the loopback device to be the last device being dismantled at netns deletion. This caused many limitations and workarounds. Back in linux-5.3, Mahesh added a per-host blackhole_netdev that can be used whenever we need to make sure objects no longer refer to a disappearing device. If we attach an ip6_ptr to blackhole_netdev (i.e. allocate an idev for it), then we can use this special device (which is never freed) in place of the loopback_dev (which can be freed). This will permit improvements in netdev_run_todo() and other parts of the stack where we had steps to make sure loopback_dev was the last device to disappear. Signed-off-by: Eric Dumazet <[email protected]> Cc: Mahesh Bandewar <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 2d4feb2 commit e5f80fc

File tree

2 files changed

+40
-59
lines changed

2 files changed

+40
-59
lines changed

net/ipv6/addrconf.c

Lines changed: 32 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
372372

373373
ASSERT_RTNL();
374374

375-
if (dev->mtu < IPV6_MIN_MTU)
375+
if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
376376
return ERR_PTR(-EINVAL);
377377

378378
ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
@@ -400,21 +400,22 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
400400
/* We refer to the device */
401401
dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL);
402402

403-
if (snmp6_alloc_dev(ndev) < 0) {
404-
netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
405-
__func__);
406-
neigh_parms_release(&nd_tbl, ndev->nd_parms);
407-
dev_put_track(dev, &ndev->dev_tracker);
408-
kfree(ndev);
409-
return ERR_PTR(err);
410-
}
403+
if (dev != blackhole_netdev) {
404+
if (snmp6_alloc_dev(ndev) < 0) {
405+
netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
406+
__func__);
407+
neigh_parms_release(&nd_tbl, ndev->nd_parms);
408+
dev_put_track(dev, &ndev->dev_tracker);
409+
kfree(ndev);
410+
return ERR_PTR(err);
411+
}
411412

412-
if (snmp6_register_dev(ndev) < 0) {
413-
netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
414-
__func__, dev->name);
415-
goto err_release;
413+
if (snmp6_register_dev(ndev) < 0) {
414+
netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
415+
__func__, dev->name);
416+
goto err_release;
417+
}
416418
}
417-
418419
/* One reference from device. */
419420
refcount_set(&ndev->refcnt, 1);
420421

@@ -445,25 +446,28 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
445446

446447
ipv6_mc_init_dev(ndev);
447448
ndev->tstamp = jiffies;
448-
err = addrconf_sysctl_register(ndev);
449-
if (err) {
450-
ipv6_mc_destroy_dev(ndev);
451-
snmp6_unregister_dev(ndev);
452-
goto err_release;
449+
if (dev != blackhole_netdev) {
450+
err = addrconf_sysctl_register(ndev);
451+
if (err) {
452+
ipv6_mc_destroy_dev(ndev);
453+
snmp6_unregister_dev(ndev);
454+
goto err_release;
455+
}
453456
}
454457
/* protected by rtnl_lock */
455458
rcu_assign_pointer(dev->ip6_ptr, ndev);
456459

457-
/* Join interface-local all-node multicast group */
458-
ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
460+
if (dev != blackhole_netdev) {
461+
/* Join interface-local all-node multicast group */
462+
ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
459463

460-
/* Join all-node multicast group */
461-
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
462-
463-
/* Join all-router multicast group if forwarding is set */
464-
if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
465-
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
464+
/* Join all-node multicast group */
465+
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
466466

467+
/* Join all-router multicast group if forwarding is set */
468+
if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
469+
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
470+
}
467471
return ndev;
468472

469473
err_release:
@@ -7233,26 +7237,8 @@ int __init addrconf_init(void)
72337237
goto out_nowq;
72347238
}
72357239

7236-
/* The addrconf netdev notifier requires that loopback_dev
7237-
* has it's ipv6 private information allocated and setup
7238-
* before it can bring up and give link-local addresses
7239-
* to other devices which are up.
7240-
*
7241-
* Unfortunately, loopback_dev is not necessarily the first
7242-
* entry in the global dev_base list of net devices. In fact,
7243-
* it is likely to be the very last entry on that list.
7244-
* So this causes the notifier registry below to try and
7245-
* give link-local addresses to all devices besides loopback_dev
7246-
* first, then loopback_dev, which cases all the non-loopback_dev
7247-
* devices to fail to get a link-local address.
7248-
*
7249-
* So, as a temporary fix, allocate the ipv6 structure for
7250-
* loopback_dev first by hand.
7251-
* Longer term, all of the dependencies ipv6 has upon the loopback
7252-
* device and it being up should be removed.
7253-
*/
72547240
rtnl_lock();
7255-
idev = ipv6_add_dev(init_net.loopback_dev);
7241+
idev = ipv6_add_dev(blackhole_netdev);
72567242
rtnl_unlock();
72577243
if (IS_ERR(idev)) {
72587244
err = PTR_ERR(idev);

net/ipv6/route.c

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -156,14 +156,10 @@ void rt6_uncached_list_del(struct rt6_info *rt)
156156
}
157157
}
158158

159-
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
159+
static void rt6_uncached_list_flush_dev(struct net_device *dev)
160160
{
161-
struct net_device *loopback_dev = net->loopback_dev;
162161
int cpu;
163162

164-
if (dev == loopback_dev)
165-
return;
166-
167163
for_each_possible_cpu(cpu) {
168164
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169165
struct rt6_info *rt;
@@ -174,7 +170,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
174170
struct net_device *rt_dev = rt->dst.dev;
175171

176172
if (rt_idev->dev == dev) {
177-
rt->rt6i_idev = in6_dev_get(loopback_dev);
173+
rt->rt6i_idev = in6_dev_get(blackhole_netdev);
178174
in6_dev_put(rt_idev);
179175
}
180176

@@ -371,13 +367,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
371367
{
372368
struct rt6_info *rt = (struct rt6_info *)dst;
373369
struct inet6_dev *idev = rt->rt6i_idev;
374-
struct net_device *loopback_dev =
375-
dev_net(dev)->loopback_dev;
376370

377-
if (idev && idev->dev != loopback_dev) {
378-
struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
379-
if (loopback_idev) {
380-
rt->rt6i_idev = loopback_idev;
371+
if (idev && idev->dev != blackhole_netdev) {
372+
struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
373+
374+
if (blackhole_idev) {
375+
rt->rt6i_idev = blackhole_idev;
381376
in6_dev_put(idev);
382377
}
383378
}
@@ -4892,7 +4887,7 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
48924887
void rt6_disable_ip(struct net_device *dev, unsigned long event)
48934888
{
48944889
rt6_sync_down_dev(dev, event);
4895-
rt6_uncached_list_flush_dev(dev_net(dev), dev);
4890+
rt6_uncached_list_flush_dev(dev);
48964891
neigh_ifdown(&nd_tbl, dev);
48974892
}
48984893

0 commit comments

Comments
 (0)