Skip to content

Commit 7c6bb7d

Browse files
dsahern authored and davem330 committed
net/ipv6: Add knob to skip DELROUTE message on device down
Another difference between IPv4 and IPv6 is the generation of RTM_DELROUTE notifications when a device is taken down (admin down) or deleted. IPv4 does not generate a message for routes evicted by the down or delete; IPv6 does. A NOS at scale really needs to avoid these messages and have IPv4 and IPv6 behave similarly, relying on userspace to handle link notifications and evict the routes. At this point existing user behavior needs to be preserved. Since notifications are a global action (not per app) the only way to preserve existing behavior and allow the messages to be skipped is to add a new sysctl (net/ipv6/route/skip_notify_on_dev_down) which can be set to disable the notifications. IPv6 route code already supports the option to skip the message (it is used for multipath routes for example). Besides the new sysctl we need to pass the skip_notify setting through the generic fib6_clean and fib6_walk functions to fib6_clean_node and to set skip_notify on calls to __ip_del_rt for the addrconf_ifdown path. Signed-off-by: David Ahern <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 7cc2d50 commit 7c6bb7d

File tree

5 files changed

+46
-6
lines changed

5 files changed

+46
-6
lines changed

Documentation/networking/ip-sysctl.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,14 @@ max_hbh_length - INTEGER
14421442
header.
14431443
Default: INT_MAX (unlimited)
14441444

1445+
skip_notify_on_dev_down - BOOLEAN
1446+
Controls whether an RTM_DELROUTE message is generated for routes
1447+
removed when a device is taken down or deleted. IPv4 does not
1448+
generate this message; IPv6 does by default. Setting this sysctl
1449+
to true skips the message, making IPv4 and IPv6 on par in relying
1450+
on userspace caches to track link events and evict routes.
1451+
Default: false (generate message)
1452+
14451453
IPv6 Fragmentation:
14461454

14471455
ip6frag_high_thresh - INTEGER

include/net/ip6_fib.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,9 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
407407

408408
void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg),
409409
void *arg);
410+
void fib6_clean_all_skip_notify(struct net *net,
411+
int (*func)(struct fib6_info *, void *arg),
412+
void *arg);
410413

411414
int fib6_add(struct fib6_node *root, struct fib6_info *rt,
412415
struct nl_info *info, struct netlink_ext_ack *extack);

include/net/netns/ipv6.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ struct netns_sysctl_ipv6 {
4545
int max_dst_opts_len;
4646
int max_hbh_opts_len;
4747
int seg6_flowlabel;
48+
/* int, not bool: the sysctl entry declares maxlen = sizeof(int) and uses an int handler */
int skip_notify_on_dev_down;
4849
};
4950

5051
struct netns_ipv6 {

net/ipv6/ip6_fib.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ struct fib6_cleaner {
4747
int (*func)(struct fib6_info *, void *arg);
4848
int sernum;
4949
void *arg;
50+
bool skip_notify;
5051
};
5152

5253
#ifdef CONFIG_IPV6_SUBTREES
@@ -1956,6 +1957,7 @@ static int fib6_clean_node(struct fib6_walker *w)
19561957
struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
19571958
struct nl_info info = {
19581959
.nl_net = c->net,
1960+
.skip_notify = c->skip_notify,
19591961
};
19601962

19611963
if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
@@ -2007,7 +2009,7 @@ static int fib6_clean_node(struct fib6_walker *w)
20072009

20082010
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
20092011
int (*func)(struct fib6_info *, void *arg),
2010-
int sernum, void *arg)
2012+
int sernum, void *arg, bool skip_notify)
20112013
{
20122014
struct fib6_cleaner c;
20132015

@@ -2019,13 +2021,14 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
20192021
c.sernum = sernum;
20202022
c.arg = arg;
20212023
c.net = net;
2024+
c.skip_notify = skip_notify;
20222025

20232026
fib6_walk(net, &c.w);
20242027
}
20252028

20262029
static void __fib6_clean_all(struct net *net,
20272030
int (*func)(struct fib6_info *, void *),
2028-
int sernum, void *arg)
2031+
int sernum, void *arg, bool skip_notify)
20292032
{
20302033
struct fib6_table *table;
20312034
struct hlist_head *head;
@@ -2037,7 +2040,7 @@ static void __fib6_clean_all(struct net *net,
20372040
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
20382041
spin_lock_bh(&table->tb6_lock);
20392042
fib6_clean_tree(net, &table->tb6_root,
2040-
func, sernum, arg);
2043+
func, sernum, arg, skip_notify);
20412044
spin_unlock_bh(&table->tb6_lock);
20422045
}
20432046
}
@@ -2047,14 +2050,21 @@ static void __fib6_clean_all(struct net *net,
20472050
void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
20482051
void *arg)
20492052
{
2050-
__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
2053+
__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
2054+
}
2055+
2056+
void fib6_clean_all_skip_notify(struct net *net,
2057+
int (*func)(struct fib6_info *, void *),
2058+
void *arg)
2059+
{
2060+
__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
20512061
}
20522062

20532063
static void fib6_flush_trees(struct net *net)
20542064
{
20552065
int new_sernum = fib6_new_sernum(net);
20562066

2057-
__fib6_clean_all(net, NULL, new_sernum, NULL);
2067+
__fib6_clean_all(net, NULL, new_sernum, NULL, false);
20582068
}
20592069

20602070
/*

net/ipv6/route.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4026,8 +4026,12 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
40264026
.event = event,
40274027
},
40284028
};
4029+
struct net *net = dev_net(dev);
40294030

4030-
fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4031+
if (net->ipv6.sysctl.skip_notify_on_dev_down)
4032+
fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4033+
else
4034+
fib6_clean_all(net, fib6_ifdown, &arg);
40314035
}
40324036

40334037
void rt6_disable_ip(struct net_device *dev, unsigned long event)
@@ -5031,6 +5035,9 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
50315035
return 0;
50325036
}
50335037

5038+
static int zero;
5039+
static int one = 1;
5040+
50345041
static struct ctl_table ipv6_route_table_template[] = {
50355042
{
50365043
.procname = "flush",
@@ -5102,6 +5109,15 @@ static struct ctl_table ipv6_route_table_template[] = {
51025109
.mode = 0644,
51035110
.proc_handler = proc_dointvec_ms_jiffies,
51045111
},
5112+
{
5113+
.procname = "skip_notify_on_dev_down",
5114+
.data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5115+
.maxlen = sizeof(int),
5116+
.mode = 0644,
5117+
.proc_handler = proc_dointvec_minmax, /* plain proc_dointvec ignores extra1/extra2 */
5118+
.extra1 = &zero,
5119+
.extra2 = &one,
5120+
},
51055121
{ }
51065122
};
51075123

@@ -5125,6 +5141,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
51255141
table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
51265142
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
51275143
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5144+
table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
51285145

51295146
/* Don't export sysctls to unprivileged users */
51305147
if (net->user_ns != &init_user_ns)
@@ -5189,6 +5206,7 @@ static int __net_init ip6_route_net_init(struct net *net)
51895206
net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
51905207
net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
51915208
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5209+
net->ipv6.sysctl.skip_notify_on_dev_down = 0;
51925210

51935211
net->ipv6.ip6_rt_gc_expire = 30*HZ;
51945212

0 commit comments

Comments
 (0)