Skip to content

Commit f66faae

Browse files
committed
Merge branch 'ipv6-ipv4-nexthop-align'
Ido Schimmel says:

====================
ipv6: Align nexthop behaviour with IPv4

This set tries to eliminate some differences between IPv4's and IPv6's treatment of nexthops. These differences are most likely a side effect of IPv6's data structures (specifically 'rt6_info') that incorporate both the route and the nexthop, and of the late addition of ECMP support in commit 51ebd31 ("ipv6: add support of equal cost multipath (ECMP)").

IPv4 and IPv6 do not react the same to certain netdev events. For example, upon carrier change, affected IPv4 nexthops are marked using the RTNH_F_LINKDOWN flag and the nexthop group is rebalanced accordingly. IPv6, on the other hand, does nothing, which forces us to perform a carrier check during route lookup and dump. This makes it difficult to introduce features such as non-equal-cost multipath that are built on top of this set [1].

In addition, when a netdev is put administratively down, IPv4 nexthops are marked using the RTNH_F_DEAD flag, whereas IPv6 simply flushes all the routes using these nexthops. To be consistent with IPv4, multipath routes should only be flushed when all nexthops in the group are considered dead.

The first 12 patches introduce non-functional changes that store the RTNH_F_DEAD and RTNH_F_LINKDOWN flags in IPv6 routes based on netdev events, in a similar fashion to IPv4. This allows us to remove the carrier check performed during route lookup and dump.

The next three patches make sure we only flush a multipath route when all of its nexthops are dead.

The last three patches add test cases for IPv4/IPv6 FIB. These verify that both address families react similarly to netdev events.

Finally, this series also serves as a good first step towards David Ahern's goal of treating nexthops as standalone objects [2], as it makes the code more in line with IPv4, where the nexthop and the nexthop group are separate objects from the route itself.

1. https://github.com/idosch/linux/tree/ipv6-nexthops
2. http://vger.kernel.org/netconf2017_files/nexthop-objects.pdf

Changes since RFC (feedback from David Ahern):
* Remove redundant declaration of rt6_ifdown() in patch 4 and adjust comment referencing it accordingly
* Drop patch to flush multipath routes upon NETDEV_UNREGISTER. Reword cover letter accordingly
* Use a temporary variable to make code more readable in patch 15
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 7f0b800 + 82e45b6 commit f66faae

File tree

7 files changed

+618
-42
lines changed

7 files changed

+618
-42
lines changed

include/net/ip6_fib.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ struct rt6_info {
173173
unsigned short rt6i_nfheader_len;
174174
u8 rt6i_protocol;
175175
u8 exception_bucket_flushed:1,
176-
unused:7;
176+
should_flush:1,
177+
unused:6;
177178
};
178179

179180
#define for_each_fib6_node_rt_rcu(fn) \
@@ -404,6 +405,7 @@ unsigned int fib6_tables_seq_read(struct net *net);
404405
int fib6_tables_dump(struct net *net, struct notifier_block *nb);
405406

406407
void fib6_update_sernum(struct rt6_info *rt);
408+
void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
407409

408410
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
409411
int fib6_rules_init(void);

include/net/ip6_route.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,12 @@ struct rt6_rtnl_dump_arg {
165165
};
166166

167167
int rt6_dump_route(struct rt6_info *rt, void *p_arg);
168-
void rt6_ifdown(struct net *net, struct net_device *dev);
169168
void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
170169
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
171170
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
171+
void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
172+
void rt6_disable_ip(struct net_device *dev, unsigned long event);
173+
void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
172174

173175
static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
174176
{

net/ipv6/addrconf.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3438,6 +3438,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
34383438
} else if (event == NETDEV_CHANGE) {
34393439
if (!addrconf_link_ready(dev)) {
34403440
/* device is still not ready. */
3441+
rt6_sync_down_dev(dev, event);
34413442
break;
34423443
}
34433444

@@ -3449,6 +3450,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
34493450
* multicast snooping switches
34503451
*/
34513452
ipv6_mc_up(idev);
3453+
rt6_sync_up(dev, RTNH_F_LINKDOWN);
34523454
break;
34533455
}
34543456
idev->if_flags |= IF_READY;
@@ -3484,6 +3486,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
34843486
if (run_pending)
34853487
addrconf_dad_run(idev);
34863488

3489+
/* Device has an address by now */
3490+
rt6_sync_up(dev, RTNH_F_DEAD);
3491+
34873492
/*
34883493
* If the MTU changed during the interface down,
34893494
* when the interface up, the changed MTU must be
@@ -3577,6 +3582,7 @@ static bool addr_is_local(const struct in6_addr *addr)
35773582

35783583
static int addrconf_ifdown(struct net_device *dev, int how)
35793584
{
3585+
unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN;
35803586
struct net *net = dev_net(dev);
35813587
struct inet6_dev *idev;
35823588
struct inet6_ifaddr *ifa, *tmp;
@@ -3586,8 +3592,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
35863592

35873593
ASSERT_RTNL();
35883594

3589-
rt6_ifdown(net, dev);
3590-
neigh_ifdown(&nd_tbl, dev);
3595+
rt6_disable_ip(dev, event);
35913596

35923597
idev = __in6_dev_get(dev);
35933598
if (!idev)

net/ipv6/ip6_fib.c

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,13 @@ enum {
107107

108108
void fib6_update_sernum(struct rt6_info *rt)
109109
{
110-
struct fib6_table *table = rt->rt6i_table;
111110
struct net *net = dev_net(rt->dst.dev);
112111
struct fib6_node *fn;
113112

114-
spin_lock_bh(&table->tb6_lock);
115113
fn = rcu_dereference_protected(rt->rt6i_node,
116-
lockdep_is_held(&table->tb6_lock));
114+
lockdep_is_held(&rt->rt6i_table->tb6_lock));
117115
if (fn)
118116
fn->fn_sernum = fib6_new_sernum(net);
119-
spin_unlock_bh(&table->tb6_lock);
120117
}
121118

122119
/*
@@ -1102,8 +1099,8 @@ void fib6_force_start_gc(struct net *net)
11021099
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
11031100
}
11041101

1105-
static void fib6_update_sernum_upto_root(struct rt6_info *rt,
1106-
int sernum)
1102+
static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
1103+
int sernum)
11071104
{
11081105
struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
11091106
lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1117,6 +1114,11 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt,
11171114
}
11181115
}
11191116

1117+
void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
1118+
{
1119+
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
1120+
}
1121+
11201122
/*
11211123
* Add routing information to the routing tree.
11221124
* <destination addr>/<source addr>
@@ -1230,7 +1232,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
12301232

12311233
err = fib6_add_rt2node(fn, rt, info, mxc, extack);
12321234
if (!err) {
1233-
fib6_update_sernum_upto_root(rt, sernum);
1235+
__fib6_update_sernum_upto_root(rt, sernum);
12341236
fib6_start_gc(info->nl_net, rt);
12351237
}
12361238

@@ -1887,7 +1889,7 @@ static int fib6_clean_node(struct fib6_walker *w)
18871889

18881890
for_each_fib6_walker_rt(w) {
18891891
res = c->func(rt, c->arg);
1890-
if (res < 0) {
1892+
if (res == -1) {
18911893
w->leaf = rt;
18921894
res = fib6_del(rt, &info);
18931895
if (res) {
@@ -1900,6 +1902,12 @@ static int fib6_clean_node(struct fib6_walker *w)
19001902
continue;
19011903
}
19021904
return 0;
1905+
} else if (res == -2) {
1906+
if (WARN_ON(!rt->rt6i_nsiblings))
1907+
continue;
1908+
rt = list_last_entry(&rt->rt6i_siblings,
1909+
struct rt6_info, rt6i_siblings);
1910+
continue;
19031911
}
19041912
WARN_ON(res != 0);
19051913
}
@@ -1911,7 +1919,8 @@ static int fib6_clean_node(struct fib6_walker *w)
19111919
* Convenient frontend to tree walker.
19121920
*
19131921
* func is called on each route.
1914-
* It may return -1 -> delete this route.
1922+
* It may return -2 -> skip multipath route.
1923+
* -1 -> delete this route.
19151924
* 0 -> continue walking
19161925
*/
19171926

@@ -2103,7 +2112,6 @@ static void fib6_net_exit(struct net *net)
21032112
{
21042113
unsigned int i;
21052114

2106-
rt6_ifdown(net, NULL);
21072115
del_timer_sync(&net->ipv6.ip6_fib_timer);
21082116

21092117
for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {

0 commit comments

Comments
 (0)