Skip to content

Commit 8f34e53

Browse files
dsaherndavem330
authored andcommitted
ipv6: Use global sernum for dst validation with nexthop objects
Nik reported a bug with pcpu dst cache when nexthop objects are used illustrated by the following: $ ip netns add foo $ ip -netns foo li set lo up $ ip -netns foo addr add 2001:db8:11::1/128 dev lo $ ip netns exec foo sysctl net.ipv6.conf.all.forwarding=1 $ ip li add veth1 type veth peer name veth2 $ ip li set veth1 up $ ip addr add 2001:db8:10::1/64 dev veth1 $ ip li set dev veth2 netns foo $ ip -netns foo li set veth2 up $ ip -netns foo addr add 2001:db8:10::2/64 dev veth2 $ ip -6 nexthop add id 100 via 2001:db8:10::2 dev veth1 $ ip -6 route add 2001:db8:11::1/128 nhid 100 Create a pcpu entry on cpu 0: $ taskset -a -c 0 ip -6 route get 2001:db8:11::1 Re-add the route entry: $ ip -6 ro del 2001:db8:11::1 $ ip -6 route add 2001:db8:11::1/128 nhid 100 Route get on cpu 0 returns the stale pcpu: $ taskset -a -c 0 ip -6 route get 2001:db8:11::1 RTNETLINK answers: Network is unreachable While cpu 1 works: $ taskset -a -c 1 ip -6 route get 2001:db8:11::1 2001:db8:11::1 from :: via 2001:db8:10::2 dev veth1 src 2001:db8:10::1 metric 1024 pref medium Conversion of FIB entries to work with external nexthop objects missed an important difference between IPv4 and IPv6 - how dst entries are invalidated when the FIB changes. IPv4 has a per-network namespace generation id (rt_genid) that is bumped on changes to the FIB. Checking if a dst_entry is still valid means comparing rt_genid in the rtable to the current value of rt_genid for the namespace. IPv6 also has a per network namespace counter, fib6_sernum, but the count is saved per fib6_node. With the per-node counter only dst_entries based on fib entries under the node are invalidated when changes are made to the routes - limiting the scope of invalidations. IPv6 uses a reference in the rt6_info, 'from', to track the corresponding fib entry used to create the dst_entry. When validating a dst_entry, the 'from' is used to backtrack to the fib6_node and check the sernum of it to the cookie passed to the dst_check operation. With the inline format (nexthop definition inline with the fib6_info), dst_entries cached in the fib6_nh have a 1:1 correlation between fib entries, nexthop data and dst_entries. With external nexthops, IPv6 looks more like IPv4 which means multiple fib entries across disparate fib6_nodes can all reference the same fib6_nh. That means validation of dst_entries based on external nexthops needs to use the IPv4 format - the per-network namespace counter. Add sernum to rt6_info and set it when creating a pcpu dst entry. Update rt6_get_cookie to return sernum if it is set and update dst_check for IPv6 to look for sernum set and based the check on it if so. Finally, rt6_get_pcpu_route needs to validate the cached entry before returning a pcpu entry (similar to the rt_cache_valid calls in __mkroute_input and __mkroute_output for IPv4). This problem only affects routes using the new, external nexthops. Thanks to the kbuild test robot for catching the IS_ENABLED needed around rt_genid_ipv6 before I sent this out. Fixes: 5b98324 ("ipv6: Allow routes to use nexthop objects") Reported-by: Nikolay Aleksandrov <[email protected]> Signed-off-by: David Ahern <[email protected]> Reviewed-by: Nikolay Aleksandrov <[email protected]> Tested-by: Nikolay Aleksandrov <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 69422a7 commit 8f34e53

File tree

3 files changed

+36
-0
lines changed

3 files changed

+36
-0
lines changed

include/net/ip6_fib.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ struct fib6_info {
203203
struct rt6_info {
204204
struct dst_entry dst;
205205
struct fib6_info __rcu *from;
206+
int sernum;
206207

207208
struct rt6key rt6i_dst;
208209
struct rt6key rt6i_src;
@@ -291,6 +292,9 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
291292
struct fib6_info *from;
292293
u32 cookie = 0;
293294

295+
if (rt->sernum)
296+
return rt->sernum;
297+
294298
rcu_read_lock();
295299

296300
from = rcu_dereference(rt->from);

include/net/net_namespace.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,13 @@ static inline int rt_genid_ipv4(const struct net *net)
437437
return atomic_read(&net->ipv4.rt_genid);
438438
}
439439

440+
#if IS_ENABLED(CONFIG_IPV6)
441+
static inline int rt_genid_ipv6(const struct net *net)
442+
{
443+
return atomic_read(&net->ipv6.fib6_sernum);
444+
}
445+
#endif
446+
440447
static inline void rt_genid_bump_ipv4(struct net *net)
441448
{
442449
atomic_inc(&net->ipv4.rt_genid);

net/ipv6/route.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,16 +1385,38 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
13851385
}
13861386
ip6_rt_copy_init(pcpu_rt, res);
13871387
pcpu_rt->rt6i_flags |= RTF_PCPU;
1388+
1389+
if (f6i->nh)
1390+
pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
1391+
13881392
return pcpu_rt;
13891393
}
13901394

1395+
static bool rt6_is_valid(const struct rt6_info *rt6)
1396+
{
1397+
return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
1398+
}
1399+
13911400
/* It should be called with rcu_read_lock() acquired */
13921401
static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
13931402
{
13941403
struct rt6_info *pcpu_rt;
13951404

13961405
pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
13971406

1407+
if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
1408+
struct rt6_info *prev, **p;
1409+
1410+
p = this_cpu_ptr(res->nh->rt6i_pcpu);
1411+
prev = xchg(p, NULL);
1412+
if (prev) {
1413+
dst_dev_put(&prev->dst);
1414+
dst_release(&prev->dst);
1415+
}
1416+
1417+
pcpu_rt = NULL;
1418+
}
1419+
13981420
return pcpu_rt;
13991421
}
14001422

@@ -2593,6 +2615,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
25932615

25942616
rt = container_of(dst, struct rt6_info, dst);
25952617

2618+
if (rt->sernum)
2619+
return rt6_is_valid(rt) ? dst : NULL;
2620+
25962621
rcu_read_lock();
25972622

25982623
/* All IPV6 dsts are created with ->obsolete set to the value

0 commit comments

Comments
 (0)