Skip to content

Commit a94b936

Browse files
tracywwnj authored and davem330 committed
ipv6: grab rt->rt6i_ref before allocating pcpu rt
After rwlock is replaced with rcu and spinlock, ip6_pol_route() will be called with only rcu held. That means rt6 route deletion could happen simultaneously with rt6_make_pcpu_rt(). This could potentially cause a memory leak if rt6_release() is called right before rt6_make_pcpu_rt() on the same route.

This patch grabs rt->rt6i_ref safely before calling rt6_make_pcpu_rt() to make sure rt6_release() will not get triggered while rt6_make_pcpu_rt() is in progress. And rt6_release() is called after rt6_make_pcpu_rt() is finished.

Note: As we are incrementing rt->rt6i_ref in ip6_pol_route(), there is a very slim chance that fib6_purge_rt() will be triggered unnecessarily when deleting a route if ip6_pol_route() running on another thread picks this route as well and tries to make pcpu cache for it.

Signed-off-by: Wei Wang <[email protected]>
Signed-off-by: Martin KaFai Lau <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 2b760fc commit a94b936

File tree

1 file changed

+29
-29
lines changed

1 file changed

+29
-29
lines changed

net/ipv6/route.c

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1070,7 +1070,6 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
10701070

10711071
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
10721072
{
1073-
struct fib6_table *table = rt->rt6i_table;
10741073
struct rt6_info *pcpu_rt, *prev, **p;
10751074

10761075
pcpu_rt = ip6_rt_pcpu_alloc(rt);
@@ -1081,28 +1080,20 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
10811080
return net->ipv6.ip6_null_entry;
10821081
}
10831082

1084-
read_lock_bh(&table->tb6_lock);
1085-
if (rt->rt6i_pcpu) {
1086-
p = this_cpu_ptr(rt->rt6i_pcpu);
1087-
prev = cmpxchg(p, NULL, pcpu_rt);
1088-
if (prev) {
1089-
/* If someone did it before us, return prev instead */
1090-
dst_release_immediate(&pcpu_rt->dst);
1091-
pcpu_rt = prev;
1092-
}
1093-
} else {
1094-
/* rt has been removed from the fib6 tree
1095-
* before we have a chance to acquire the read_lock.
1096-
* In this case, don't brother to create a pcpu rt
1097-
* since rt is going away anyway. The next
1098-
* dst_check() will trigger a re-lookup.
1099-
*/
1083+
dst_hold(&pcpu_rt->dst);
1084+
p = this_cpu_ptr(rt->rt6i_pcpu);
1085+
prev = cmpxchg(p, NULL, pcpu_rt);
1086+
if (prev) {
1087+
/* If someone did it before us, return prev instead */
1088+
/* release refcnt taken by ip6_rt_pcpu_alloc() */
1089+
dst_release_immediate(&pcpu_rt->dst);
1090+
/* release refcnt taken by above dst_hold() */
11001091
dst_release_immediate(&pcpu_rt->dst);
1101-
pcpu_rt = rt;
1092+
dst_hold(&prev->dst);
1093+
pcpu_rt = prev;
11021094
}
1103-
dst_hold(&pcpu_rt->dst);
1095+
11041096
rt6_dst_from_metrics_check(pcpu_rt);
1105-
read_unlock_bh(&table->tb6_lock);
11061097
return pcpu_rt;
11071098
}
11081099

@@ -1683,19 +1674,28 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
16831674
if (pcpu_rt) {
16841675
read_unlock_bh(&table->tb6_lock);
16851676
} else {
1686-
/* We have to do the read_unlock first
1687-
* because rt6_make_pcpu_route() may trigger
1688-
* ip6_dst_gc() which will take the write_lock.
1689-
*/
1690-
dst_hold(&rt->dst);
1691-
read_unlock_bh(&table->tb6_lock);
1692-
pcpu_rt = rt6_make_pcpu_route(rt);
1693-
dst_release(&rt->dst);
1677+
/* atomic_inc_not_zero() is needed when using rcu */
1678+
if (atomic_inc_not_zero(&rt->rt6i_ref)) {
1679+
/* We have to do the read_unlock first
1680+
* because rt6_make_pcpu_route() may trigger
1681+
* ip6_dst_gc() which will take the write_lock.
1682+
*
1683+
* No dst_hold() on rt is needed because grabbing
1684+
* rt->rt6i_ref makes sure rt can't be released.
1685+
*/
1686+
read_unlock_bh(&table->tb6_lock);
1687+
pcpu_rt = rt6_make_pcpu_route(rt);
1688+
rt6_release(rt);
1689+
} else {
1690+
/* rt is already removed from tree */
1691+
read_unlock_bh(&table->tb6_lock);
1692+
pcpu_rt = net->ipv6.ip6_null_entry;
1693+
dst_hold(&pcpu_rt->dst);
1694+
}
16941695
}
16951696

16961697
trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
16971698
return pcpu_rt;
1698-
16991699
}
17001700
}
17011701
EXPORT_SYMBOL_GPL(ip6_pol_route);

0 commit comments

Comments
 (0)