Skip to content

Commit 0e09edc

Browse files
tracywwnj authored and davem330 committed
ipv6: introduce RT6_LOOKUP_F_DST_NOREF flag in ip6_pol_route()
This new flag instructs the route lookup function not to take a refcnt on the dst entry. A caller that performs a route lookup with this flag must use proper RCU protection.

ip6_pol_route() is the major route lookup function for both the tx and rx paths. In this function: do not take a refcnt on the dst if the RT6_LOOKUP_F_DST_NOREF flag is set, and directly return the route entry. The caller should be holding the RCU lock when using this flag, and decides whether to take a refcnt or not.

One note on the dst cache in the uncached_list: as uncached_list does not consume a refcnt, one refcnt is always returned to the caller even if the RT6_LOOKUP_F_DST_NOREF flag is set. An uncached dst is only possible in the output path, so in that call path the caller MUST check whether the dst is in the uncached_list before assuming that no refcnt was taken on the returned dst.

Signed-off-by: Wei Wang <[email protected]>
Acked-by: Eric Dumazet <[email protected]>
Acked-by: Mahesh Bandewar <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 8c25c0c commit 0e09edc

File tree

2 files changed

+31
-43
lines changed

2 files changed

+31
-43
lines changed

include/net/ip6_route.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ struct route_info {
3636
#define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010
3737
#define RT6_LOOKUP_F_SRCPREF_COA 0x00000020
3838
#define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040
39+
#define RT6_LOOKUP_F_DST_NOREF 0x00000080
3940

4041
/* We do not (yet ?) support IPv6 jumbograms (RFC 2675)
4142
* Unlike IPv4, hdr->seg_len doesn't include the IPv6 header

net/ipv6/route.c

Lines changed: 30 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1391,9 +1391,6 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
13911391

13921392
pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
13931393

1394-
if (pcpu_rt)
1395-
ip6_hold_safe(NULL, &pcpu_rt);
1396-
13971394
return pcpu_rt;
13981395
}
13991396

@@ -1403,12 +1400,9 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
14031400
struct rt6_info *pcpu_rt, *prev, **p;
14041401

14051402
pcpu_rt = ip6_rt_pcpu_alloc(res);
1406-
if (!pcpu_rt) {
1407-
dst_hold(&net->ipv6.ip6_null_entry->dst);
1408-
return net->ipv6.ip6_null_entry;
1409-
}
1403+
if (!pcpu_rt)
1404+
return NULL;
14101405

1411-
dst_hold(&pcpu_rt->dst);
14121406
p = this_cpu_ptr(res->nh->rt6i_pcpu);
14131407
prev = cmpxchg(p, NULL, pcpu_rt);
14141408
BUG_ON(prev);
@@ -2189,9 +2183,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
21892183
const struct sk_buff *skb, int flags)
21902184
{
21912185
struct fib6_result res = {};
2192-
struct rt6_info *rt;
2186+
struct rt6_info *rt = NULL;
21932187
int strict = 0;
21942188

2189+
WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2190+
!rcu_read_lock_held());
2191+
21952192
strict |= flags & RT6_LOOKUP_F_IFACE;
21962193
strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
21972194
if (net->ipv6.devconf_all->forwarding == 0)
@@ -2200,64 +2197,54 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
22002197
rcu_read_lock();
22012198

22022199
fib6_table_lookup(net, table, oif, fl6, &res, strict);
2203-
if (res.f6i == net->ipv6.fib6_null_entry) {
2204-
rt = net->ipv6.ip6_null_entry;
2205-
rcu_read_unlock();
2206-
dst_hold(&rt->dst);
2207-
return rt;
2208-
}
2200+
if (res.f6i == net->ipv6.fib6_null_entry)
2201+
goto out;
22092202

22102203
fib6_select_path(net, &res, fl6, oif, false, skb, strict);
22112204

22122205
/*Search through exception table */
22132206
rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
22142207
if (rt) {
2215-
if (ip6_hold_safe(net, &rt))
2216-
dst_use_noref(&rt->dst, jiffies);
2217-
2218-
rcu_read_unlock();
2219-
return rt;
2208+
goto out;
22202209
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
22212210
!res.nh->fib_nh_gw_family)) {
22222211
/* Create a RTF_CACHE clone which will not be
22232212
* owned by the fib6 tree. It is for the special case where
22242213
* the daddr in the skb during the neighbor look-up is different
22252214
* from the fl6->daddr used to look-up route here.
22262215
*/
2227-
struct rt6_info *uncached_rt;
2216+
rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
22282217

2229-
uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2230-
2231-
rcu_read_unlock();
2232-
2233-
if (uncached_rt) {
2234-
/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
2235-
* No need for another dst_hold()
2218+
if (rt) {
2219+
/* 1 refcnt is taken during ip6_rt_cache_alloc().
2220+
* As rt6_uncached_list_add() does not consume refcnt,
2221+
* this refcnt is always returned to the caller even
2222+
* if caller sets RT6_LOOKUP_F_DST_NOREF flag.
22362223
*/
2237-
rt6_uncached_list_add(uncached_rt);
2224+
rt6_uncached_list_add(rt);
22382225
atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2239-
} else {
2240-
uncached_rt = net->ipv6.ip6_null_entry;
2241-
dst_hold(&uncached_rt->dst);
2242-
}
2226+
rcu_read_unlock();
22432227

2244-
return uncached_rt;
2228+
return rt;
2229+
}
22452230
} else {
22462231
/* Get a percpu copy */
2247-
2248-
struct rt6_info *pcpu_rt;
2249-
22502232
local_bh_disable();
2251-
pcpu_rt = rt6_get_pcpu_route(&res);
2233+
rt = rt6_get_pcpu_route(&res);
22522234

2253-
if (!pcpu_rt)
2254-
pcpu_rt = rt6_make_pcpu_route(net, &res);
2235+
if (!rt)
2236+
rt = rt6_make_pcpu_route(net, &res);
22552237

22562238
local_bh_enable();
2257-
rcu_read_unlock();
2258-
2259-
return pcpu_rt;
22602239
}
2240+
out:
2241+
if (!rt)
2242+
rt = net->ipv6.ip6_null_entry;
2243+
if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2244+
ip6_hold_safe(net, &rt);
2245+
rcu_read_unlock();
2246+
2247+
return rt;
22612248
}
22622249
EXPORT_SYMBOL_GPL(ip6_pol_route);
22632250

0 commit comments

Comments
 (0)