Skip to content

Commit f88d8ea

Browse files
dsahern authored and davem330 committed
ipv6: Plumb support for nexthop object in a fib6_info
Add struct nexthop and nh_list list_head to fib6_info. nh_list is the fib6_info side of the nexthop <-> fib6_info relationship. Since a fib6_info referencing a nexthop object can not have 'sibling' entries (the old way of doing multipath routes), the nh_list is a union with fib6_siblings.

Add f6i_list list_head to 'struct nexthop' to track fib6_info entries using a nexthop instance. Update __remove_nexthop_fib to walk f6i_list and delete fib entries using the nexthop.

Add a few nexthop helpers for use when a nexthop is added to fib6_info:
- nexthop_fib6_nh - return first fib6_nh in a nexthop object
- fib6_info_nh_dev moved to nexthop.h and updated to use nexthop_fib6_nh if the fib6_info references a nexthop object
- nexthop_path_fib6_result - similar to ipv4, select a path within a multipath nexthop object. If the nexthop is a blackhole, set fib6_result type to RTN_BLACKHOLE, and set the REJECT flag

Update the fib6_info references to check for nh and take a different path as needed:
- rt6_qualify_for_ecmp - if a fib entry uses a nexthop object it can NOT be coalesced with other fib entries into a multipath route
- rt6_duplicate_nexthop - use nexthop_cmp if either fib6_info references a nexthop
- addrconf (host routes), RA's and info entries (anything configured via ndisc) do not use nexthop objects
- fib6_info_destroy_rcu - put reference to nexthop object
- fib6_purge_rt - drop fib6_info from f6i_list
- fib6_select_path - update to use the new nexthop_path_fib6_result when fib entry uses a nexthop object
- rt6_device_match - update to catch use of nexthop object as a blackhole and set fib6_type and flags.
- ip6_route_info_create - don't add space for fib6_nh if fib entry is going to reference a nexthop object, take a reference to nexthop object, disallow use of source routing
- rt6_nlmsg_size - add space for RTA_NH_ID
- add rt6_fill_node_nexthop to add nexthop data on a dump

As with ipv4, most of the changes push existing code into the else branch of whether the fib entry uses a nexthop object.

Update the nexthop code to walk f6i_list when a nexthop is deleted, to remove fib entries referencing it.

Signed-off-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 4c7e808 commit f88d8ea

File tree

8 files changed

+260
-36
lines changed

8 files changed

+260
-36
lines changed

include/net/ip6_fib.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,10 @@ struct fib6_info {
146146
* destination, but not the same gateway. nsiblings is just a cache
147147
* to speed up lookup.
148148
*/
149-
struct list_head fib6_siblings;
149+
union {
150+
struct list_head fib6_siblings;
151+
struct list_head nh_list;
152+
};
150153
unsigned int fib6_nsiblings;
151154

152155
refcount_t fib6_ref;
@@ -170,6 +173,7 @@ struct fib6_info {
170173
unused:3;
171174

172175
struct rcu_head rcu;
176+
struct nexthop *nh;
173177
struct fib6_nh fib6_nh[0];
174178
};
175179

@@ -441,11 +445,6 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
441445
rcu_read_unlock();
442446
}
443447

444-
static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
445-
{
446-
return f6i->fib6_nh->fib_nh_dev;
447-
}
448-
449448
int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
450449
struct fib6_config *cfg, gfp_t gfp_flags,
451450
struct netlink_ext_ack *extack);

include/net/ip6_route.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct route_info {
2727
#include <linux/ip.h>
2828
#include <linux/ipv6.h>
2929
#include <linux/route.h>
30+
#include <net/nexthop.h>
3031

3132
#define RT6_LOOKUP_F_IFACE 0x00000001
3233
#define RT6_LOOKUP_F_REACHABLE 0x00000002
@@ -66,10 +67,13 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
6667
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
6768
}
6869

70+
/* fib entries using a nexthop object can not be coalesced into
71+
* a multipath route
72+
*/
6973
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
7074
{
7175
/* the RTF_ADDRCONF flag filters out RA's */
72-
return !(f6i->fib6_flags & RTF_ADDRCONF) &&
76+
return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh &&
7377
f6i->fib6_nh->fib_nh_gw_family;
7478
}
7579

@@ -275,8 +279,13 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
275279

276280
static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
277281
{
278-
struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh;
282+
struct fib6_nh *nha, *nhb;
283+
284+
if (a->nh || b->nh)
285+
return nexthop_cmp(a->nh, b->nh);
279286

287+
nha = a->fib6_nh;
288+
nhb = b->fib6_nh;
280289
return nha->fib_nh_dev == nhb->fib_nh_dev &&
281290
ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
282291
!lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);

include/net/nexthop.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define __LINUX_NEXTHOP_H
1111

1212
#include <linux/netdevice.h>
13+
#include <linux/route.h>
1314
#include <linux/types.h>
1415
#include <net/ip_fib.h>
1516
#include <net/ip6_fib.h>
@@ -78,6 +79,7 @@ struct nh_group {
7879
struct nexthop {
7980
struct rb_node rb_node; /* entry on netns rbtree */
8081
struct list_head fi_list; /* v4 entries using nh */
82+
struct list_head f6i_list; /* v6 entries using nh */
8183
struct list_head grp_list; /* nh group entries using this nh */
8284
struct net *net;
8385

@@ -255,4 +257,52 @@ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
255257

256258
return &fi->fib_nh[nhsel];
257259
}
260+
261+
/*
262+
* IPv6 variants
263+
*/
264+
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
265+
struct netlink_ext_ack *extack);
266+
267+
static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
268+
{
269+
struct nh_info *nhi;
270+
271+
if (nexthop_is_multipath(nh)) {
272+
nh = nexthop_mpath_select(nh, 0);
273+
if (!nh)
274+
return NULL;
275+
}
276+
277+
nhi = rcu_dereference_rtnl(nh->nh_info);
278+
if (nhi->family == AF_INET6)
279+
return &nhi->fib6_nh;
280+
281+
return NULL;
282+
}
283+
284+
static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
285+
{
286+
struct fib6_nh *fib6_nh;
287+
288+
fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
289+
return fib6_nh->fib_nh_dev;
290+
}
291+
292+
static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
293+
{
294+
struct nexthop *nh = res->f6i->nh;
295+
struct nh_info *nhi;
296+
297+
nh = nexthop_select_path(nh, hash);
298+
299+
nhi = rcu_dereference_rtnl(nh->nh_info);
300+
if (nhi->reject_nh) {
301+
res->fib6_type = RTN_BLACKHOLE;
302+
res->fib6_flags |= RTF_REJECT;
303+
res->nh = nexthop_fib6_nh(nh);
304+
} else {
305+
res->nh = &nhi->fib6_nh;
306+
}
307+
}
258308
#endif

net/ipv4/nexthop.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ static struct nexthop *nexthop_alloc(void)
106106
nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
107107
if (nh) {
108108
INIT_LIST_HEAD(&nh->fi_list);
109+
INIT_LIST_HEAD(&nh->f6i_list);
109110
INIT_LIST_HEAD(&nh->grp_list);
110111
}
111112
return nh;
@@ -516,6 +517,41 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
516517
}
517518
EXPORT_SYMBOL_GPL(nexthop_select_path);
518519

520+
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
521+
struct netlink_ext_ack *extack)
522+
{
523+
struct nh_info *nhi;
524+
525+
/* fib6_src is unique to a fib6_info and limits the ability to cache
526+
* routes in fib6_nh within a nexthop that is potentially shared
527+
* across multiple fib entries. If the config wants to use source
528+
* routing it can not use nexthop objects. mlxsw also does not allow
529+
* fib6_src on routes.
530+
*/
531+
if (!ipv6_addr_any(&cfg->fc_src)) {
532+
NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
533+
return -EINVAL;
534+
}
535+
536+
if (nh->is_group) {
537+
struct nh_group *nhg;
538+
539+
nhg = rtnl_dereference(nh->nh_grp);
540+
if (nhg->has_v4)
541+
goto no_v4_nh;
542+
} else {
543+
nhi = rtnl_dereference(nh->nh_info);
544+
if (nhi->family == AF_INET)
545+
goto no_v4_nh;
546+
}
547+
548+
return 0;
549+
no_v4_nh:
550+
NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
551+
return -EINVAL;
552+
}
553+
EXPORT_SYMBOL_GPL(fib6_check_nexthop);
554+
519555
static int nexthop_check_scope(struct nexthop *nh, u8 scope,
520556
struct netlink_ext_ack *extack)
521557
{
@@ -658,6 +694,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
658694

659695
static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
660696
{
697+
struct fib6_info *f6i, *tmp;
661698
bool do_flush = false;
662699
struct fib_info *fi;
663700

@@ -667,6 +704,13 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
667704
}
668705
if (do_flush)
669706
fib_flush(net);
707+
708+
/* ip6_del_rt removes the entry from this list hence the _safe */
709+
list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
710+
/* __ip6_del_rt does a release, so do a hold here */
711+
fib6_info_hold(f6i);
712+
ipv6_stub->ip6_del_rt(net, f6i);
713+
}
670714
}
671715

672716
static void __remove_nexthop(struct net *net, struct nexthop *nh,

net/ipv6/addrconf.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2421,6 +2421,10 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
24212421
goto out;
24222422

24232423
for_each_fib6_node_rt_rcu(fn) {
2424+
/* prefix routes only use builtin fib6_nh */
2425+
if (rt->nh)
2426+
continue;
2427+
24242428
if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
24252429
continue;
24262430
if (no_gw && rt->fib6_nh->fib_nh_gw_family)
@@ -6352,6 +6356,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
63526356
list_for_each_entry(ifa, &idev->addr_list, if_list) {
63536357
spin_lock(&ifa->lock);
63546358
if (ifa->rt) {
6359+
/* host routes only use builtin fib6_nh */
63556360
struct fib6_nh *nh = ifa->rt->fib6_nh;
63566361
int cpu;
63576362

net/ipv6/ip6_fib.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
159159
if (!f6i)
160160
return NULL;
161161

162+
/* fib6_siblings is a union with nh_list, so this initializes both */
162163
INIT_LIST_HEAD(&f6i->fib6_siblings);
163164
refcount_set(&f6i->fib6_ref, 1);
164165

@@ -171,7 +172,11 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
171172

172173
WARN_ON(f6i->fib6_node);
173174

174-
fib6_nh_release(f6i->fib6_nh);
175+
if (f6i->nh)
176+
nexthop_put(f6i->nh);
177+
else
178+
fib6_nh_release(f6i->fib6_nh);
179+
175180
ip_fib_metrics_put(f6i->fib6_metrics);
176181
kfree(f6i);
177182
}
@@ -927,6 +932,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
927932

928933
fib6_drop_pcpu_from(rt, table);
929934

935+
if (rt->nh && !list_empty(&rt->nh_list))
936+
list_del_init(&rt->nh_list);
937+
930938
if (refcount_read(&rt->fib6_ref) != 1) {
931939
/* This route is used as dummy address holder in some split
932940
* nodes. It is not leaked, but it still holds other resources,
@@ -1334,6 +1342,8 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
13341342

13351343
err = fib6_add_rt2node(fn, rt, info, extack);
13361344
if (!err) {
1345+
if (rt->nh)
1346+
list_add(&rt->nh_list, &rt->nh->f6i_list);
13371347
__fib6_update_sernum_upto_root(rt, sernum);
13381348
fib6_start_gc(info->nl_net, rt);
13391349
}
@@ -2295,24 +2305,28 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
22952305
{
22962306
struct fib6_info *rt = v;
22972307
struct ipv6_route_iter *iter = seq->private;
2308+
struct fib6_nh *fib6_nh = rt->fib6_nh;
22982309
unsigned int flags = rt->fib6_flags;
22992310
const struct net_device *dev;
23002311

2312+
if (rt->nh)
2313+
fib6_nh = nexthop_fib6_nh(rt->nh);
2314+
23012315
seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
23022316

23032317
#ifdef CONFIG_IPV6_SUBTREES
23042318
seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
23052319
#else
23062320
seq_puts(seq, "00000000000000000000000000000000 00 ");
23072321
#endif
2308-
if (rt->fib6_nh->fib_nh_gw_family) {
2322+
if (fib6_nh->fib_nh_gw_family) {
23092323
flags |= RTF_GATEWAY;
2310-
seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6);
2324+
seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
23112325
} else {
23122326
seq_puts(seq, "00000000000000000000000000000000");
23132327
}
23142328

2315-
dev = rt->fib6_nh->fib_nh_dev;
2329+
dev = fib6_nh->fib_nh_dev;
23162330
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
23172331
rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
23182332
flags, dev ? dev->name : "");

net/ipv6/ndisc.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,9 +1289,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
12891289
!in6_dev->cnf.accept_ra_rtr_pref)
12901290
pref = ICMPV6_ROUTER_PREF_MEDIUM;
12911291
#endif
1292-
1292+
/* routes added from RAs do not use nexthop objects */
12931293
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
1294-
12951294
if (rt) {
12961295
neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
12971296
rt->fib6_nh->fib_nh_dev, NULL,

0 commit comments

Comments
 (0)