Skip to content

Commit 597cfe4

Browse files
dsahern authored and davem330 committed
nexthop: Add support for IPv4 nexthops
Add support for IPv4 nexthops. If nh_family is set to AF_INET, then NHA_GATEWAY is expected to be an IPv4 address. Register for netdev events to be notified of admin up/down changes as well as deletes. A hash table is used to track nexthops per device to quickly convert device events to the affected nexthops.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent ab84be7 commit 597cfe4

File tree

2 files changed

+213
-0
lines changed

2 files changed

+213
-0
lines changed

include/net/nexthop.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ struct nh_config {
2929
int nh_ifindex;
3030
struct net_device *dev;
3131

32+
union {
33+
__be32 ipv4;
34+
} gw;
35+
3236
u32 nlflags;
3337
struct nl_info nlinfo;
3438
};
@@ -42,6 +46,7 @@ struct nh_info {
4246

4347
union {
4448
struct fib_nh_common fib_nhc;
49+
struct fib_nh fib_nh;
4550
};
4651
};
4752

net/ipv4/nexthop.c

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@
99
#include <linux/rtnetlink.h>
1010
#include <linux/slab.h>
1111
#include <net/nexthop.h>
12+
#include <net/route.h>
1213
#include <net/sock.h>
1314

15+
#define NH_DEV_HASHBITS 8
16+
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
17+
1418
static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
1519
[NHA_UNSPEC] = { .strict_start_type = NHA_UNSPEC + 1 },
1620
[NHA_ID] = { .type = NLA_U32 },
@@ -25,12 +29,39 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
2529
[NHA_MASTER] = { .type = NLA_U32 },
2630
};
2731

32+
static unsigned int nh_dev_hashfn(unsigned int val)
33+
{
34+
unsigned int mask = NH_DEV_HASHSIZE - 1;
35+
36+
return (val ^
37+
(val >> NH_DEV_HASHBITS) ^
38+
(val >> (NH_DEV_HASHBITS * 2))) & mask;
39+
}
40+
41+
static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
42+
{
43+
struct net_device *dev = nhi->fib_nhc.nhc_dev;
44+
struct hlist_head *head;
45+
unsigned int hash;
46+
47+
WARN_ON(!dev);
48+
49+
hash = nh_dev_hashfn(dev->ifindex);
50+
head = &net->nexthop.devhash[hash];
51+
hlist_add_head(&nhi->dev_hash, head);
52+
}
53+
2854
void nexthop_free_rcu(struct rcu_head *head)
2955
{
3056
struct nexthop *nh = container_of(head, struct nexthop, rcu);
3157
struct nh_info *nhi;
3258

3359
nhi = rcu_dereference_raw(nh->nh_info);
60+
switch (nhi->family) {
61+
case AF_INET:
62+
fib_nh_release(nh->net, &nhi->fib_nh);
63+
break;
64+
}
3465
kfree(nhi);
3566

3667
kfree(nh);
@@ -96,6 +127,7 @@ static u32 nh_find_unused_id(struct net *net)
96127
static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
97128
int event, u32 portid, u32 seq, unsigned int nlflags)
98129
{
130+
struct fib_nh *fib_nh;
99131
struct nlmsghdr *nlh;
100132
struct nh_info *nhi;
101133
struct nhmsg *nhm;
@@ -120,6 +152,22 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
120152
if (nla_put_flag(skb, NHA_BLACKHOLE))
121153
goto nla_put_failure;
122154
goto out;
155+
} else {
156+
const struct net_device *dev;
157+
158+
dev = nhi->fib_nhc.nhc_dev;
159+
if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
160+
goto nla_put_failure;
161+
}
162+
163+
nhm->nh_scope = nhi->fib_nhc.nhc_scope;
164+
switch (nhi->family) {
165+
case AF_INET:
166+
fib_nh = &nhi->fib_nh;
167+
if (fib_nh->fib_nh_gw_family &&
168+
nla_put_u32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
169+
goto nla_put_failure;
170+
break;
123171
}
124172

125173
out:
@@ -132,13 +180,21 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
132180

133181
static size_t nh_nlmsg_size(struct nexthop *nh)
134182
{
183+
struct nh_info *nhi = rtnl_dereference(nh->nh_info);
135184
size_t sz = nla_total_size(4); /* NHA_ID */
136185

137186
/* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
138187
* are mutually exclusive
139188
*/
140189
sz += nla_total_size(4); /* NHA_OIF */
141190

191+
switch (nhi->family) {
192+
case AF_INET:
193+
if (nhi->fib_nh.fib_nh_gw_family)
194+
sz += nla_total_size(4); /* NHA_GATEWAY */
195+
break;
196+
}
197+
142198
return sz;
143199
}
144200

@@ -169,6 +225,15 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
169225
rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
170226
}
171227

228+
static void __remove_nexthop(struct net *net, struct nexthop *nh)
229+
{
230+
struct nh_info *nhi;
231+
232+
nhi = rtnl_dereference(nh->nh_info);
233+
if (nhi->fib_nhc.nhc_dev)
234+
hlist_del(&nhi->dev_hash);
235+
}
236+
172237
static void remove_nexthop(struct net *net, struct nexthop *nh,
173238
bool skip_fib, struct nl_info *nlinfo)
174239
{
@@ -178,6 +243,7 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
178243
if (nlinfo)
179244
nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
180245

246+
__remove_nexthop(net, nh);
181247
nh_base_seq_inc(net);
182248

183249
nexthop_put(nh);
@@ -244,6 +310,24 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
244310
return rc;
245311
}
246312

313+
/* rtnl */
314+
/* remove all nexthops tied to a device being deleted */
315+
static void nexthop_flush_dev(struct net_device *dev)
316+
{
317+
unsigned int hash = nh_dev_hashfn(dev->ifindex);
318+
struct net *net = dev_net(dev);
319+
struct hlist_head *head = &net->nexthop.devhash[hash];
320+
struct hlist_node *n;
321+
struct nh_info *nhi;
322+
323+
hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
324+
if (nhi->fib_nhc.nhc_dev != dev)
325+
continue;
326+
327+
remove_nexthop(net, nhi->nh_parent, false, NULL);
328+
}
329+
}
330+
247331
/* rtnl; called when net namespace is deleted */
248332
static void flush_all_nexthops(struct net *net)
249333
{
@@ -258,6 +342,38 @@ static void flush_all_nexthops(struct net *net)
258342
}
259343
}
260344

345+
static int nh_create_ipv4(struct net *net, struct nexthop *nh,
346+
struct nh_info *nhi, struct nh_config *cfg,
347+
struct netlink_ext_ack *extack)
348+
{
349+
struct fib_nh *fib_nh = &nhi->fib_nh;
350+
struct fib_config fib_cfg = {
351+
.fc_oif = cfg->nh_ifindex,
352+
.fc_gw4 = cfg->gw.ipv4,
353+
.fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
354+
.fc_flags = cfg->nh_flags,
355+
};
356+
u32 tb_id = l3mdev_fib_table(cfg->dev);
357+
int err = -EINVAL;
358+
359+
err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
360+
if (err) {
361+
fib_nh_release(net, fib_nh);
362+
goto out;
363+
}
364+
365+
/* sets nh_dev if successful */
366+
err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
367+
if (!err) {
368+
nh->nh_flags = fib_nh->fib_nh_flags;
369+
fib_info_update_nh_saddr(net, fib_nh, fib_nh->fib_nh_scope);
370+
} else {
371+
fib_nh_release(net, fib_nh);
372+
}
373+
out:
374+
return err;
375+
}
376+
261377
static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
262378
struct netlink_ext_ack *extack)
263379
{
@@ -287,12 +403,21 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
287403
cfg->nh_ifindex = net->loopback_dev->ifindex;
288404
}
289405

406+
switch (cfg->nh_family) {
407+
case AF_INET:
408+
err = nh_create_ipv4(net, nh, nhi, cfg, extack);
409+
break;
410+
}
411+
290412
if (err) {
291413
kfree(nhi);
292414
kfree(nh);
293415
return ERR_PTR(err);
294416
}
295417

418+
/* add the entry to the device based hash */
419+
nexthop_devhash_add(net, nhi);
420+
296421
rcu_assign_pointer(nh->nh_info, nhi);
297422

298423
return nh;
@@ -329,6 +454,7 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
329454

330455
err = insert_nexthop(net, nh, cfg, extack);
331456
if (err) {
457+
__remove_nexthop(net, nh);
332458
nexthop_put(nh);
333459
nh = ERR_PTR(err);
334460
}
@@ -360,6 +486,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
360486
}
361487

362488
switch (nhm->nh_family) {
489+
case AF_INET:
490+
break;
363491
default:
364492
NL_SET_ERR_MSG(extack, "Invalid address family");
365493
goto out;
@@ -416,6 +544,32 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
416544
goto out;
417545
}
418546

547+
err = -EINVAL;
548+
if (tb[NHA_GATEWAY]) {
549+
struct nlattr *gwa = tb[NHA_GATEWAY];
550+
551+
switch (cfg->nh_family) {
552+
case AF_INET:
553+
if (nla_len(gwa) != sizeof(u32)) {
554+
NL_SET_ERR_MSG(extack, "Invalid gateway");
555+
goto out;
556+
}
557+
cfg->gw.ipv4 = nla_get_be32(gwa);
558+
break;
559+
default:
560+
NL_SET_ERR_MSG(extack,
561+
"Unknown address family for gateway");
562+
goto out;
563+
}
564+
} else {
565+
/* device only nexthop (no gateway) */
566+
if (cfg->nh_flags & RTNH_F_ONLINK) {
567+
NL_SET_ERR_MSG(extack,
568+
"ONLINK flag can not be set for nexthop without a gateway");
569+
goto out;
570+
}
571+
}
572+
419573
err = 0;
420574
out:
421575
return err;
@@ -683,16 +837,68 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
683837
return err;
684838
}
685839

840+
/* Propagate an MTU change on @dev to its AF_INET nexthops.
 *
 * Walks the hash bucket for dev->ifindex and calls fib_nhc_update_mtu()
 * with the device's current MTU and @orig_mtu for every AF_INET entry
 * that uses @dev.
 */
static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
	unsigned int bucket = nh_dev_hashfn(dev->ifindex);
	struct net *net = dev_net(dev);
	struct hlist_head *chain = &net->nexthop.devhash[bucket];
	struct hlist_node *tmp;
	struct nh_info *nhi;

	hlist_for_each_entry_safe(nhi, tmp, chain, dev_hash) {
		if (nhi->fib_nhc.nhc_dev != dev || nhi->family != AF_INET)
			continue;

		fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu, orig_mtu);
	}
}
857+
/* rtnl */
858+
static int nh_netdev_event(struct notifier_block *this,
859+
unsigned long event, void *ptr)
860+
{
861+
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
862+
struct netdev_notifier_info_ext *info_ext;
863+
864+
switch (event) {
865+
case NETDEV_DOWN:
866+
case NETDEV_UNREGISTER:
867+
nexthop_flush_dev(dev);
868+
break;
869+
case NETDEV_CHANGE:
870+
if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
871+
nexthop_flush_dev(dev);
872+
break;
873+
case NETDEV_CHANGEMTU:
874+
info_ext = ptr;
875+
nexthop_sync_mtu(dev, info_ext->ext.mtu);
876+
rt_cache_flush(dev_net(dev));
877+
break;
878+
}
879+
return NOTIFY_DONE;
880+
}
881+
882+
static struct notifier_block nh_netdev_notifier = {
883+
.notifier_call = nh_netdev_event,
884+
};
885+
686886
/* Per-namespace teardown: flush all nexthops of @net while holding rtnl,
 * then free the device hash table allocated in nexthop_net_init().
 */
static void __net_exit nexthop_net_exit(struct net *net)
{
	rtnl_lock();
	flush_all_nexthops(net);
	rtnl_unlock();

	/* the table is freed only after the flush above has completed */
	kfree(net->nexthop.devhash);
}
692893

693894
/* Per-namespace setup: reset the nexthop rb-tree root and allocate the
 * zeroed device hash table with NH_DEV_HASHSIZE buckets.
 *
 * Returns 0 on success, -ENOMEM if the hash table cannot be allocated.
 */
static int __net_init nexthop_net_init(struct net *net)
{
	net->nexthop.rb_root = RB_ROOT;

	/* kcalloc() instead of an open-coded "sizeof * count" passed to
	 * kzalloc(): same zeroed allocation, with the multiplication
	 * overflow-checked by the allocator.
	 */
	net->nexthop.devhash = kcalloc(NH_DEV_HASHSIZE,
				       sizeof(*net->nexthop.devhash),
				       GFP_KERNEL);
	if (!net->nexthop.devhash)
		return -ENOMEM;

	return 0;
}
@@ -706,6 +912,8 @@ static int __init nexthop_init(void)
706912
{
707913
register_pernet_subsys(&nexthop_net_ops);
708914

915+
register_netdevice_notifier(&nh_netdev_notifier);
916+
709917
rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
710918
rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
711919
rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,

0 commit comments

Comments
 (0)