9
9
#include <linux/rtnetlink.h>
10
10
#include <linux/slab.h>
11
11
#include <net/nexthop.h>
12
+ #include <net/route.h>
12
13
#include <net/sock.h>
13
14
15
+ #define NH_DEV_HASHBITS 8
16
+ #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
17
+
14
18
static const struct nla_policy rtm_nh_policy [NHA_MAX + 1 ] = {
15
19
[NHA_UNSPEC ] = { .strict_start_type = NHA_UNSPEC + 1 },
16
20
[NHA_ID ] = { .type = NLA_U32 },
@@ -25,12 +29,39 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
25
29
[NHA_MASTER ] = { .type = NLA_U32 },
26
30
};
27
31
32
+ static unsigned int nh_dev_hashfn (unsigned int val )
33
+ {
34
+ unsigned int mask = NH_DEV_HASHSIZE - 1 ;
35
+
36
+ return (val ^
37
+ (val >> NH_DEV_HASHBITS ) ^
38
+ (val >> (NH_DEV_HASHBITS * 2 ))) & mask ;
39
+ }
40
+
41
+ static void nexthop_devhash_add (struct net * net , struct nh_info * nhi )
42
+ {
43
+ struct net_device * dev = nhi -> fib_nhc .nhc_dev ;
44
+ struct hlist_head * head ;
45
+ unsigned int hash ;
46
+
47
+ WARN_ON (!dev );
48
+
49
+ hash = nh_dev_hashfn (dev -> ifindex );
50
+ head = & net -> nexthop .devhash [hash ];
51
+ hlist_add_head (& nhi -> dev_hash , head );
52
+ }
53
+
28
54
void nexthop_free_rcu (struct rcu_head * head )
29
55
{
30
56
struct nexthop * nh = container_of (head , struct nexthop , rcu );
31
57
struct nh_info * nhi ;
32
58
33
59
nhi = rcu_dereference_raw (nh -> nh_info );
60
+ switch (nhi -> family ) {
61
+ case AF_INET :
62
+ fib_nh_release (nh -> net , & nhi -> fib_nh );
63
+ break ;
64
+ }
34
65
kfree (nhi );
35
66
36
67
kfree (nh );
@@ -96,6 +127,7 @@ static u32 nh_find_unused_id(struct net *net)
96
127
static int nh_fill_node (struct sk_buff * skb , struct nexthop * nh ,
97
128
int event , u32 portid , u32 seq , unsigned int nlflags )
98
129
{
130
+ struct fib_nh * fib_nh ;
99
131
struct nlmsghdr * nlh ;
100
132
struct nh_info * nhi ;
101
133
struct nhmsg * nhm ;
@@ -120,6 +152,22 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
120
152
if (nla_put_flag (skb , NHA_BLACKHOLE ))
121
153
goto nla_put_failure ;
122
154
goto out ;
155
+ } else {
156
+ const struct net_device * dev ;
157
+
158
+ dev = nhi -> fib_nhc .nhc_dev ;
159
+ if (dev && nla_put_u32 (skb , NHA_OIF , dev -> ifindex ))
160
+ goto nla_put_failure ;
161
+ }
162
+
163
+ nhm -> nh_scope = nhi -> fib_nhc .nhc_scope ;
164
+ switch (nhi -> family ) {
165
+ case AF_INET :
166
+ fib_nh = & nhi -> fib_nh ;
167
+ if (fib_nh -> fib_nh_gw_family &&
168
+ nla_put_u32 (skb , NHA_GATEWAY , fib_nh -> fib_nh_gw4 ))
169
+ goto nla_put_failure ;
170
+ break ;
123
171
}
124
172
125
173
out :
@@ -132,13 +180,21 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
132
180
133
181
static size_t nh_nlmsg_size (struct nexthop * nh )
134
182
{
183
+ struct nh_info * nhi = rtnl_dereference (nh -> nh_info );
135
184
size_t sz = nla_total_size (4 ); /* NHA_ID */
136
185
137
186
/* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
138
187
* are mutually exclusive
139
188
*/
140
189
sz += nla_total_size (4 ); /* NHA_OIF */
141
190
191
+ switch (nhi -> family ) {
192
+ case AF_INET :
193
+ if (nhi -> fib_nh .fib_nh_gw_family )
194
+ sz += nla_total_size (4 ); /* NHA_GATEWAY */
195
+ break ;
196
+ }
197
+
142
198
return sz ;
143
199
}
144
200
@@ -169,6 +225,15 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
169
225
rtnl_set_sk_err (info -> nl_net , RTNLGRP_NEXTHOP , err );
170
226
}
171
227
228
+ static void __remove_nexthop (struct net * net , struct nexthop * nh )
229
+ {
230
+ struct nh_info * nhi ;
231
+
232
+ nhi = rtnl_dereference (nh -> nh_info );
233
+ if (nhi -> fib_nhc .nhc_dev )
234
+ hlist_del (& nhi -> dev_hash );
235
+ }
236
+
172
237
static void remove_nexthop (struct net * net , struct nexthop * nh ,
173
238
bool skip_fib , struct nl_info * nlinfo )
174
239
{
@@ -178,6 +243,7 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
178
243
if (nlinfo )
179
244
nexthop_notify (RTM_DELNEXTHOP , nh , nlinfo );
180
245
246
+ __remove_nexthop (net , nh );
181
247
nh_base_seq_inc (net );
182
248
183
249
nexthop_put (nh );
@@ -244,6 +310,24 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
244
310
return rc ;
245
311
}
246
312
313
+ /* rtnl */
314
+ /* remove all nexthops tied to a device being deleted */
315
+ static void nexthop_flush_dev (struct net_device * dev )
316
+ {
317
+ unsigned int hash = nh_dev_hashfn (dev -> ifindex );
318
+ struct net * net = dev_net (dev );
319
+ struct hlist_head * head = & net -> nexthop .devhash [hash ];
320
+ struct hlist_node * n ;
321
+ struct nh_info * nhi ;
322
+
323
+ hlist_for_each_entry_safe (nhi , n , head , dev_hash ) {
324
+ if (nhi -> fib_nhc .nhc_dev != dev )
325
+ continue ;
326
+
327
+ remove_nexthop (net , nhi -> nh_parent , false, NULL );
328
+ }
329
+ }
330
+
247
331
/* rtnl; called when net namespace is deleted */
248
332
static void flush_all_nexthops (struct net * net )
249
333
{
@@ -258,6 +342,38 @@ static void flush_all_nexthops(struct net *net)
258
342
}
259
343
}
260
344
345
+ static int nh_create_ipv4 (struct net * net , struct nexthop * nh ,
346
+ struct nh_info * nhi , struct nh_config * cfg ,
347
+ struct netlink_ext_ack * extack )
348
+ {
349
+ struct fib_nh * fib_nh = & nhi -> fib_nh ;
350
+ struct fib_config fib_cfg = {
351
+ .fc_oif = cfg -> nh_ifindex ,
352
+ .fc_gw4 = cfg -> gw .ipv4 ,
353
+ .fc_gw_family = cfg -> gw .ipv4 ? AF_INET : 0 ,
354
+ .fc_flags = cfg -> nh_flags ,
355
+ };
356
+ u32 tb_id = l3mdev_fib_table (cfg -> dev );
357
+ int err = - EINVAL ;
358
+
359
+ err = fib_nh_init (net , fib_nh , & fib_cfg , 1 , extack );
360
+ if (err ) {
361
+ fib_nh_release (net , fib_nh );
362
+ goto out ;
363
+ }
364
+
365
+ /* sets nh_dev if successful */
366
+ err = fib_check_nh (net , fib_nh , tb_id , 0 , extack );
367
+ if (!err ) {
368
+ nh -> nh_flags = fib_nh -> fib_nh_flags ;
369
+ fib_info_update_nh_saddr (net , fib_nh , fib_nh -> fib_nh_scope );
370
+ } else {
371
+ fib_nh_release (net , fib_nh );
372
+ }
373
+ out :
374
+ return err ;
375
+ }
376
+
261
377
static struct nexthop * nexthop_create (struct net * net , struct nh_config * cfg ,
262
378
struct netlink_ext_ack * extack )
263
379
{
@@ -287,12 +403,21 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
287
403
cfg -> nh_ifindex = net -> loopback_dev -> ifindex ;
288
404
}
289
405
406
+ switch (cfg -> nh_family ) {
407
+ case AF_INET :
408
+ err = nh_create_ipv4 (net , nh , nhi , cfg , extack );
409
+ break ;
410
+ }
411
+
290
412
if (err ) {
291
413
kfree (nhi );
292
414
kfree (nh );
293
415
return ERR_PTR (err );
294
416
}
295
417
418
+ /* add the entry to the device based hash */
419
+ nexthop_devhash_add (net , nhi );
420
+
296
421
rcu_assign_pointer (nh -> nh_info , nhi );
297
422
298
423
return nh ;
@@ -329,6 +454,7 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
329
454
330
455
err = insert_nexthop (net , nh , cfg , extack );
331
456
if (err ) {
457
+ __remove_nexthop (net , nh );
332
458
nexthop_put (nh );
333
459
nh = ERR_PTR (err );
334
460
}
@@ -360,6 +486,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
360
486
}
361
487
362
488
switch (nhm -> nh_family ) {
489
+ case AF_INET :
490
+ break ;
363
491
default :
364
492
NL_SET_ERR_MSG (extack , "Invalid address family" );
365
493
goto out ;
@@ -416,6 +544,32 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
416
544
goto out ;
417
545
}
418
546
547
+ err = - EINVAL ;
548
+ if (tb [NHA_GATEWAY ]) {
549
+ struct nlattr * gwa = tb [NHA_GATEWAY ];
550
+
551
+ switch (cfg -> nh_family ) {
552
+ case AF_INET :
553
+ if (nla_len (gwa ) != sizeof (u32 )) {
554
+ NL_SET_ERR_MSG (extack , "Invalid gateway" );
555
+ goto out ;
556
+ }
557
+ cfg -> gw .ipv4 = nla_get_be32 (gwa );
558
+ break ;
559
+ default :
560
+ NL_SET_ERR_MSG (extack ,
561
+ "Unknown address family for gateway" );
562
+ goto out ;
563
+ }
564
+ } else {
565
+ /* device only nexthop (no gateway) */
566
+ if (cfg -> nh_flags & RTNH_F_ONLINK ) {
567
+ NL_SET_ERR_MSG (extack ,
568
+ "ONLINK flag can not be set for nexthop without a gateway" );
569
+ goto out ;
570
+ }
571
+ }
572
+
419
573
err = 0 ;
420
574
out :
421
575
return err ;
@@ -683,16 +837,68 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
683
837
return err ;
684
838
}
685
839
840
+ static void nexthop_sync_mtu (struct net_device * dev , u32 orig_mtu )
841
+ {
842
+ unsigned int hash = nh_dev_hashfn (dev -> ifindex );
843
+ struct net * net = dev_net (dev );
844
+ struct hlist_head * head = & net -> nexthop .devhash [hash ];
845
+ struct hlist_node * n ;
846
+ struct nh_info * nhi ;
847
+
848
+ hlist_for_each_entry_safe (nhi , n , head , dev_hash ) {
849
+ if (nhi -> fib_nhc .nhc_dev == dev ) {
850
+ if (nhi -> family == AF_INET )
851
+ fib_nhc_update_mtu (& nhi -> fib_nhc , dev -> mtu ,
852
+ orig_mtu );
853
+ }
854
+ }
855
+ }
856
+
857
+ /* rtnl */
858
+ static int nh_netdev_event (struct notifier_block * this ,
859
+ unsigned long event , void * ptr )
860
+ {
861
+ struct net_device * dev = netdev_notifier_info_to_dev (ptr );
862
+ struct netdev_notifier_info_ext * info_ext ;
863
+
864
+ switch (event ) {
865
+ case NETDEV_DOWN :
866
+ case NETDEV_UNREGISTER :
867
+ nexthop_flush_dev (dev );
868
+ break ;
869
+ case NETDEV_CHANGE :
870
+ if (!(dev_get_flags (dev ) & (IFF_RUNNING | IFF_LOWER_UP )))
871
+ nexthop_flush_dev (dev );
872
+ break ;
873
+ case NETDEV_CHANGEMTU :
874
+ info_ext = ptr ;
875
+ nexthop_sync_mtu (dev , info_ext -> ext .mtu );
876
+ rt_cache_flush (dev_net (dev ));
877
+ break ;
878
+ }
879
+ return NOTIFY_DONE ;
880
+ }
881
+
882
+ static struct notifier_block nh_netdev_notifier = {
883
+ .notifier_call = nh_netdev_event ,
884
+ };
885
+
686
886
/* Per-netns teardown: flush all nexthops under rtnl, then free the
 * device hash table allocated in nexthop_net_init().
 */
static void __net_exit nexthop_net_exit(struct net *net)
{
	struct hlist_head *devhash;

	rtnl_lock();
	flush_all_nexthops(net);
	rtnl_unlock();

	devhash = net->nexthop.devhash;
	kfree(devhash);
}
692
893
693
894
/* Per-netns setup: start with an empty nexthop rb-tree and allocate the
 * zeroed device hash table of NH_DEV_HASHSIZE buckets.
 *
 * Returns 0 on success or -ENOMEM if the hash table cannot be allocated.
 */
static int __net_init nexthop_net_init(struct net *net)
{
	net->nexthop.rb_root = RB_ROOT;

	/* kcalloc() is the array-allocation idiom: zeroed memory with an
	 * overflow-checked count * size instead of open-coded arithmetic
	 */
	net->nexthop.devhash = kcalloc(NH_DEV_HASHSIZE,
				       sizeof(struct hlist_head),
				       GFP_KERNEL);
	if (!net->nexthop.devhash)
		return -ENOMEM;

	return 0;
}
@@ -706,6 +912,8 @@ static int __init nexthop_init(void)
706
912
{
707
913
register_pernet_subsys (& nexthop_net_ops );
708
914
915
+ register_netdevice_notifier (& nh_netdev_notifier );
916
+
709
917
rtnl_register (PF_UNSPEC , RTM_NEWNEXTHOP , rtm_new_nexthop , NULL , 0 );
710
918
rtnl_register (PF_UNSPEC , RTM_DELNEXTHOP , rtm_del_nexthop , NULL , 0 );
711
919
rtnl_register (PF_UNSPEC , RTM_GETNEXTHOP , rtm_get_nexthop ,
0 commit comments