@@ -69,6 +69,28 @@ struct bpf_lwt_prog {
69
69
char * name ;
70
70
};
71
71
72
+ enum seg6_end_dt_mode {
73
+ DT_INVALID_MODE = - EINVAL ,
74
+ DT_LEGACY_MODE = 0 ,
75
+ DT_VRF_MODE = 1 ,
76
+ };
77
+
78
+ struct seg6_end_dt_info {
79
+ enum seg6_end_dt_mode mode ;
80
+
81
+ struct net * net ;
82
+ /* VRF device associated to the routing table used by the SRv6
83
+ * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
84
+ */
85
+ int vrf_ifindex ;
86
+ int vrf_table ;
87
+
88
+ /* tunneled packet proto and family (IPv4 or IPv6) */
89
+ __be16 proto ;
90
+ u16 family ;
91
+ int hdrlen ;
92
+ };
93
+
72
94
struct seg6_local_lwt {
73
95
int action ;
74
96
struct ipv6_sr_hdr * srh ;
@@ -78,6 +100,9 @@ struct seg6_local_lwt {
78
100
int iif ;
79
101
int oif ;
80
102
struct bpf_lwt_prog bpf ;
103
+ #ifdef CONFIG_NET_L3_MASTER_DEV
104
+ struct seg6_end_dt_info dt_info ;
105
+ #endif
81
106
82
107
int headroom ;
83
108
struct seg6_action_desc * desc ;
@@ -429,6 +454,203 @@ static int input_action_end_dx4(struct sk_buff *skb,
429
454
return - EINVAL ;
430
455
}
431
456
457
+ #ifdef CONFIG_NET_L3_MASTER_DEV
458
+ static struct net * fib6_config_get_net (const struct fib6_config * fib6_cfg )
459
+ {
460
+ const struct nl_info * nli = & fib6_cfg -> fc_nlinfo ;
461
+
462
+ return nli -> nl_net ;
463
+ }
464
+
465
+ static int __seg6_end_dt_vrf_build (struct seg6_local_lwt * slwt , const void * cfg ,
466
+ u16 family , struct netlink_ext_ack * extack )
467
+ {
468
+ struct seg6_end_dt_info * info = & slwt -> dt_info ;
469
+ int vrf_ifindex ;
470
+ struct net * net ;
471
+
472
+ net = fib6_config_get_net (cfg );
473
+
474
+ /* note that vrf_table was already set by parse_nla_vrftable() */
475
+ vrf_ifindex = l3mdev_ifindex_lookup_by_table_id (L3MDEV_TYPE_VRF , net ,
476
+ info -> vrf_table );
477
+ if (vrf_ifindex < 0 ) {
478
+ if (vrf_ifindex == - EPERM ) {
479
+ NL_SET_ERR_MSG (extack ,
480
+ "Strict mode for VRF is disabled" );
481
+ } else if (vrf_ifindex == - ENODEV ) {
482
+ NL_SET_ERR_MSG (extack ,
483
+ "Table has no associated VRF device" );
484
+ } else {
485
+ pr_debug ("seg6local: SRv6 End.DT* creation error=%d\n" ,
486
+ vrf_ifindex );
487
+ }
488
+
489
+ return vrf_ifindex ;
490
+ }
491
+
492
+ info -> net = net ;
493
+ info -> vrf_ifindex = vrf_ifindex ;
494
+
495
+ switch (family ) {
496
+ case AF_INET :
497
+ info -> proto = htons (ETH_P_IP );
498
+ info -> hdrlen = sizeof (struct iphdr );
499
+ break ;
500
+ default :
501
+ return - EINVAL ;
502
+ }
503
+
504
+ info -> family = family ;
505
+ info -> mode = DT_VRF_MODE ;
506
+
507
+ return 0 ;
508
+ }
509
+
510
+ /* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
511
+ * routes the IPv4/IPv6 packet by looking at the configured routing table.
512
+ *
513
+ * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
514
+ * Routing Header packets) from several interfaces and the outer IPv6
515
+ * destination address (DA) is used for retrieving the specific instance of the
516
+ * End.DT4/DT6 behavior that should process the packets.
517
+ *
518
+ * However, the inner IPv4/IPv6 packet is not really bound to any receiving
519
+ * interface and thus the End.DT4/DT6 sets the VRF (associated with the
520
+ * corresponding routing table) as the *receiving* interface.
521
+ * In other words, the End.DT4/DT6 processes a packet as if it has been received
522
+ * directly by the VRF (and not by one of its slave devices, if any).
523
+ * In this way, the VRF interface is used for routing the IPv4/IPv6 packet
524
+ * according to the routing table configured by the End.DT4/DT6 instance.
525
+ *
526
+ * This design allows you to get some interesting features like:
527
+ * 1) the statistics on rx packets;
528
+ * 2) the possibility to install a packet sniffer on the receiving interface
529
+ * (the VRF one) for looking at the incoming packets;
530
+ * 3) the possibility to leverage the netfilter prerouting hook for the inner
531
+ * IPv4 packet.
532
+ *
533
+ * This function returns:
534
+ * - the sk_buff* when the VRF rcv handler has processed the packet correctly;
535
+ * - NULL when the skb is consumed by the VRF rcv handler;
536
+ * - a pointer which encodes a negative error number in case of error.
537
+ * Note that in this case, the function takes care of freeing the skb.
538
+ */
539
+ static struct sk_buff * end_dt_vrf_rcv (struct sk_buff * skb , u16 family ,
540
+ struct net_device * dev )
541
+ {
542
+ /* based on l3mdev_ip_rcv; we are only interested in the master */
543
+ if (unlikely (!netif_is_l3_master (dev ) && !netif_has_l3_rx_handler (dev )))
544
+ goto drop ;
545
+
546
+ if (unlikely (!dev -> l3mdev_ops -> l3mdev_l3_rcv ))
547
+ goto drop ;
548
+
549
+ /* the decap packet IPv4/IPv6 does not come with any mac header info.
550
+ * We must unset the mac header to allow the VRF device to rebuild it,
551
+ * just in case there is a sniffer attached on the device.
552
+ */
553
+ skb_unset_mac_header (skb );
554
+
555
+ skb = dev -> l3mdev_ops -> l3mdev_l3_rcv (dev , skb , family );
556
+ if (!skb )
557
+ /* the skb buffer was consumed by the handler */
558
+ return NULL ;
559
+
560
+ /* when a packet is received by a VRF or by one of its slaves, the
561
+ * master device reference is set into the skb.
562
+ */
563
+ if (unlikely (skb -> dev != dev || skb -> skb_iif != dev -> ifindex ))
564
+ goto drop ;
565
+
566
+ return skb ;
567
+
568
+ drop :
569
+ kfree_skb (skb );
570
+ return ERR_PTR (- EINVAL );
571
+ }
572
+
573
+ static struct net_device * end_dt_get_vrf_rcu (struct sk_buff * skb ,
574
+ struct seg6_end_dt_info * info )
575
+ {
576
+ int vrf_ifindex = info -> vrf_ifindex ;
577
+ struct net * net = info -> net ;
578
+
579
+ if (unlikely (vrf_ifindex < 0 ))
580
+ goto error ;
581
+
582
+ if (unlikely (!net_eq (dev_net (skb -> dev ), net )))
583
+ goto error ;
584
+
585
+ return dev_get_by_index_rcu (net , vrf_ifindex );
586
+
587
+ error :
588
+ return NULL ;
589
+ }
590
+
591
+ static struct sk_buff * end_dt_vrf_core (struct sk_buff * skb ,
592
+ struct seg6_local_lwt * slwt )
593
+ {
594
+ struct seg6_end_dt_info * info = & slwt -> dt_info ;
595
+ struct net_device * vrf ;
596
+
597
+ vrf = end_dt_get_vrf_rcu (skb , info );
598
+ if (unlikely (!vrf ))
599
+ goto drop ;
600
+
601
+ skb -> protocol = info -> proto ;
602
+
603
+ skb_dst_drop (skb );
604
+
605
+ skb_set_transport_header (skb , info -> hdrlen );
606
+
607
+ return end_dt_vrf_rcv (skb , info -> family , vrf );
608
+
609
+ drop :
610
+ kfree_skb (skb );
611
+ return ERR_PTR (- EINVAL );
612
+ }
613
+
614
+ static int input_action_end_dt4 (struct sk_buff * skb ,
615
+ struct seg6_local_lwt * slwt )
616
+ {
617
+ struct iphdr * iph ;
618
+ int err ;
619
+
620
+ if (!decap_and_validate (skb , IPPROTO_IPIP ))
621
+ goto drop ;
622
+
623
+ if (!pskb_may_pull (skb , sizeof (struct iphdr )))
624
+ goto drop ;
625
+
626
+ skb = end_dt_vrf_core (skb , slwt );
627
+ if (!skb )
628
+ /* packet has been processed and consumed by the VRF */
629
+ return 0 ;
630
+
631
+ if (IS_ERR (skb ))
632
+ return PTR_ERR (skb );
633
+
634
+ iph = ip_hdr (skb );
635
+
636
+ err = ip_route_input (skb , iph -> daddr , iph -> saddr , 0 , skb -> dev );
637
+ if (unlikely (err ))
638
+ goto drop ;
639
+
640
+ return dst_input (skb );
641
+
642
+ drop :
643
+ kfree_skb (skb );
644
+ return - EINVAL ;
645
+ }
646
+
647
+ static int seg6_end_dt4_build (struct seg6_local_lwt * slwt , const void * cfg ,
648
+ struct netlink_ext_ack * extack )
649
+ {
650
+ return __seg6_end_dt_vrf_build (slwt , cfg , AF_INET , extack );
651
+ }
652
+ #endif
653
+
432
654
static int input_action_end_dt6 (struct sk_buff * skb ,
433
655
struct seg6_local_lwt * slwt )
434
656
{
@@ -617,6 +839,16 @@ static struct seg6_action_desc seg6_action_table[] = {
617
839
.attrs = (1 << SEG6_LOCAL_NH4 ),
618
840
.input = input_action_end_dx4 ,
619
841
},
842
+ {
843
+ .action = SEG6_LOCAL_ACTION_END_DT4 ,
844
+ .attrs = (1 << SEG6_LOCAL_VRFTABLE ),
845
+ #ifdef CONFIG_NET_L3_MASTER_DEV
846
+ .input = input_action_end_dt4 ,
847
+ .slwt_ops = {
848
+ .build_state = seg6_end_dt4_build ,
849
+ },
850
+ #endif
851
+ },
620
852
{
621
853
.action = SEG6_LOCAL_ACTION_END_DT6 ,
622
854
.attrs = (1 << SEG6_LOCAL_TABLE ),
@@ -677,6 +909,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
677
909
[SEG6_LOCAL_ACTION ] = { .type = NLA_U32 },
678
910
[SEG6_LOCAL_SRH ] = { .type = NLA_BINARY },
679
911
[SEG6_LOCAL_TABLE ] = { .type = NLA_U32 },
912
+ [SEG6_LOCAL_VRFTABLE ] = { .type = NLA_U32 },
680
913
[SEG6_LOCAL_NH4 ] = { .type = NLA_BINARY ,
681
914
.len = sizeof (struct in_addr ) },
682
915
[SEG6_LOCAL_NH6 ] = { .type = NLA_BINARY ,
@@ -766,6 +999,53 @@ static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
766
999
return 0 ;
767
1000
}
768
1001
1002
+ static struct
1003
+ seg6_end_dt_info * seg6_possible_end_dt_info (struct seg6_local_lwt * slwt )
1004
+ {
1005
+ #ifdef CONFIG_NET_L3_MASTER_DEV
1006
+ return & slwt -> dt_info ;
1007
+ #else
1008
+ return ERR_PTR (- EOPNOTSUPP );
1009
+ #endif
1010
+ }
1011
+
1012
+ static int parse_nla_vrftable (struct nlattr * * attrs ,
1013
+ struct seg6_local_lwt * slwt )
1014
+ {
1015
+ struct seg6_end_dt_info * info = seg6_possible_end_dt_info (slwt );
1016
+
1017
+ if (IS_ERR (info ))
1018
+ return PTR_ERR (info );
1019
+
1020
+ info -> vrf_table = nla_get_u32 (attrs [SEG6_LOCAL_VRFTABLE ]);
1021
+
1022
+ return 0 ;
1023
+ }
1024
+
1025
+ static int put_nla_vrftable (struct sk_buff * skb , struct seg6_local_lwt * slwt )
1026
+ {
1027
+ struct seg6_end_dt_info * info = seg6_possible_end_dt_info (slwt );
1028
+
1029
+ if (IS_ERR (info ))
1030
+ return PTR_ERR (info );
1031
+
1032
+ if (nla_put_u32 (skb , SEG6_LOCAL_VRFTABLE , info -> vrf_table ))
1033
+ return - EMSGSIZE ;
1034
+
1035
+ return 0 ;
1036
+ }
1037
+
1038
+ static int cmp_nla_vrftable (struct seg6_local_lwt * a , struct seg6_local_lwt * b )
1039
+ {
1040
+ struct seg6_end_dt_info * info_a = seg6_possible_end_dt_info (a );
1041
+ struct seg6_end_dt_info * info_b = seg6_possible_end_dt_info (b );
1042
+
1043
+ if (info_a -> vrf_table != info_b -> vrf_table )
1044
+ return 1 ;
1045
+
1046
+ return 0 ;
1047
+ }
1048
+
769
1049
static int parse_nla_nh4 (struct nlattr * * attrs , struct seg6_local_lwt * slwt )
770
1050
{
771
1051
memcpy (& slwt -> nh4 , nla_data (attrs [SEG6_LOCAL_NH4 ]),
@@ -984,6 +1264,10 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
984
1264
.cmp = cmp_nla_bpf ,
985
1265
.destroy = destroy_attr_bpf },
986
1266
1267
+ [SEG6_LOCAL_VRFTABLE ] = { .parse = parse_nla_vrftable ,
1268
+ .put = put_nla_vrftable ,
1269
+ .cmp = cmp_nla_vrftable },
1270
+
987
1271
};
988
1272
989
1273
/* call the destroy() callback (if available) for each set attribute in
@@ -1283,6 +1567,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
1283
1567
nla_total_size (MAX_PROG_NAME ) +
1284
1568
nla_total_size (4 );
1285
1569
1570
+ if (attrs & (1 << SEG6_LOCAL_VRFTABLE ))
1571
+ nlsize += nla_total_size (4 );
1572
+
1286
1573
return nlsize ;
1287
1574
}
1288
1575
0 commit comments