@@ -1192,6 +1192,45 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
1192
1192
unparsed -> vx_flags &= ~VXLAN_GBP_USED_BITS ;
1193
1193
}
1194
1194
1195
+ static bool vxlan_parse_gpe_hdr (struct vxlanhdr * unparsed ,
1196
+ __be32 * protocol ,
1197
+ struct sk_buff * skb , u32 vxflags )
1198
+ {
1199
+ struct vxlanhdr_gpe * gpe = (struct vxlanhdr_gpe * )unparsed ;
1200
+
1201
+ /* Need to have Next Protocol set for interfaces in GPE mode. */
1202
+ if (!gpe -> np_applied )
1203
+ return false;
1204
+ /* "The initial version is 0. If a receiver does not support the
1205
+ * version indicated it MUST drop the packet.
1206
+ */
1207
+ if (gpe -> version != 0 )
1208
+ return false;
1209
+ /* "When the O bit is set to 1, the packet is an OAM packet and OAM
1210
+ * processing MUST occur." However, we don't implement OAM
1211
+ * processing, thus drop the packet.
1212
+ */
1213
+ if (gpe -> oam_flag )
1214
+ return false;
1215
+
1216
+ switch (gpe -> next_protocol ) {
1217
+ case VXLAN_GPE_NP_IPV4 :
1218
+ * protocol = htons (ETH_P_IP );
1219
+ break ;
1220
+ case VXLAN_GPE_NP_IPV6 :
1221
+ * protocol = htons (ETH_P_IPV6 );
1222
+ break ;
1223
+ case VXLAN_GPE_NP_ETHERNET :
1224
+ * protocol = htons (ETH_P_TEB );
1225
+ break ;
1226
+ default :
1227
+ return false;
1228
+ }
1229
+
1230
+ unparsed -> vx_flags &= ~VXLAN_GPE_USED_BITS ;
1231
+ return true;
1232
+ }
1233
+
1195
1234
static bool vxlan_set_mac (struct vxlan_dev * vxlan ,
1196
1235
struct vxlan_sock * vs ,
1197
1236
struct sk_buff * skb )
@@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
1257
1296
struct vxlanhdr unparsed ;
1258
1297
struct vxlan_metadata _md ;
1259
1298
struct vxlan_metadata * md = & _md ;
1299
+ __be32 protocol = htons (ETH_P_TEB );
1300
+ bool raw_proto = false;
1260
1301
void * oiph ;
1261
1302
1262
- /* Need Vxlan and inner Ethernet header to be present */
1303
+ /* Need UDP and VXLAN header to be present */
1263
1304
if (!pskb_may_pull (skb , VXLAN_HLEN ))
1264
1305
return 1 ;
1265
1306
@@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
1283
1324
if (!vxlan )
1284
1325
goto drop ;
1285
1326
1286
- if (iptunnel_pull_header (skb , VXLAN_HLEN , htons (ETH_P_TEB ),
1287
- !net_eq (vxlan -> net , dev_net (vxlan -> dev ))))
1288
- goto drop ;
1327
+ /* For backwards compatibility, only allow reserved fields to be
1328
+ * used by VXLAN extensions if explicitly requested.
1329
+ */
1330
+ if (vs -> flags & VXLAN_F_GPE ) {
1331
+ if (!vxlan_parse_gpe_hdr (& unparsed , & protocol , skb , vs -> flags ))
1332
+ goto drop ;
1333
+ raw_proto = true;
1334
+ }
1335
+
1336
+ if (__iptunnel_pull_header (skb , VXLAN_HLEN , protocol , raw_proto ,
1337
+ !net_eq (vxlan -> net , dev_net (vxlan -> dev ))))
1338
+ goto drop ;
1289
1339
1290
1340
if (vxlan_collect_metadata (vs )) {
1291
1341
__be32 vni = vxlan_vni (vxlan_hdr (skb )-> vx_vni );
@@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
1304
1354
memset (md , 0 , sizeof (* md ));
1305
1355
}
1306
1356
1307
- /* For backwards compatibility, only allow reserved fields to be
1308
- * used by VXLAN extensions if explicitly requested.
1309
- */
1310
1357
if (vs -> flags & VXLAN_F_REMCSUM_RX )
1311
1358
if (!vxlan_remcsum (& unparsed , skb , vs -> flags ))
1312
1359
goto drop ;
1313
1360
if (vs -> flags & VXLAN_F_GBP )
1314
1361
vxlan_parse_gbp_hdr (& unparsed , skb , vs -> flags , md );
1362
+ /* Note that GBP and GPE can never be active together. This is
1363
+ * ensured in vxlan_dev_configure.
1364
+ */
1315
1365
1316
1366
if (unparsed .vx_flags || unparsed .vx_vni ) {
1317
1367
/* If there are any unprocessed flags remaining treat
@@ -1325,8 +1375,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
1325
1375
goto drop ;
1326
1376
}
1327
1377
1328
- if (!vxlan_set_mac (vxlan , vs , skb ))
1329
- goto drop ;
1378
+ if (!raw_proto ) {
1379
+ if (!vxlan_set_mac (vxlan , vs , skb ))
1380
+ goto drop ;
1381
+ } else {
1382
+ skb -> dev = vxlan -> dev ;
1383
+ skb -> pkt_type = PACKET_HOST ;
1384
+ }
1330
1385
1331
1386
oiph = skb_network_header (skb );
1332
1387
skb_reset_network_header (skb );
@@ -1685,6 +1740,27 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
1685
1740
gbp -> policy_id = htons (md -> gbp & VXLAN_GBP_ID_MASK );
1686
1741
}
1687
1742
1743
+ static int vxlan_build_gpe_hdr (struct vxlanhdr * vxh , u32 vxflags ,
1744
+ __be16 protocol )
1745
+ {
1746
+ struct vxlanhdr_gpe * gpe = (struct vxlanhdr_gpe * )vxh ;
1747
+
1748
+ gpe -> np_applied = 1 ;
1749
+
1750
+ switch (protocol ) {
1751
+ case htons (ETH_P_IP ):
1752
+ gpe -> next_protocol = VXLAN_GPE_NP_IPV4 ;
1753
+ return 0 ;
1754
+ case htons (ETH_P_IPV6 ):
1755
+ gpe -> next_protocol = VXLAN_GPE_NP_IPV6 ;
1756
+ return 0 ;
1757
+ case htons (ETH_P_TEB ):
1758
+ gpe -> next_protocol = VXLAN_GPE_NP_ETHERNET ;
1759
+ return 0 ;
1760
+ }
1761
+ return - EPFNOSUPPORT ;
1762
+ }
1763
+
1688
1764
static int vxlan_build_skb (struct sk_buff * skb , struct dst_entry * dst ,
1689
1765
int iphdr_len , __be32 vni ,
1690
1766
struct vxlan_metadata * md , u32 vxflags ,
@@ -1694,6 +1770,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
1694
1770
int min_headroom ;
1695
1771
int err ;
1696
1772
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL ;
1773
+ __be16 inner_protocol = htons (ETH_P_TEB );
1697
1774
1698
1775
if ((vxflags & VXLAN_F_REMCSUM_TX ) &&
1699
1776
skb -> ip_summed == CHECKSUM_PARTIAL ) {
@@ -1712,10 +1789,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
1712
1789
1713
1790
/* Need space for new headers (invalidates iph ptr) */
1714
1791
err = skb_cow_head (skb , min_headroom );
1715
- if (unlikely (err )) {
1716
- kfree_skb (skb );
1717
- return err ;
1718
- }
1792
+ if (unlikely (err ))
1793
+ goto out_free ;
1719
1794
1720
1795
skb = vlan_hwaccel_push_inside (skb );
1721
1796
if (WARN_ON (!skb ))
@@ -1744,9 +1819,19 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
1744
1819
1745
1820
if (vxflags & VXLAN_F_GBP )
1746
1821
vxlan_build_gbp_hdr (vxh , vxflags , md );
1822
+ if (vxflags & VXLAN_F_GPE ) {
1823
+ err = vxlan_build_gpe_hdr (vxh , vxflags , skb -> protocol );
1824
+ if (err < 0 )
1825
+ goto out_free ;
1826
+ inner_protocol = skb -> protocol ;
1827
+ }
1747
1828
1748
- skb_set_inner_protocol (skb , htons ( ETH_P_TEB ) );
1829
+ skb_set_inner_protocol (skb , inner_protocol );
1749
1830
return 0 ;
1831
+
1832
+ out_free :
1833
+ kfree_skb (skb );
1834
+ return err ;
1750
1835
}
1751
1836
1752
1837
static struct rtable * vxlan_get_route (struct vxlan_dev * vxlan ,
@@ -2421,6 +2506,17 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
2421
2506
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst ,
2422
2507
};
2423
2508
2509
+ static const struct net_device_ops vxlan_netdev_raw_ops = {
2510
+ .ndo_init = vxlan_init ,
2511
+ .ndo_uninit = vxlan_uninit ,
2512
+ .ndo_open = vxlan_open ,
2513
+ .ndo_stop = vxlan_stop ,
2514
+ .ndo_start_xmit = vxlan_xmit ,
2515
+ .ndo_get_stats64 = ip_tunnel_get_stats64 ,
2516
+ .ndo_change_mtu = vxlan_change_mtu ,
2517
+ .ndo_fill_metadata_dst = vxlan_fill_metadata_dst ,
2518
+ };
2519
+
2424
2520
/* Info for udev, that this is a virtual tunnel endpoint */
2425
2521
static struct device_type vxlan_type = {
2426
2522
.name = "vxlan" ,
@@ -2500,6 +2596,17 @@ static void vxlan_ether_setup(struct net_device *dev)
2500
2596
dev -> netdev_ops = & vxlan_netdev_ether_ops ;
2501
2597
}
2502
2598
2599
+ static void vxlan_raw_setup (struct net_device * dev )
2600
+ {
2601
+ dev -> type = ARPHRD_NONE ;
2602
+ dev -> hard_header_len = 0 ;
2603
+ dev -> addr_len = 0 ;
2604
+ dev -> mtu = ETH_DATA_LEN ;
2605
+ dev -> tx_queue_len = 1000 ;
2606
+ dev -> flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST ;
2607
+ dev -> netdev_ops = & vxlan_netdev_raw_ops ;
2608
+ }
2609
+
2503
2610
static const struct nla_policy vxlan_policy [IFLA_VXLAN_MAX + 1 ] = {
2504
2611
[IFLA_VXLAN_ID ] = { .type = NLA_U32 },
2505
2612
[IFLA_VXLAN_GROUP ] = { .len = FIELD_SIZEOF (struct iphdr , daddr ) },
@@ -2526,6 +2633,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
2526
2633
[IFLA_VXLAN_REMCSUM_TX ] = { .type = NLA_U8 },
2527
2634
[IFLA_VXLAN_REMCSUM_RX ] = { .type = NLA_U8 },
2528
2635
[IFLA_VXLAN_GBP ] = { .type = NLA_FLAG , },
2636
+ [IFLA_VXLAN_GPE ] = { .type = NLA_FLAG , },
2529
2637
[IFLA_VXLAN_REMCSUM_NOPARTIAL ] = { .type = NLA_FLAG },
2530
2638
};
2531
2639
@@ -2726,7 +2834,20 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
2726
2834
__be16 default_port = vxlan -> cfg .dst_port ;
2727
2835
struct net_device * lowerdev = NULL ;
2728
2836
2729
- vxlan_ether_setup (dev );
2837
+ if (conf -> flags & VXLAN_F_GPE ) {
2838
+ if (conf -> flags & ~VXLAN_F_ALLOWED_GPE )
2839
+ return - EINVAL ;
2840
+ /* For now, allow GPE only together with COLLECT_METADATA.
2841
+ * This can be relaxed later; in such case, the other side
2842
+ * of the PtP link will have to be provided.
2843
+ */
2844
+ if (!(conf -> flags & VXLAN_F_COLLECT_METADATA ))
2845
+ return - EINVAL ;
2846
+
2847
+ vxlan_raw_setup (dev );
2848
+ } else {
2849
+ vxlan_ether_setup (dev );
2850
+ }
2730
2851
2731
2852
vxlan -> net = src_net ;
2732
2853
@@ -2789,8 +2910,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
2789
2910
dev -> needed_headroom = needed_headroom ;
2790
2911
2791
2912
memcpy (& vxlan -> cfg , conf , sizeof (* conf ));
2792
- if (!vxlan -> cfg .dst_port )
2793
- vxlan -> cfg .dst_port = default_port ;
2913
+ if (!vxlan -> cfg .dst_port ) {
2914
+ if (conf -> flags & VXLAN_F_GPE )
2915
+ vxlan -> cfg .dst_port = 4790 ; /* IANA assigned VXLAN-GPE port */
2916
+ else
2917
+ vxlan -> cfg .dst_port = default_port ;
2918
+ }
2794
2919
vxlan -> flags |= conf -> flags ;
2795
2920
2796
2921
if (!vxlan -> cfg .age_interval )
@@ -2961,6 +3086,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
2961
3086
if (data [IFLA_VXLAN_GBP ])
2962
3087
conf .flags |= VXLAN_F_GBP ;
2963
3088
3089
+ if (data [IFLA_VXLAN_GPE ])
3090
+ conf .flags |= VXLAN_F_GPE ;
3091
+
2964
3092
if (data [IFLA_VXLAN_REMCSUM_NOPARTIAL ])
2965
3093
conf .flags |= VXLAN_F_REMCSUM_NOPARTIAL ;
2966
3094
@@ -2977,6 +3105,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
2977
3105
case - EEXIST :
2978
3106
pr_info ("duplicate VNI %u\n" , be32_to_cpu (conf .vni ));
2979
3107
break ;
3108
+
3109
+ case - EINVAL :
3110
+ pr_info ("unsupported combination of extensions\n" );
3111
+ break ;
2980
3112
}
2981
3113
2982
3114
return err ;
@@ -3104,6 +3236,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
3104
3236
nla_put_flag (skb , IFLA_VXLAN_GBP ))
3105
3237
goto nla_put_failure ;
3106
3238
3239
+ if (vxlan -> flags & VXLAN_F_GPE &&
3240
+ nla_put_flag (skb , IFLA_VXLAN_GPE ))
3241
+ goto nla_put_failure ;
3242
+
3107
3243
if (vxlan -> flags & VXLAN_F_REMCSUM_NOPARTIAL &&
3108
3244
nla_put_flag (skb , IFLA_VXLAN_REMCSUM_NOPARTIAL ))
3109
3245
goto nla_put_failure ;
0 commit comments