Skip to content

Commit e1e5314

Browse files
Jiri Benc authored and davem330 committed
vxlan: implement GPE
Implement VXLAN-GPE. Only COLLECT_METADATA is supported for now (it is possible to support static configuration, too, if there is demand for it).

The GPE header parsing has to be moved before iptunnel_pull_header, as we need to know the protocol.

v2: Removed what was called "L2 mode" in v1 of the patchset. Only "L3 mode" (now called "raw mode") is added by this patch. This mode does not allow an Ethernet header to be encapsulated in VXLAN-GPE when using ip route to specify the encapsulation; the IP header is encapsulated instead. The patch does support Ethernet to be encapsulated, though, using ETH_P_TEB in skb->protocol. This will be utilized by other COLLECT_METADATA users (openvswitch in particular).

If there is ever demand for Ethernet encapsulation with VXLAN-GPE using ip route, it's easy to add a new flag switching the interface to "Ethernet mode" (called "L2 mode" in v1 of this patchset). For now, leave this out; it seems we don't need it.

Disallowed more flag combinations, especially RCO with GPE.
Added comment explaining that GBP and GPE cannot be set together.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent a6d5bbf commit e1e5314

File tree

3 files changed

+222
-17
lines changed

3 files changed

+222
-17
lines changed

drivers/net/vxlan.c

Lines changed: 153 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,6 +1192,45 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
11921192
unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
11931193
}
11941194

1195+
static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
1196+
__be32 *protocol,
1197+
struct sk_buff *skb, u32 vxflags)
1198+
{
1199+
struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
1200+
1201+
/* Need to have Next Protocol set for interfaces in GPE mode. */
1202+
if (!gpe->np_applied)
1203+
return false;
1204+
/* "The initial version is 0. If a receiver does not support the
1205+
* version indicated it MUST drop the packet.
1206+
*/
1207+
if (gpe->version != 0)
1208+
return false;
1209+
/* "When the O bit is set to 1, the packet is an OAM packet and OAM
1210+
* processing MUST occur." However, we don't implement OAM
1211+
* processing, thus drop the packet.
1212+
*/
1213+
if (gpe->oam_flag)
1214+
return false;
1215+
1216+
switch (gpe->next_protocol) {
1217+
case VXLAN_GPE_NP_IPV4:
1218+
*protocol = htons(ETH_P_IP);
1219+
break;
1220+
case VXLAN_GPE_NP_IPV6:
1221+
*protocol = htons(ETH_P_IPV6);
1222+
break;
1223+
case VXLAN_GPE_NP_ETHERNET:
1224+
*protocol = htons(ETH_P_TEB);
1225+
break;
1226+
default:
1227+
return false;
1228+
}
1229+
1230+
unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
1231+
return true;
1232+
}
1233+
11951234
static bool vxlan_set_mac(struct vxlan_dev *vxlan,
11961235
struct vxlan_sock *vs,
11971236
struct sk_buff *skb)
@@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
12571296
struct vxlanhdr unparsed;
12581297
struct vxlan_metadata _md;
12591298
struct vxlan_metadata *md = &_md;
1299+
__be32 protocol = htons(ETH_P_TEB);
1300+
bool raw_proto = false;
12601301
void *oiph;
12611302

1262-
/* Need Vxlan and inner Ethernet header to be present */
1303+
/* Need UDP and VXLAN header to be present */
12631304
if (!pskb_may_pull(skb, VXLAN_HLEN))
12641305
return 1;
12651306

@@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
12831324
if (!vxlan)
12841325
goto drop;
12851326

1286-
if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
1287-
!net_eq(vxlan->net, dev_net(vxlan->dev))))
1288-
goto drop;
1327+
/* For backwards compatibility, only allow reserved fields to be
1328+
* used by VXLAN extensions if explicitly requested.
1329+
*/
1330+
if (vs->flags & VXLAN_F_GPE) {
1331+
if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
1332+
goto drop;
1333+
raw_proto = true;
1334+
}
1335+
1336+
if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
1337+
!net_eq(vxlan->net, dev_net(vxlan->dev))))
1338+
goto drop;
12891339

12901340
if (vxlan_collect_metadata(vs)) {
12911341
__be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
@@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
13041354
memset(md, 0, sizeof(*md));
13051355
}
13061356

1307-
/* For backwards compatibility, only allow reserved fields to be
1308-
* used by VXLAN extensions if explicitly requested.
1309-
*/
13101357
if (vs->flags & VXLAN_F_REMCSUM_RX)
13111358
if (!vxlan_remcsum(&unparsed, skb, vs->flags))
13121359
goto drop;
13131360
if (vs->flags & VXLAN_F_GBP)
13141361
vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
1362+
/* Note that GBP and GPE can never be active together. This is
1363+
* ensured in vxlan_dev_configure.
1364+
*/
13151365

13161366
if (unparsed.vx_flags || unparsed.vx_vni) {
13171367
/* If there are any unprocessed flags remaining treat
@@ -1325,8 +1375,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
13251375
goto drop;
13261376
}
13271377

1328-
if (!vxlan_set_mac(vxlan, vs, skb))
1329-
goto drop;
1378+
if (!raw_proto) {
1379+
if (!vxlan_set_mac(vxlan, vs, skb))
1380+
goto drop;
1381+
} else {
1382+
skb->dev = vxlan->dev;
1383+
skb->pkt_type = PACKET_HOST;
1384+
}
13301385

13311386
oiph = skb_network_header(skb);
13321387
skb_reset_network_header(skb);
@@ -1685,6 +1740,27 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
16851740
gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
16861741
}
16871742

1743+
static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
1744+
__be16 protocol)
1745+
{
1746+
struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
1747+
1748+
gpe->np_applied = 1;
1749+
1750+
switch (protocol) {
1751+
case htons(ETH_P_IP):
1752+
gpe->next_protocol = VXLAN_GPE_NP_IPV4;
1753+
return 0;
1754+
case htons(ETH_P_IPV6):
1755+
gpe->next_protocol = VXLAN_GPE_NP_IPV6;
1756+
return 0;
1757+
case htons(ETH_P_TEB):
1758+
gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
1759+
return 0;
1760+
}
1761+
return -EPFNOSUPPORT;
1762+
}
1763+
16881764
static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
16891765
int iphdr_len, __be32 vni,
16901766
struct vxlan_metadata *md, u32 vxflags,
@@ -1694,6 +1770,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
16941770
int min_headroom;
16951771
int err;
16961772
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
1773+
__be16 inner_protocol = htons(ETH_P_TEB);
16971774

16981775
if ((vxflags & VXLAN_F_REMCSUM_TX) &&
16991776
skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1712,10 +1789,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
17121789

17131790
/* Need space for new headers (invalidates iph ptr) */
17141791
err = skb_cow_head(skb, min_headroom);
1715-
if (unlikely(err)) {
1716-
kfree_skb(skb);
1717-
return err;
1718-
}
1792+
if (unlikely(err))
1793+
goto out_free;
17191794

17201795
skb = vlan_hwaccel_push_inside(skb);
17211796
if (WARN_ON(!skb))
@@ -1744,9 +1819,19 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
17441819

17451820
if (vxflags & VXLAN_F_GBP)
17461821
vxlan_build_gbp_hdr(vxh, vxflags, md);
1822+
if (vxflags & VXLAN_F_GPE) {
1823+
err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
1824+
if (err < 0)
1825+
goto out_free;
1826+
inner_protocol = skb->protocol;
1827+
}
17471828

1748-
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
1829+
skb_set_inner_protocol(skb, inner_protocol);
17491830
return 0;
1831+
1832+
out_free:
1833+
kfree_skb(skb);
1834+
return err;
17501835
}
17511836

17521837
static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
@@ -2421,6 +2506,17 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
24212506
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
24222507
};
24232508

2509+
static const struct net_device_ops vxlan_netdev_raw_ops = {
2510+
.ndo_init = vxlan_init,
2511+
.ndo_uninit = vxlan_uninit,
2512+
.ndo_open = vxlan_open,
2513+
.ndo_stop = vxlan_stop,
2514+
.ndo_start_xmit = vxlan_xmit,
2515+
.ndo_get_stats64 = ip_tunnel_get_stats64,
2516+
.ndo_change_mtu = vxlan_change_mtu,
2517+
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
2518+
};
2519+
24242520
/* Info for udev, that this is a virtual tunnel endpoint */
24252521
static struct device_type vxlan_type = {
24262522
.name = "vxlan",
@@ -2500,6 +2596,17 @@ static void vxlan_ether_setup(struct net_device *dev)
25002596
dev->netdev_ops = &vxlan_netdev_ether_ops;
25012597
}
25022598

2599+
static void vxlan_raw_setup(struct net_device *dev)
2600+
{
2601+
dev->type = ARPHRD_NONE;
2602+
dev->hard_header_len = 0;
2603+
dev->addr_len = 0;
2604+
dev->mtu = ETH_DATA_LEN;
2605+
dev->tx_queue_len = 1000;
2606+
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
2607+
dev->netdev_ops = &vxlan_netdev_raw_ops;
2608+
}
2609+
25032610
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
25042611
[IFLA_VXLAN_ID] = { .type = NLA_U32 },
25052612
[IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
@@ -2526,6 +2633,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
25262633
[IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
25272634
[IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
25282635
[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
2636+
[IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
25292637
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
25302638
};
25312639

@@ -2726,7 +2834,20 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
27262834
__be16 default_port = vxlan->cfg.dst_port;
27272835
struct net_device *lowerdev = NULL;
27282836

2729-
vxlan_ether_setup(dev);
2837+
if (conf->flags & VXLAN_F_GPE) {
2838+
if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
2839+
return -EINVAL;
2840+
/* For now, allow GPE only together with COLLECT_METADATA.
2841+
* This can be relaxed later; in such case, the other side
2842+
* of the PtP link will have to be provided.
2843+
*/
2844+
if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
2845+
return -EINVAL;
2846+
2847+
vxlan_raw_setup(dev);
2848+
} else {
2849+
vxlan_ether_setup(dev);
2850+
}
27302851

27312852
vxlan->net = src_net;
27322853

@@ -2789,8 +2910,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
27892910
dev->needed_headroom = needed_headroom;
27902911

27912912
memcpy(&vxlan->cfg, conf, sizeof(*conf));
2792-
if (!vxlan->cfg.dst_port)
2793-
vxlan->cfg.dst_port = default_port;
2913+
if (!vxlan->cfg.dst_port) {
	/* No destination port configured: pick the default. The field
	 * holds a network-byte-order value (default_port, the non-GPE
	 * fallback, is a __be16), so the GPE port number must be
	 * converted with htons(); storing a bare 4790 would select a
	 * byte-swapped port on little-endian hosts.
	 */
	if (conf->flags & VXLAN_F_GPE)
		vxlan->cfg.dst_port = htons(4790); /* IANA assigned VXLAN-GPE port */
	else
		vxlan->cfg.dst_port = default_port;
}
27942919
vxlan->flags |= conf->flags;
27952920

27962921
if (!vxlan->cfg.age_interval)
@@ -2961,6 +3086,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
29613086
if (data[IFLA_VXLAN_GBP])
29623087
conf.flags |= VXLAN_F_GBP;
29633088

3089+
if (data[IFLA_VXLAN_GPE])
3090+
conf.flags |= VXLAN_F_GPE;
3091+
29643092
if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
29653093
conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
29663094

@@ -2977,6 +3105,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
29773105
case -EEXIST:
29783106
pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
29793107
break;
3108+
3109+
case -EINVAL:
3110+
pr_info("unsupported combination of extensions\n");
3111+
break;
29803112
}
29813113

29823114
return err;
@@ -3104,6 +3236,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
31043236
nla_put_flag(skb, IFLA_VXLAN_GBP))
31053237
goto nla_put_failure;
31063238

3239+
if (vxlan->flags & VXLAN_F_GPE &&
3240+
nla_put_flag(skb, IFLA_VXLAN_GPE))
3241+
goto nla_put_failure;
3242+
31073243
if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
31083244
nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
31093245
goto nla_put_failure;

include/net/vxlan.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,64 @@ struct vxlanhdr_gbp {
119119
#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16)
120120
#define VXLAN_GBP_ID_MASK (0xFFFF)
121121

122+
/*
123+
* VXLAN Generic Protocol Extension (VXLAN_F_GPE):
124+
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
125+
* |R|R|Ver|I|P|R|O| Reserved |Next Protocol |
126+
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
127+
* | VXLAN Network Identifier (VNI) | Reserved |
128+
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
129+
*
130+
* Ver = Version. Indicates VXLAN GPE protocol version.
131+
*
132+
* P = Next Protocol Bit. The P bit is set to indicate that the
133+
* Next Protocol field is present.
134+
*
135+
* O = OAM Flag Bit. The O bit is set to indicate that the packet
136+
* is an OAM packet.
137+
*
138+
* Next Protocol = This 8 bit field indicates the protocol header
139+
* immediately following the VXLAN GPE header.
140+
*
141+
* https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
142+
*/
143+
144+
struct vxlanhdr_gpe {
145+
#if defined(__LITTLE_ENDIAN_BITFIELD)
146+
u8 oam_flag:1,
147+
reserved_flags1:1,
148+
np_applied:1,
149+
instance_applied:1,
150+
version:2,
151+
reserved_flags2:2;
152+
#elif defined(__BIG_ENDIAN_BITFIELD)
153+
u8 reserved_flags2:2,
154+
version:2,
155+
instance_applied:1,
156+
np_applied:1,
157+
reserved_flags1:1,
158+
oam_flag:1;
159+
#endif
160+
u8 reserved_flags3;
161+
u8 reserved_flags4;
162+
u8 next_protocol;
163+
__be32 vx_vni;
164+
};
165+
166+
/* VXLAN-GPE header flags, expressed as big-endian masks over the first
 * 32-bit word of the header (|R|R|Ver|I|P|R|O| occupies bits 31..24).
 */
#define VXLAN_HF_VER	cpu_to_be32(BIT(29) | BIT(28))	/* Ver (2 bits) */
#define VXLAN_HF_NP	cpu_to_be32(BIT(26))		/* P bit */
#define VXLAN_HF_OAM	cpu_to_be32(BIT(24))		/* O bit */

/* Every bit of the first header word that GPE parsing accounts for:
 * the three flag fields above plus the 8-bit Next Protocol field.
 */
#define VXLAN_GPE_USED_BITS \
	(VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
	 cpu_to_be32(0xff))

/* VXLAN-GPE header Next Protocol values. */
#define VXLAN_GPE_NP_IPV4	0x01
#define VXLAN_GPE_NP_IPV6	0x02
#define VXLAN_GPE_NP_ETHERNET	0x03
#define VXLAN_GPE_NP_NSH	0x04
179+
122180
struct vxlan_metadata {
123181
u32 gbp;
124182
};
@@ -206,16 +264,26 @@ struct vxlan_dev {
206264
#define VXLAN_F_GBP 0x800
207265
#define VXLAN_F_REMCSUM_NOPARTIAL 0x1000
208266
#define VXLAN_F_COLLECT_METADATA 0x2000
267+
#define VXLAN_F_GPE 0x4000
209268

210269
/* Flags that are used in the receive path. These flags must match in
211270
* order for a socket to be shareable
212271
*/
213272
#define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \
273+
VXLAN_F_GPE | \
214274
VXLAN_F_UDP_ZERO_CSUM6_RX | \
215275
VXLAN_F_REMCSUM_RX | \
216276
VXLAN_F_REMCSUM_NOPARTIAL | \
217277
VXLAN_F_COLLECT_METADATA)
218278

279+
/* Flags that can be set together with VXLAN_F_GPE. A GPE configuration
 * carrying any flag outside this set is refused with -EINVAL by
 * vxlan_dev_configure().
 */
#define VXLAN_F_ALLOWED_GPE \
	(VXLAN_F_GPE | \
	 VXLAN_F_IPV6 | \
	 VXLAN_F_UDP_ZERO_CSUM_TX | \
	 VXLAN_F_UDP_ZERO_CSUM6_TX | \
	 VXLAN_F_UDP_ZERO_CSUM6_RX | \
	 VXLAN_F_COLLECT_METADATA)
286+
219287
struct net_device *vxlan_dev_create(struct net *net, const char *name,
220288
u8 name_assign_type, struct vxlan_config *conf);
221289

include/uapi/linux/if_link.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ enum {
488488
IFLA_VXLAN_REMCSUM_NOPARTIAL,
489489
IFLA_VXLAN_COLLECT_METADATA,
490490
IFLA_VXLAN_LABEL,
491+
IFLA_VXLAN_GPE,
491492
__IFLA_VXLAN_MAX
492493
};
493494
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)

0 commit comments

Comments
 (0)