Skip to content

Commit 2e62fa6

Browse files
committed
Merge branch 'vxlan_group_policy_extension'
Thomas Graf says:

====================
VXLAN Group Policy Extension

Implements support for the Group Policy VXLAN extension [0] to provide a lightweight and simple security label mechanism across network peers based on VXLAN. The security context and associated metadata are mapped to/from skb->mark. This allows further mapping to a SELinux context using SECMARK, to implement ACLs directly with nftables, iptables, OVS, tc, etc.

The extension is disabled by default and should be run on a distinct port in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs which ignore unknown reserved bits will be able to receive VXLAN-GBP frames.

Simple usage example:

10.1.1.1:
 # ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
 # iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200

10.1.1.2:
 # ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
 # iptables -I INPUT -m mark --mark 0x200 -j DROP

iproute2 [1] and OVS [2] support will be provided in separate patches.

[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents 3f3558b + 1dd144c commit 2e62fa6

File tree

11 files changed

+491
-140
lines changed

11 files changed

+491
-140
lines changed

drivers/net/vxlan.c

Lines changed: 89 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
263263
return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
264264
}
265265

266-
/* Find VXLAN socket based on network namespace, address family and UDP port */
267-
static struct vxlan_sock *vxlan_find_sock(struct net *net,
268-
sa_family_t family, __be16 port)
266+
/* Find VXLAN socket based on network namespace, address family and UDP port
267+
* and enabled unshareable flags.
268+
*/
269+
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
270+
__be16 port, u32 flags)
269271
{
270272
struct vxlan_sock *vs;
273+
u32 match_flags = flags & VXLAN_F_UNSHAREABLE;
271274

272275
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
273276
if (inet_sk(vs->sock->sk)->inet_sport == port &&
274-
inet_sk(vs->sock->sk)->sk.sk_family == family)
277+
inet_sk(vs->sock->sk)->sk.sk_family == family &&
278+
(vs->flags & VXLAN_F_UNSHAREABLE) == match_flags)
275279
return vs;
276280
}
277281
return NULL;
@@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
291295

292296
/* Look up VNI in a per net namespace table */
293297
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
294-
sa_family_t family, __be16 port)
298+
sa_family_t family, __be16 port,
299+
u32 flags)
295300
{
296301
struct vxlan_sock *vs;
297302

298-
vs = vxlan_find_sock(net, family, port);
303+
vs = vxlan_find_sock(net, family, port, flags);
299304
if (!vs)
300305
return NULL;
301306

@@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
620625
continue;
621626

622627
vh2 = (struct vxlanhdr *)(p->data + off_vx);
623-
if (vh->vx_vni != vh2->vx_vni) {
628+
if (vh->vx_flags != vh2->vx_flags ||
629+
vh->vx_vni != vh2->vx_vni) {
624630
NAPI_GRO_CB(p)->same_flow = 0;
625631
continue;
626632
}
@@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
11831189
struct vxlan_sock *vs;
11841190
struct vxlanhdr *vxh;
11851191
u32 flags, vni;
1192+
struct vxlan_metadata md = {0};
11861193

11871194
/* Need Vxlan and inner Ethernet header to be present */
11881195
if (!pskb_may_pull(skb, VXLAN_HLEN))
@@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
12161223
vni &= VXLAN_VID_MASK;
12171224
}
12181225

1226+
/* For backwards compatibility, only allow reserved fields to be
1227+
* used by VXLAN extensions if explicitly requested.
1228+
*/
1229+
if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
1230+
struct vxlanhdr_gbp *gbp;
1231+
1232+
gbp = (struct vxlanhdr_gbp *)vxh;
1233+
md.gbp = ntohs(gbp->policy_id);
1234+
1235+
if (gbp->dont_learn)
1236+
md.gbp |= VXLAN_GBP_DONT_LEARN;
1237+
1238+
if (gbp->policy_applied)
1239+
md.gbp |= VXLAN_GBP_POLICY_APPLIED;
1240+
1241+
flags &= ~VXLAN_GBP_USED_BITS;
1242+
}
1243+
12191244
if (flags || (vni & ~VXLAN_VID_MASK)) {
12201245
/* If there are any unprocessed flags remaining treat
12211246
* this as a malformed packet. This behavior diverges from
@@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
12291254
goto bad_flags;
12301255
}
12311256

1232-
vs->rcv(vs, skb, vxh->vx_vni);
1257+
md.vni = vxh->vx_vni;
1258+
vs->rcv(vs, skb, &md);
12331259
return 0;
12341260

12351261
drop:
@@ -1246,8 +1272,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
12461272
return 1;
12471273
}
12481274

1249-
static void vxlan_rcv(struct vxlan_sock *vs,
1250-
struct sk_buff *skb, __be32 vx_vni)
1275+
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
1276+
struct vxlan_metadata *md)
12511277
{
12521278
struct iphdr *oip = NULL;
12531279
struct ipv6hdr *oip6 = NULL;
@@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
12581284
int err = 0;
12591285
union vxlan_addr *remote_ip;
12601286

1261-
vni = ntohl(vx_vni) >> 8;
1287+
vni = ntohl(md->vni) >> 8;
12621288
/* Is this VNI defined? */
12631289
vxlan = vxlan_vs_find_vni(vs, vni);
12641290
if (!vxlan)
@@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
12921318
goto drop;
12931319

12941320
skb_reset_network_header(skb);
1321+
skb->mark = md->gbp;
12951322

12961323
if (oip6)
12971324
err = IP6_ECN_decapsulate(oip6, skb);
@@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
16411668
return false;
16421669
}
16431670

1671+
static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
1672+
struct vxlan_metadata *md)
1673+
{
1674+
struct vxlanhdr_gbp *gbp;
1675+
1676+
gbp = (struct vxlanhdr_gbp *)vxh;
1677+
vxh->vx_flags |= htonl(VXLAN_HF_GBP);
1678+
1679+
if (md->gbp & VXLAN_GBP_DONT_LEARN)
1680+
gbp->dont_learn = 1;
1681+
1682+
if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
1683+
gbp->policy_applied = 1;
1684+
1685+
gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
1686+
}
1687+
16441688
#if IS_ENABLED(CONFIG_IPV6)
16451689
static int vxlan6_xmit_skb(struct vxlan_sock *vs,
16461690
struct dst_entry *dst, struct sk_buff *skb,
16471691
struct net_device *dev, struct in6_addr *saddr,
16481692
struct in6_addr *daddr, __u8 prio, __u8 ttl,
1649-
__be16 src_port, __be16 dst_port, __be32 vni,
1650-
bool xnet)
1693+
__be16 src_port, __be16 dst_port,
1694+
struct vxlan_metadata *md, bool xnet)
16511695
{
16521696
struct vxlanhdr *vxh;
16531697
int min_headroom;
@@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
16961740

16971741
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
16981742
vxh->vx_flags = htonl(VXLAN_HF_VNI);
1699-
vxh->vx_vni = vni;
1743+
vxh->vx_vni = md->vni;
17001744

17011745
if (type & SKB_GSO_TUNNEL_REMCSUM) {
17021746
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
17141758
}
17151759
}
17161760

1761+
if (vs->flags & VXLAN_F_GBP)
1762+
vxlan_build_gbp_hdr(vxh, vs, md);
1763+
17171764
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
17181765

17191766
udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
@@ -1728,7 +1775,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
17281775
int vxlan_xmit_skb(struct vxlan_sock *vs,
17291776
struct rtable *rt, struct sk_buff *skb,
17301777
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
1731-
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
1778+
__be16 src_port, __be16 dst_port,
1779+
struct vxlan_metadata *md, bool xnet)
17321780
{
17331781
struct vxlanhdr *vxh;
17341782
int min_headroom;
@@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
17711819

17721820
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
17731821
vxh->vx_flags = htonl(VXLAN_HF_VNI);
1774-
vxh->vx_vni = vni;
1822+
vxh->vx_vni = md->vni;
17751823

17761824
if (type & SKB_GSO_TUNNEL_REMCSUM) {
17771825
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
17891837
}
17901838
}
17911839

1840+
if (vs->flags & VXLAN_F_GBP)
1841+
vxlan_build_gbp_hdr(vxh, vs, md);
1842+
17921843
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
17931844

17941845
return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
@@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
18491900
const struct iphdr *old_iph;
18501901
struct flowi4 fl4;
18511902
union vxlan_addr *dst;
1903+
struct vxlan_metadata md;
18521904
__be16 src_port = 0, dst_port;
18531905
u32 vni;
18541906
__be16 df = 0;
@@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
19101962

19111963
ip_rt_put(rt);
19121964
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
1913-
dst->sa.sa_family, dst_port);
1965+
dst->sa.sa_family, dst_port,
1966+
vxlan->flags);
19141967
if (!dst_vxlan)
19151968
goto tx_error;
19161969
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
19191972

19201973
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
19211974
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
1975+
md.vni = htonl(vni << 8);
1976+
md.gbp = skb->mark;
19221977

19231978
err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
19241979
fl4.saddr, dst->sin.sin_addr.s_addr,
1925-
tos, ttl, df, src_port, dst_port,
1926-
htonl(vni << 8),
1980+
tos, ttl, df, src_port, dst_port, &md,
19271981
!net_eq(vxlan->net, dev_net(vxlan->dev)));
19281982
if (err < 0) {
19291983
/* skb is already freed. */
@@ -1968,18 +2022,21 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
19682022

19692023
dst_release(ndst);
19702024
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
1971-
dst->sa.sa_family, dst_port);
2025+
dst->sa.sa_family, dst_port,
2026+
vxlan->flags);
19722027
if (!dst_vxlan)
19732028
goto tx_error;
19742029
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
19752030
return;
19762031
}
19772032

19782033
ttl = ttl ? : ip6_dst_hoplimit(ndst);
2034+
md.vni = htonl(vni << 8);
2035+
md.gbp = skb->mark;
19792036

19802037
err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
19812038
dev, &fl6.saddr, &fl6.daddr, 0, ttl,
1982-
src_port, dst_port, htonl(vni << 8),
2039+
src_port, dst_port, &md,
19832040
!net_eq(vxlan->net, dev_net(vxlan->dev)));
19842041
#endif
19852042
}
@@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev)
21362193

21372194
spin_lock(&vn->sock_lock);
21382195
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
2139-
vxlan->dst_port);
2196+
vxlan->dst_port, vxlan->flags);
21402197
if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
21412198
/* If we have a socket with same port already, reuse it */
21422199
vxlan_vs_add_dev(vs, vxlan);
@@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
23822439
[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
23832440
[IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
23842441
[IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
2442+
[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
23852443
};
23862444

23872445
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
25422600
return vs;
25432601

25442602
spin_lock(&vn->sock_lock);
2545-
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
2603+
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
25462604
if (vs && ((vs->rcv != rcv) ||
25472605
!atomic_add_unless(&vs->refcnt, 1, 0)))
25482606
vs = ERR_PTR(-EBUSY);
@@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
27062764
nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
27072765
vxlan->flags |= VXLAN_F_REMCSUM_RX;
27082766

2767+
if (data[IFLA_VXLAN_GBP])
2768+
vxlan->flags |= VXLAN_F_GBP;
2769+
27092770
if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
2710-
vxlan->dst_port)) {
2771+
vxlan->dst_port, vxlan->flags)) {
27112772
pr_info("duplicate VNI %u\n", vni);
27122773
return -EEXIST;
27132774
}
@@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
28512912
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
28522913
goto nla_put_failure;
28532914

2915+
if (vxlan->flags & VXLAN_F_GBP &&
2916+
nla_put_flag(skb, IFLA_VXLAN_GBP))
2917+
goto nla_put_failure;
2918+
28542919
return 0;
28552920

28562921
nla_put_failure:

include/net/ip_tunnels.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,10 @@ struct ip_tunnel {
9797
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
9898
#define TUNNEL_OAM __cpu_to_be16(0x0200)
9999
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
100-
#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800)
100+
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
101+
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
102+
103+
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
101104

102105
struct tnl_ptk_info {
103106
__be16 flags;

0 commit comments

Comments
 (0)