@@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
 	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
 }
 
-/* Find VXLAN socket based on network namespace, address family and UDP port */
-static struct vxlan_sock *vxlan_find_sock(struct net *net,
-					  sa_family_t family, __be16 port)
+/* Find VXLAN socket based on network namespace, address family and UDP port
+ * and enabled unshareable flags.
+ */
+static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
+					  __be16 port, u32 flags)
 {
 	struct vxlan_sock *vs;
+	u32 match_flags = flags & VXLAN_F_UNSHAREABLE;
 
 	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
 		if (inet_sk(vs->sock->sk)->inet_sport == port &&
-		    inet_sk(vs->sock->sk)->sk.sk_family == family)
+		    inet_sk(vs->sock->sk)->sk.sk_family == family &&
+		    (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags)
 			return vs;
 	}
 	return NULL;
@@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
 
 /* Look up VNI in a per net namespace table */
 static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
-					sa_family_t family, __be16 port)
+					sa_family_t family, __be16 port,
+					u32 flags)
 {
 	struct vxlan_sock *vs;
 
-	vs = vxlan_find_sock(net, family, port);
+	vs = vxlan_find_sock(net, family, port, flags);
 	if (!vs)
 		return NULL;
 
@@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 			continue;
 
 		vh2 = (struct vxlanhdr *)(p->data + off_vx);
-		if (vh->vx_vni != vh2->vx_vni) {
+		if (vh->vx_flags != vh2->vx_flags ||
+		    vh->vx_vni != vh2->vx_vni) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
@@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	struct vxlan_sock *vs;
 	struct vxlanhdr *vxh;
 	u32 flags, vni;
+	struct vxlan_metadata md = {0};
 
 	/* Need Vxlan and inner Ethernet header to be present */
 	if (!pskb_may_pull(skb, VXLAN_HLEN))
@@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		vni &= VXLAN_VID_MASK;
 	}
 
+	/* For backwards compatibility, only allow reserved fields to be
+	 * used by VXLAN extensions if explicitly requested.
+	 */
+	if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
+		struct vxlanhdr_gbp *gbp;
+
+		gbp = (struct vxlanhdr_gbp *)vxh;
+		md.gbp = ntohs(gbp->policy_id);
+
+		if (gbp->dont_learn)
+			md.gbp |= VXLAN_GBP_DONT_LEARN;
+
+		if (gbp->policy_applied)
+			md.gbp |= VXLAN_GBP_POLICY_APPLIED;
+
+		flags &= ~VXLAN_GBP_USED_BITS;
+	}
+
 	if (flags || (vni & ~VXLAN_VID_MASK)) {
 		/* If there are any unprocessed flags remaining treat
 		 * this as a malformed packet. This behavior diverges from
@@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		goto bad_flags;
 	}
 
-	vs->rcv(vs, skb, vxh->vx_vni);
+	md.vni = vxh->vx_vni;
+	vs->rcv(vs, skb, &md);
 	return 0;
 
 drop:
@@ -1246,8 +1272,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	return 1;
 }
 
-static void vxlan_rcv(struct vxlan_sock *vs,
-		      struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+		      struct vxlan_metadata *md)
 {
 	struct iphdr *oip = NULL;
 	struct ipv6hdr *oip6 = NULL;
@@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
 	int err = 0;
 	union vxlan_addr *remote_ip;
 
-	vni = ntohl(vx_vni) >> 8;
+	vni = ntohl(md->vni) >> 8;
 	/* Is this VNI defined? */
 	vxlan = vxlan_vs_find_vni(vs, vni);
 	if (!vxlan)
@@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
 		goto drop;
 
 	skb_reset_network_header(skb);
+	skb->mark = md->gbp;
 
 	if (oip6)
 		err = IP6_ECN_decapsulate(oip6, skb);
@@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 	return false;
 }
 
+static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
+				struct vxlan_metadata *md)
+{
+	struct vxlanhdr_gbp *gbp;
+
+	gbp = (struct vxlanhdr_gbp *)vxh;
+	vxh->vx_flags |= htonl(VXLAN_HF_GBP);
+
+	if (md->gbp & VXLAN_GBP_DONT_LEARN)
+		gbp->dont_learn = 1;
+
+	if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
+		gbp->policy_applied = 1;
+
+	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 			   struct dst_entry *dst, struct sk_buff *skb,
 			   struct net_device *dev, struct in6_addr *saddr,
 			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
-			   __be16 src_port, __be16 dst_port, __be32 vni,
-			   bool xnet)
+			   __be16 src_port, __be16 dst_port,
+			   struct vxlan_metadata *md, bool xnet)
 {
 	struct vxlanhdr *vxh;
 	int min_headroom;
@@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
-	vxh->vx_vni = vni;
+	vxh->vx_vni = md->vni;
 
 	if (type & SKB_GSO_TUNNEL_REMCSUM) {
 		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 		}
 	}
 
+	if (vs->flags & VXLAN_F_GBP)
+		vxlan_build_gbp_hdr(vxh, vs, md);
+
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
 	udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
@@ -1728,7 +1775,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
+		   __be16 src_port, __be16 dst_port,
+		   struct vxlan_metadata *md, bool xnet)
 {
 	struct vxlanhdr *vxh;
 	int min_headroom;
@@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
-	vxh->vx_vni = vni;
+	vxh->vx_vni = md->vni;
 
 	if (type & SKB_GSO_TUNNEL_REMCSUM) {
 		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 		}
 	}
 
+	if (vs->flags & VXLAN_F_GBP)
+		vxlan_build_gbp_hdr(vxh, vs, md);
+
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
 	return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
@@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	const struct iphdr *old_iph;
 	struct flowi4 fl4;
 	union vxlan_addr *dst;
+	struct vxlan_metadata md;
 	__be16 src_port = 0, dst_port;
 	u32 vni;
 	__be16 df = 0;
@@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 		ip_rt_put(rt);
 		dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-					   dst->sa.sa_family, dst_port);
+					   dst->sa.sa_family, dst_port,
+					   vxlan->flags);
 		if (!dst_vxlan)
 			goto tx_error;
 		vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
+		md.vni = htonl(vni << 8);
+		md.gbp = skb->mark;
 
 		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
 				     fl4.saddr, dst->sin.sin_addr.s_addr,
-				     tos, ttl, df, src_port, dst_port,
-				     htonl(vni << 8),
+				     tos, ttl, df, src_port, dst_port, &md,
 				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
 		if (err < 0) {
 			/* skb is already freed. */
@@ -1968,18 +2022,21 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 			dst_release(ndst);
 			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-						   dst->sa.sa_family, dst_port);
+						   dst->sa.sa_family, dst_port,
+						   vxlan->flags);
 			if (!dst_vxlan)
 				goto tx_error;
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
 			return;
 		}
 
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
+		md.vni = htonl(vni << 8);
+		md.gbp = skb->mark;
 
 		err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
 				      dev, &fl6.saddr, &fl6.daddr, 0, ttl,
-				      src_port, dst_port, htonl(vni << 8),
+				      src_port, dst_port, &md,
 				      !net_eq(vxlan->net, dev_net(vxlan->dev)));
 #endif
 	}
@@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev)
 
 	spin_lock(&vn->sock_lock);
 	vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
-			     vxlan->dst_port);
+			     vxlan->dst_port, vxlan->flags);
 	if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
 		/* If we have a socket with same port already, reuse it */
 		vxlan_vs_add_dev(vs, vxlan);
@@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 		return vs;
 
 	spin_lock(&vn->sock_lock);
-	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
+	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
 	if (vs && ((vs->rcv != rcv) ||
 		   !atomic_add_unless(&vs->refcnt, 1, 0)))
 		vs = ERR_PTR(-EBUSY);
@@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
 		vxlan->flags |= VXLAN_F_REMCSUM_RX;
 
+	if (data[IFLA_VXLAN_GBP])
+		vxlan->flags |= VXLAN_F_GBP;
+
 	if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
-			   vxlan->dst_port)) {
+			   vxlan->dst_port, vxlan->flags)) {
 		pr_info("duplicate VNI %u\n", vni);
 		return -EEXIST;
 	}
@@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
 		goto nla_put_failure;
 
+	if (vxlan->flags & VXLAN_F_GBP &&
+	    nla_put_flag(skb, IFLA_VXLAN_GBP))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure: