@@ -75,8 +75,7 @@ static struct rtnl_link_ops vxlan_link_ops;
75
75
76
76
static const u8 all_zeros_mac [ETH_ALEN ];
77
77
78
- static struct vxlan_sock * vxlan_sock_add (struct net * net , __be16 port ,
79
- bool no_share , u32 flags );
78
+ static int vxlan_sock_add (struct vxlan_dev * vxlan );
80
79
81
80
/* per-network namespace private data for this module */
82
81
struct vxlan_net {
@@ -994,19 +993,30 @@ static bool vxlan_snoop(struct net_device *dev,
994
993
static bool vxlan_group_used (struct vxlan_net * vn , struct vxlan_dev * dev )
995
994
{
996
995
struct vxlan_dev * vxlan ;
996
+ unsigned short family = dev -> default_dst .remote_ip .sa .sa_family ;
997
997
998
998
/* The vxlan_sock is only used by dev, leaving group has
999
999
* no effect on other vxlan devices.
1000
1000
*/
1001
- if (atomic_read (& dev -> vn_sock -> refcnt ) == 1 )
1001
+ if (family == AF_INET && dev -> vn4_sock &&
1002
+ atomic_read (& dev -> vn4_sock -> refcnt ) == 1 )
1002
1003
return false;
1004
+ #if IS_ENABLED (CONFIG_IPV6 )
1005
+ if (family == AF_INET6 && dev -> vn6_sock &&
1006
+ atomic_read (& dev -> vn6_sock -> refcnt ) == 1 )
1007
+ return false;
1008
+ #endif
1003
1009
1004
1010
list_for_each_entry (vxlan , & vn -> vxlan_list , next ) {
1005
1011
if (!netif_running (vxlan -> dev ) || vxlan == dev )
1006
1012
continue ;
1007
1013
1008
- if (vxlan -> vn_sock != dev -> vn_sock )
1014
+ if (family == AF_INET && vxlan -> vn4_sock != dev -> vn4_sock )
1009
1015
continue ;
1016
+ #if IS_ENABLED (CONFIG_IPV6 )
1017
+ if (family == AF_INET6 && vxlan -> vn6_sock != dev -> vn6_sock )
1018
+ continue ;
1019
+ #endif
1010
1020
1011
1021
if (!vxlan_addr_equal (& vxlan -> default_dst .remote_ip ,
1012
1022
& dev -> default_dst .remote_ip ))
@@ -1022,15 +1032,16 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
1022
1032
return false;
1023
1033
}
1024
1034
1025
- static void vxlan_sock_release (struct vxlan_sock * vs )
1035
+ static void __vxlan_sock_release (struct vxlan_sock * vs )
1026
1036
{
1027
- struct sock * sk = vs -> sock -> sk ;
1028
- struct net * net = sock_net (sk );
1029
- struct vxlan_net * vn = net_generic (net , vxlan_net_id );
1037
+ struct vxlan_net * vn ;
1030
1038
1039
+ if (!vs )
1040
+ return ;
1031
1041
if (!atomic_dec_and_test (& vs -> refcnt ))
1032
1042
return ;
1033
1043
1044
+ vn = net_generic (sock_net (vs -> sock -> sk ), vxlan_net_id );
1034
1045
spin_lock (& vn -> sock_lock );
1035
1046
hlist_del_rcu (& vs -> hlist );
1036
1047
vxlan_notify_del_rx_port (vs );
@@ -1039,60 +1050,74 @@ static void vxlan_sock_release(struct vxlan_sock *vs)
1039
1050
queue_work (vxlan_wq , & vs -> del_work );
1040
1051
}
1041
1052
1053
+ static void vxlan_sock_release (struct vxlan_dev * vxlan )
1054
+ {
1055
+ __vxlan_sock_release (vxlan -> vn4_sock );
1056
+ #if IS_ENABLED (CONFIG_IPV6 )
1057
+ __vxlan_sock_release (vxlan -> vn6_sock );
1058
+ #endif
1059
+ }
1060
+
1042
1061
/* Update multicast group membership when first VNI on
1043
1062
* multicast address is brought up
1044
1063
*/
1045
1064
static int vxlan_igmp_join (struct vxlan_dev * vxlan )
1046
1065
{
1047
- struct vxlan_sock * vs = vxlan -> vn_sock ;
1048
- struct sock * sk = vs -> sock -> sk ;
1066
+ struct sock * sk ;
1049
1067
union vxlan_addr * ip = & vxlan -> default_dst .remote_ip ;
1050
1068
int ifindex = vxlan -> default_dst .remote_ifindex ;
1051
1069
int ret = - EINVAL ;
1052
1070
1053
- lock_sock (sk );
1054
1071
if (ip -> sa .sa_family == AF_INET ) {
1055
1072
struct ip_mreqn mreq = {
1056
1073
.imr_multiaddr .s_addr = ip -> sin .sin_addr .s_addr ,
1057
1074
.imr_ifindex = ifindex ,
1058
1075
};
1059
1076
1077
+ sk = vxlan -> vn4_sock -> sock -> sk ;
1078
+ lock_sock (sk );
1060
1079
ret = ip_mc_join_group (sk , & mreq );
1080
+ release_sock (sk );
1061
1081
#if IS_ENABLED (CONFIG_IPV6 )
1062
1082
} else {
1083
+ sk = vxlan -> vn6_sock -> sock -> sk ;
1084
+ lock_sock (sk );
1063
1085
ret = ipv6_stub -> ipv6_sock_mc_join (sk , ifindex ,
1064
1086
& ip -> sin6 .sin6_addr );
1087
+ release_sock (sk );
1065
1088
#endif
1066
1089
}
1067
- release_sock (sk );
1068
1090
1069
1091
return ret ;
1070
1092
}
1071
1093
1072
1094
/* Inverse of vxlan_igmp_join when last VNI is brought down */
1073
1095
static int vxlan_igmp_leave (struct vxlan_dev * vxlan )
1074
1096
{
1075
- struct vxlan_sock * vs = vxlan -> vn_sock ;
1076
- struct sock * sk = vs -> sock -> sk ;
1097
+ struct sock * sk ;
1077
1098
union vxlan_addr * ip = & vxlan -> default_dst .remote_ip ;
1078
1099
int ifindex = vxlan -> default_dst .remote_ifindex ;
1079
1100
int ret = - EINVAL ;
1080
1101
1081
- lock_sock (sk );
1082
1102
if (ip -> sa .sa_family == AF_INET ) {
1083
1103
struct ip_mreqn mreq = {
1084
1104
.imr_multiaddr .s_addr = ip -> sin .sin_addr .s_addr ,
1085
1105
.imr_ifindex = ifindex ,
1086
1106
};
1087
1107
1108
+ sk = vxlan -> vn4_sock -> sock -> sk ;
1109
+ lock_sock (sk );
1088
1110
ret = ip_mc_leave_group (sk , & mreq );
1111
+ release_sock (sk );
1089
1112
#if IS_ENABLED (CONFIG_IPV6 )
1090
1113
} else {
1114
+ sk = vxlan -> vn6_sock -> sock -> sk ;
1115
+ lock_sock (sk );
1091
1116
ret = ipv6_stub -> ipv6_sock_mc_drop (sk , ifindex ,
1092
1117
& ip -> sin6 .sin6_addr );
1118
+ release_sock (sk );
1093
1119
#endif
1094
1120
}
1095
- release_sock (sk );
1096
1121
1097
1122
return ret ;
1098
1123
}
@@ -1873,8 +1898,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1873
1898
{
1874
1899
struct ip_tunnel_info * info ;
1875
1900
struct vxlan_dev * vxlan = netdev_priv (dev );
1876
- struct sock * sk = vxlan -> vn_sock -> sock -> sk ;
1877
- unsigned short family = vxlan_get_sk_family (vxlan -> vn_sock );
1901
+ struct sock * sk ;
1878
1902
struct rtable * rt = NULL ;
1879
1903
const struct iphdr * old_iph ;
1880
1904
struct flowi4 fl4 ;
@@ -1901,13 +1925,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1901
1925
dev -> name );
1902
1926
goto drop ;
1903
1927
}
1904
- if (family != ip_tunnel_info_af (info ))
1905
- goto drop ;
1906
-
1907
1928
dst_port = info -> key .tp_dst ? : vxlan -> cfg .dst_port ;
1908
1929
vni = be64_to_cpu (info -> key .tun_id );
1909
- remote_ip .sa .sa_family = family ;
1910
- if (family == AF_INET )
1930
+ remote_ip .sa .sa_family = ip_tunnel_info_af ( info ) ;
1931
+ if (remote_ip . sa . sa_family == AF_INET )
1911
1932
remote_ip .sin .sin_addr .s_addr = info -> key .u .ipv4 .dst ;
1912
1933
else
1913
1934
remote_ip .sin6 .sin6_addr = info -> key .u .ipv6 .dst ;
@@ -1952,6 +1973,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1952
1973
}
1953
1974
1954
1975
if (dst -> sa .sa_family == AF_INET ) {
1976
+ if (!vxlan -> vn4_sock )
1977
+ goto drop ;
1978
+ sk = vxlan -> vn4_sock -> sock -> sk ;
1979
+
1955
1980
if (info && (info -> key .tun_flags & TUNNEL_DONT_FRAGMENT ))
1956
1981
df = htons (IP_DF );
1957
1982
@@ -2013,6 +2038,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
2013
2038
struct flowi6 fl6 ;
2014
2039
u32 rt6i_flags ;
2015
2040
2041
+ if (!vxlan -> vn6_sock )
2042
+ goto drop ;
2043
+ sk = vxlan -> vn6_sock -> sock -> sk ;
2044
+
2016
2045
memset (& fl6 , 0 , sizeof (fl6 ));
2017
2046
fl6 .flowi6_oif = rdst ? rdst -> remote_ifindex : 0 ;
2018
2047
fl6 .daddr = dst -> sin6 .sin6_addr ;
@@ -2204,7 +2233,6 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
2204
2233
struct vxlan_net * vn = net_generic (vxlan -> net , vxlan_net_id );
2205
2234
__u32 vni = vxlan -> default_dst .remote_vni ;
2206
2235
2207
- vxlan -> vn_sock = vs ;
2208
2236
spin_lock (& vn -> sock_lock );
2209
2237
hlist_add_head_rcu (& vxlan -> hlist , vni_head (vs , vni ));
2210
2238
spin_unlock (& vn -> sock_lock );
@@ -2244,22 +2272,18 @@ static void vxlan_uninit(struct net_device *dev)
2244
2272
static int vxlan_open (struct net_device * dev )
2245
2273
{
2246
2274
struct vxlan_dev * vxlan = netdev_priv (dev );
2247
- struct vxlan_sock * vs ;
2248
- int ret = 0 ;
2275
+ int ret ;
2249
2276
2250
- vs = vxlan_sock_add (vxlan -> net , vxlan -> cfg .dst_port ,
2251
- vxlan -> cfg .no_share , vxlan -> flags );
2252
- if (IS_ERR (vs ))
2253
- return PTR_ERR (vs );
2254
-
2255
- vxlan_vs_add_dev (vs , vxlan );
2277
+ ret = vxlan_sock_add (vxlan );
2278
+ if (ret < 0 )
2279
+ return ret ;
2256
2280
2257
2281
if (vxlan_addr_multicast (& vxlan -> default_dst .remote_ip )) {
2258
2282
ret = vxlan_igmp_join (vxlan );
2259
2283
if (ret == - EADDRINUSE )
2260
2284
ret = 0 ;
2261
2285
if (ret ) {
2262
- vxlan_sock_release (vs );
2286
+ vxlan_sock_release (vxlan );
2263
2287
return ret ;
2264
2288
}
2265
2289
}
@@ -2294,7 +2318,6 @@ static int vxlan_stop(struct net_device *dev)
2294
2318
{
2295
2319
struct vxlan_dev * vxlan = netdev_priv (dev );
2296
2320
struct vxlan_net * vn = net_generic (vxlan -> net , vxlan_net_id );
2297
- struct vxlan_sock * vs = vxlan -> vn_sock ;
2298
2321
int ret = 0 ;
2299
2322
2300
2323
if (vxlan_addr_multicast (& vxlan -> default_dst .remote_ip ) &&
@@ -2304,7 +2327,7 @@ static int vxlan_stop(struct net_device *dev)
2304
2327
del_timer_sync (& vxlan -> age_timer );
2305
2328
2306
2329
vxlan_flush (vxlan );
2307
- vxlan_sock_release (vs );
2330
+ vxlan_sock_release (vxlan );
2308
2331
2309
2332
return ret ;
2310
2333
}
@@ -2540,14 +2563,13 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
2540
2563
}
2541
2564
2542
2565
/* Create new listen socket if needed */
2543
- static struct vxlan_sock * vxlan_socket_create (struct net * net , __be16 port ,
2544
- u32 flags )
2566
+ static struct vxlan_sock * vxlan_socket_create (struct net * net , bool ipv6 ,
2567
+ __be16 port , u32 flags )
2545
2568
{
2546
2569
struct vxlan_net * vn = net_generic (net , vxlan_net_id );
2547
2570
struct vxlan_sock * vs ;
2548
2571
struct socket * sock ;
2549
2572
unsigned int h ;
2550
- bool ipv6 = !!(flags & VXLAN_F_IPV6 );
2551
2573
struct udp_tunnel_sock_cfg tunnel_cfg ;
2552
2574
2553
2575
vs = kzalloc (sizeof (* vs ), GFP_KERNEL );
@@ -2592,27 +2614,53 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
2592
2614
return vs ;
2593
2615
}
2594
2616
2595
- static struct vxlan_sock * vxlan_sock_add (struct net * net , __be16 port ,
2596
- bool no_share , u32 flags )
2617
+ static int __vxlan_sock_add (struct vxlan_dev * vxlan , bool ipv6 )
2597
2618
{
2598
- struct vxlan_net * vn = net_generic (net , vxlan_net_id );
2599
- struct vxlan_sock * vs ;
2600
- bool ipv6 = flags & VXLAN_F_IPV6 ;
2619
+ struct vxlan_net * vn = net_generic (vxlan -> net , vxlan_net_id );
2620
+ struct vxlan_sock * vs = NULL ;
2601
2621
2602
- if (!no_share ) {
2622
+ if (!vxlan -> cfg . no_share ) {
2603
2623
spin_lock (& vn -> sock_lock );
2604
- vs = vxlan_find_sock (net , ipv6 ? AF_INET6 : AF_INET , port ,
2605
- flags );
2606
- if (vs ) {
2607
- if (!atomic_add_unless (& vs -> refcnt , 1 , 0 ))
2608
- vs = ERR_PTR (- EBUSY );
2624
+ vs = vxlan_find_sock (vxlan -> net , ipv6 ? AF_INET6 : AF_INET ,
2625
+ vxlan -> cfg .dst_port , vxlan -> flags );
2626
+ if (vs && !atomic_add_unless (& vs -> refcnt , 1 , 0 )) {
2609
2627
spin_unlock (& vn -> sock_lock );
2610
- return vs ;
2628
+ return - EBUSY ;
2611
2629
}
2612
2630
spin_unlock (& vn -> sock_lock );
2613
2631
}
2632
+ if (!vs )
2633
+ vs = vxlan_socket_create (vxlan -> net , ipv6 ,
2634
+ vxlan -> cfg .dst_port , vxlan -> flags );
2635
+ if (IS_ERR (vs ))
2636
+ return PTR_ERR (vs );
2637
+ #if IS_ENABLED (CONFIG_IPV6 )
2638
+ if (ipv6 )
2639
+ vxlan -> vn6_sock = vs ;
2640
+ else
2641
+ #endif
2642
+ vxlan -> vn4_sock = vs ;
2643
+ vxlan_vs_add_dev (vs , vxlan );
2644
+ return 0 ;
2645
+ }
2614
2646
2615
- return vxlan_socket_create (net , port , flags );
2647
+ static int vxlan_sock_add (struct vxlan_dev * vxlan )
2648
+ {
2649
+ bool ipv6 = vxlan -> flags & VXLAN_F_IPV6 ;
2650
+ bool metadata = vxlan -> flags & VXLAN_F_COLLECT_METADATA ;
2651
+ int ret = 0 ;
2652
+
2653
+ vxlan -> vn4_sock = NULL ;
2654
+ #if IS_ENABLED (CONFIG_IPV6 )
2655
+ vxlan -> vn6_sock = NULL ;
2656
+ if (ipv6 || metadata )
2657
+ ret = __vxlan_sock_add (vxlan , true);
2658
+ #endif
2659
+ if (!ret && (!ipv6 || metadata ))
2660
+ ret = __vxlan_sock_add (vxlan , false);
2661
+ if (ret < 0 )
2662
+ vxlan_sock_release (vxlan );
2663
+ return ret ;
2616
2664
}
2617
2665
2618
2666
static int vxlan_dev_configure (struct net * src_net , struct net_device * dev ,
@@ -2621,6 +2669,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
2621
2669
struct vxlan_net * vn = net_generic (src_net , vxlan_net_id );
2622
2670
struct vxlan_dev * vxlan = netdev_priv (dev );
2623
2671
struct vxlan_rdst * dst = & vxlan -> default_dst ;
2672
+ unsigned short needed_headroom = ETH_HLEN ;
2624
2673
int err ;
2625
2674
bool use_ipv6 = false;
2626
2675
__be16 default_port = vxlan -> cfg .dst_port ;
@@ -2640,6 +2689,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
2640
2689
if (!IS_ENABLED (CONFIG_IPV6 ))
2641
2690
return - EPFNOSUPPORT ;
2642
2691
use_ipv6 = true;
2692
+ vxlan -> flags |= VXLAN_F_IPV6 ;
2643
2693
}
2644
2694
2645
2695
if (conf -> remote_ifindex ) {
@@ -2660,22 +2710,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
2660
2710
pr_info ("IPv6 is disabled via sysctl\n" );
2661
2711
return - EPERM ;
2662
2712
}
2663
- vxlan -> flags |= VXLAN_F_IPV6 ;
2664
2713
}
2665
2714
#endif
2666
2715
2667
2716
if (!conf -> mtu )
2668
2717
dev -> mtu = lowerdev -> mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM );
2669
2718
2670
- dev -> needed_headroom = lowerdev -> hard_header_len +
2671
- (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM );
2672
- } else if (use_ipv6 ) {
2673
- vxlan -> flags |= VXLAN_F_IPV6 ;
2674
- dev -> needed_headroom = ETH_HLEN + VXLAN6_HEADROOM ;
2675
- } else {
2676
- dev -> needed_headroom = ETH_HLEN + VXLAN_HEADROOM ;
2719
+ needed_headroom = lowerdev -> hard_header_len ;
2677
2720
}
2678
2721
2722
+ if (use_ipv6 || conf -> flags & VXLAN_F_COLLECT_METADATA )
2723
+ needed_headroom += VXLAN6_HEADROOM ;
2724
+ else
2725
+ needed_headroom += VXLAN_HEADROOM ;
2726
+ dev -> needed_headroom = needed_headroom ;
2727
+
2679
2728
memcpy (& vxlan -> cfg , conf , sizeof (* conf ));
2680
2729
if (!vxlan -> cfg .dst_port )
2681
2730
vxlan -> cfg .dst_port = default_port ;
0 commit comments