Skip to content

Commit 8f35043

Browse files
committed
Merge branch 'vxlan-ipv4-ipv6'
Jiri Benc says:

====================
vxlan: support both IPv4 and IPv6 sockets

Note: this needs net merged into net-next in order to apply.

It's currently not easy enough to work with metadata based vxlan tunnels. In particular, it's necessary to create separate network interfaces for IPv4 and IPv6 tunneling. Assigning an IPv6 address to an IPv4 interface is allowed yet won't do what's expected. With route based tunneling, one has to pay attention to use the vxlan interface opened with the correct family. Other users of this (openvswitch) would need to always create two vxlan interfaces.

Furthermore, there's no sane API for creating an IPv6 vxlan metadata based interface.

This patchset simplifies this by opening both IPv4 and IPv6 socket if the vxlan interface has the metadata flag (IFLA_VXLAN_COLLECT_METADATA) set. Assignment of addresses etc. works as expected after this.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 8b7a704 + b1be00a commit 8f35043

File tree

3 files changed

+121
-63
lines changed

3 files changed

+121
-63
lines changed

drivers/net/vxlan.c

Lines changed: 108 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,7 @@ static struct rtnl_link_ops vxlan_link_ops;
7575

7676
static const u8 all_zeros_mac[ETH_ALEN];
7777

78-
static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
79-
bool no_share, u32 flags);
78+
static int vxlan_sock_add(struct vxlan_dev *vxlan);
8079

8180
/* per-network namespace private data for this module */
8281
struct vxlan_net {
@@ -994,19 +993,30 @@ static bool vxlan_snoop(struct net_device *dev,
994993
static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
995994
{
996995
struct vxlan_dev *vxlan;
996+
unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
997997

998998
/* The vxlan_sock is only used by dev, leaving group has
999999
* no effect on other vxlan devices.
10001000
*/
1001-
if (atomic_read(&dev->vn_sock->refcnt) == 1)
1001+
if (family == AF_INET && dev->vn4_sock &&
1002+
atomic_read(&dev->vn4_sock->refcnt) == 1)
10021003
return false;
1004+
#if IS_ENABLED(CONFIG_IPV6)
1005+
if (family == AF_INET6 && dev->vn6_sock &&
1006+
atomic_read(&dev->vn6_sock->refcnt) == 1)
1007+
return false;
1008+
#endif
10031009

10041010
list_for_each_entry(vxlan, &vn->vxlan_list, next) {
10051011
if (!netif_running(vxlan->dev) || vxlan == dev)
10061012
continue;
10071013

1008-
if (vxlan->vn_sock != dev->vn_sock)
1014+
if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock)
10091015
continue;
1016+
#if IS_ENABLED(CONFIG_IPV6)
1017+
if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock)
1018+
continue;
1019+
#endif
10101020

10111021
if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
10121022
&dev->default_dst.remote_ip))
@@ -1022,15 +1032,16 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
10221032
return false;
10231033
}
10241034

1025-
static void vxlan_sock_release(struct vxlan_sock *vs)
1035+
static void __vxlan_sock_release(struct vxlan_sock *vs)
10261036
{
1027-
struct sock *sk = vs->sock->sk;
1028-
struct net *net = sock_net(sk);
1029-
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1037+
struct vxlan_net *vn;
10301038

1039+
if (!vs)
1040+
return;
10311041
if (!atomic_dec_and_test(&vs->refcnt))
10321042
return;
10331043

1044+
vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
10341045
spin_lock(&vn->sock_lock);
10351046
hlist_del_rcu(&vs->hlist);
10361047
vxlan_notify_del_rx_port(vs);
@@ -1039,60 +1050,74 @@ static void vxlan_sock_release(struct vxlan_sock *vs)
10391050
queue_work(vxlan_wq, &vs->del_work);
10401051
}
10411052

1053+
static void vxlan_sock_release(struct vxlan_dev *vxlan)
1054+
{
1055+
__vxlan_sock_release(vxlan->vn4_sock);
1056+
#if IS_ENABLED(CONFIG_IPV6)
1057+
__vxlan_sock_release(vxlan->vn6_sock);
1058+
#endif
1059+
}
1060+
10421061
/* Update multicast group membership when first VNI on
10431062
* multicast address is brought up
10441063
*/
10451064
static int vxlan_igmp_join(struct vxlan_dev *vxlan)
10461065
{
1047-
struct vxlan_sock *vs = vxlan->vn_sock;
1048-
struct sock *sk = vs->sock->sk;
1066+
struct sock *sk;
10491067
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
10501068
int ifindex = vxlan->default_dst.remote_ifindex;
10511069
int ret = -EINVAL;
10521070

1053-
lock_sock(sk);
10541071
if (ip->sa.sa_family == AF_INET) {
10551072
struct ip_mreqn mreq = {
10561073
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
10571074
.imr_ifindex = ifindex,
10581075
};
10591076

1077+
sk = vxlan->vn4_sock->sock->sk;
1078+
lock_sock(sk);
10601079
ret = ip_mc_join_group(sk, &mreq);
1080+
release_sock(sk);
10611081
#if IS_ENABLED(CONFIG_IPV6)
10621082
} else {
1083+
sk = vxlan->vn6_sock->sock->sk;
1084+
lock_sock(sk);
10631085
ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
10641086
&ip->sin6.sin6_addr);
1087+
release_sock(sk);
10651088
#endif
10661089
}
1067-
release_sock(sk);
10681090

10691091
return ret;
10701092
}
10711093

10721094
/* Inverse of vxlan_igmp_join when last VNI is brought down */
10731095
static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
10741096
{
1075-
struct vxlan_sock *vs = vxlan->vn_sock;
1076-
struct sock *sk = vs->sock->sk;
1097+
struct sock *sk;
10771098
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
10781099
int ifindex = vxlan->default_dst.remote_ifindex;
10791100
int ret = -EINVAL;
10801101

1081-
lock_sock(sk);
10821102
if (ip->sa.sa_family == AF_INET) {
10831103
struct ip_mreqn mreq = {
10841104
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
10851105
.imr_ifindex = ifindex,
10861106
};
10871107

1108+
sk = vxlan->vn4_sock->sock->sk;
1109+
lock_sock(sk);
10881110
ret = ip_mc_leave_group(sk, &mreq);
1111+
release_sock(sk);
10891112
#if IS_ENABLED(CONFIG_IPV6)
10901113
} else {
1114+
sk = vxlan->vn6_sock->sock->sk;
1115+
lock_sock(sk);
10911116
ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
10921117
&ip->sin6.sin6_addr);
1118+
release_sock(sk);
10931119
#endif
10941120
}
1095-
release_sock(sk);
10961121

10971122
return ret;
10981123
}
@@ -1873,8 +1898,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
18731898
{
18741899
struct ip_tunnel_info *info;
18751900
struct vxlan_dev *vxlan = netdev_priv(dev);
1876-
struct sock *sk = vxlan->vn_sock->sock->sk;
1877-
unsigned short family = vxlan_get_sk_family(vxlan->vn_sock);
1901+
struct sock *sk;
18781902
struct rtable *rt = NULL;
18791903
const struct iphdr *old_iph;
18801904
struct flowi4 fl4;
@@ -1901,13 +1925,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
19011925
dev->name);
19021926
goto drop;
19031927
}
1904-
if (family != ip_tunnel_info_af(info))
1905-
goto drop;
1906-
19071928
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
19081929
vni = be64_to_cpu(info->key.tun_id);
1909-
remote_ip.sa.sa_family = family;
1910-
if (family == AF_INET)
1930+
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
1931+
if (remote_ip.sa.sa_family == AF_INET)
19111932
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
19121933
else
19131934
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
@@ -1952,6 +1973,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
19521973
}
19531974

19541975
if (dst->sa.sa_family == AF_INET) {
1976+
if (!vxlan->vn4_sock)
1977+
goto drop;
1978+
sk = vxlan->vn4_sock->sock->sk;
1979+
19551980
if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
19561981
df = htons(IP_DF);
19571982

@@ -2013,6 +2038,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
20132038
struct flowi6 fl6;
20142039
u32 rt6i_flags;
20152040

2041+
if (!vxlan->vn6_sock)
2042+
goto drop;
2043+
sk = vxlan->vn6_sock->sock->sk;
2044+
20162045
memset(&fl6, 0, sizeof(fl6));
20172046
fl6.flowi6_oif = rdst ? rdst->remote_ifindex : 0;
20182047
fl6.daddr = dst->sin6.sin6_addr;
@@ -2204,7 +2233,6 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
22042233
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
22052234
__u32 vni = vxlan->default_dst.remote_vni;
22062235

2207-
vxlan->vn_sock = vs;
22082236
spin_lock(&vn->sock_lock);
22092237
hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
22102238
spin_unlock(&vn->sock_lock);
@@ -2244,22 +2272,18 @@ static void vxlan_uninit(struct net_device *dev)
22442272
static int vxlan_open(struct net_device *dev)
22452273
{
22462274
struct vxlan_dev *vxlan = netdev_priv(dev);
2247-
struct vxlan_sock *vs;
2248-
int ret = 0;
2275+
int ret;
22492276

2250-
vs = vxlan_sock_add(vxlan->net, vxlan->cfg.dst_port,
2251-
vxlan->cfg.no_share, vxlan->flags);
2252-
if (IS_ERR(vs))
2253-
return PTR_ERR(vs);
2254-
2255-
vxlan_vs_add_dev(vs, vxlan);
2277+
ret = vxlan_sock_add(vxlan);
2278+
if (ret < 0)
2279+
return ret;
22562280

22572281
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
22582282
ret = vxlan_igmp_join(vxlan);
22592283
if (ret == -EADDRINUSE)
22602284
ret = 0;
22612285
if (ret) {
2262-
vxlan_sock_release(vs);
2286+
vxlan_sock_release(vxlan);
22632287
return ret;
22642288
}
22652289
}
@@ -2294,7 +2318,6 @@ static int vxlan_stop(struct net_device *dev)
22942318
{
22952319
struct vxlan_dev *vxlan = netdev_priv(dev);
22962320
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2297-
struct vxlan_sock *vs = vxlan->vn_sock;
22982321
int ret = 0;
22992322

23002323
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
@@ -2304,7 +2327,7 @@ static int vxlan_stop(struct net_device *dev)
23042327
del_timer_sync(&vxlan->age_timer);
23052328

23062329
vxlan_flush(vxlan);
2307-
vxlan_sock_release(vs);
2330+
vxlan_sock_release(vxlan);
23082331

23092332
return ret;
23102333
}
@@ -2540,14 +2563,13 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
25402563
}
25412564

25422565
/* Create new listen socket if needed */
2543-
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
2544-
u32 flags)
2566+
static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
2567+
__be16 port, u32 flags)
25452568
{
25462569
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
25472570
struct vxlan_sock *vs;
25482571
struct socket *sock;
25492572
unsigned int h;
2550-
bool ipv6 = !!(flags & VXLAN_F_IPV6);
25512573
struct udp_tunnel_sock_cfg tunnel_cfg;
25522574

25532575
vs = kzalloc(sizeof(*vs), GFP_KERNEL);
@@ -2592,27 +2614,53 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
25922614
return vs;
25932615
}
25942616

2595-
static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
2596-
bool no_share, u32 flags)
2617+
static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
25972618
{
2598-
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
2599-
struct vxlan_sock *vs;
2600-
bool ipv6 = flags & VXLAN_F_IPV6;
2619+
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
2620+
struct vxlan_sock *vs = NULL;
26012621

2602-
if (!no_share) {
2622+
if (!vxlan->cfg.no_share) {
26032623
spin_lock(&vn->sock_lock);
2604-
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
2605-
flags);
2606-
if (vs) {
2607-
if (!atomic_add_unless(&vs->refcnt, 1, 0))
2608-
vs = ERR_PTR(-EBUSY);
2624+
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
2625+
vxlan->cfg.dst_port, vxlan->flags);
2626+
if (vs && !atomic_add_unless(&vs->refcnt, 1, 0)) {
26092627
spin_unlock(&vn->sock_lock);
2610-
return vs;
2628+
return -EBUSY;
26112629
}
26122630
spin_unlock(&vn->sock_lock);
26132631
}
2632+
if (!vs)
2633+
vs = vxlan_socket_create(vxlan->net, ipv6,
2634+
vxlan->cfg.dst_port, vxlan->flags);
2635+
if (IS_ERR(vs))
2636+
return PTR_ERR(vs);
2637+
#if IS_ENABLED(CONFIG_IPV6)
2638+
if (ipv6)
2639+
vxlan->vn6_sock = vs;
2640+
else
2641+
#endif
2642+
vxlan->vn4_sock = vs;
2643+
vxlan_vs_add_dev(vs, vxlan);
2644+
return 0;
2645+
}
26142646

2615-
return vxlan_socket_create(net, port, flags);
2647+
static int vxlan_sock_add(struct vxlan_dev *vxlan)
2648+
{
2649+
bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
2650+
bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
2651+
int ret = 0;
2652+
2653+
vxlan->vn4_sock = NULL;
2654+
#if IS_ENABLED(CONFIG_IPV6)
2655+
vxlan->vn6_sock = NULL;
2656+
if (ipv6 || metadata)
2657+
ret = __vxlan_sock_add(vxlan, true);
2658+
#endif
2659+
if (!ret && (!ipv6 || metadata))
2660+
ret = __vxlan_sock_add(vxlan, false);
2661+
if (ret < 0)
2662+
vxlan_sock_release(vxlan);
2663+
return ret;
26162664
}
26172665

26182666
static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
@@ -2621,6 +2669,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
26212669
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
26222670
struct vxlan_dev *vxlan = netdev_priv(dev);
26232671
struct vxlan_rdst *dst = &vxlan->default_dst;
2672+
unsigned short needed_headroom = ETH_HLEN;
26242673
int err;
26252674
bool use_ipv6 = false;
26262675
__be16 default_port = vxlan->cfg.dst_port;
@@ -2640,6 +2689,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
26402689
if (!IS_ENABLED(CONFIG_IPV6))
26412690
return -EPFNOSUPPORT;
26422691
use_ipv6 = true;
2692+
vxlan->flags |= VXLAN_F_IPV6;
26432693
}
26442694

26452695
if (conf->remote_ifindex) {
@@ -2660,22 +2710,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
26602710
pr_info("IPv6 is disabled via sysctl\n");
26612711
return -EPERM;
26622712
}
2663-
vxlan->flags |= VXLAN_F_IPV6;
26642713
}
26652714
#endif
26662715

26672716
if (!conf->mtu)
26682717
dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
26692718

2670-
dev->needed_headroom = lowerdev->hard_header_len +
2671-
(use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
2672-
} else if (use_ipv6) {
2673-
vxlan->flags |= VXLAN_F_IPV6;
2674-
dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM;
2675-
} else {
2676-
dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM;
2719+
needed_headroom = lowerdev->hard_header_len;
26772720
}
26782721

2722+
if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
2723+
needed_headroom += VXLAN6_HEADROOM;
2724+
else
2725+
needed_headroom += VXLAN_HEADROOM;
2726+
dev->needed_headroom = needed_headroom;
2727+
26792728
memcpy(&vxlan->cfg, conf, sizeof(*conf));
26802729
if (!vxlan->cfg.dst_port)
26812730
vxlan->cfg.dst_port = default_port;

0 commit comments

Comments
 (0)