Skip to content

Commit 614732e

Browse files
tgraf authored and davem330 committed
openvswitch: Use regular VXLAN net_device device
This gets rid of all OVS specific VXLAN code in the receive and transmit path by using a VXLAN net_device to represent the vport. Only a small shim layer remains which takes care of handling the VXLAN specific OVS Netlink configuration.

Unexports vxlan_sock_add(), vxlan_sock_release(), vxlan_xmit_skb() since they are no longer needed.

Signed-off-by: Thomas Graf <[email protected]>
Signed-off-by: Pravin B Shelar <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent c9db965 commit 614732e

File tree

10 files changed

+339
-507
lines changed

10 files changed

+339
-507
lines changed

drivers/net/vxlan.c

Lines changed: 116 additions & 126 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,9 @@ static struct rtnl_link_ops vxlan_link_ops;
7575

7676
static const u8 all_zeros_mac[ETH_ALEN];
7777

78+
static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
79+
bool no_share, u32 flags);
80+
7881
/* per-network namespace private data for this module */
7982
struct vxlan_net {
8083
struct list_head vxlan_list;
@@ -1027,7 +1030,7 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
10271030
return false;
10281031
}
10291032

1030-
void vxlan_sock_release(struct vxlan_sock *vs)
1033+
static void vxlan_sock_release(struct vxlan_sock *vs)
10311034
{
10321035
struct sock *sk = vs->sock->sk;
10331036
struct net *net = sock_net(sk);
@@ -1043,7 +1046,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
10431046

10441047
queue_work(vxlan_wq, &vs->del_work);
10451048
}
1046-
EXPORT_SYMBOL_GPL(vxlan_sock_release);
10471049

10481050
/* Update multicast group membership when first VNI on
10491051
* multicast address is brought up
@@ -1126,6 +1128,102 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
11261128
return vh;
11271129
}
11281130

1131+
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
1132+
struct vxlan_metadata *md, u32 vni,
1133+
struct metadata_dst *tun_dst)
1134+
{
1135+
struct iphdr *oip = NULL;
1136+
struct ipv6hdr *oip6 = NULL;
1137+
struct vxlan_dev *vxlan;
1138+
struct pcpu_sw_netstats *stats;
1139+
union vxlan_addr saddr;
1140+
int err = 0;
1141+
union vxlan_addr *remote_ip;
1142+
1143+
/* For flow based devices, map all packets to VNI 0 */
1144+
if (vs->flags & VXLAN_F_FLOW_BASED)
1145+
vni = 0;
1146+
1147+
/* Is this VNI defined? */
1148+
vxlan = vxlan_vs_find_vni(vs, vni);
1149+
if (!vxlan)
1150+
goto drop;
1151+
1152+
remote_ip = &vxlan->default_dst.remote_ip;
1153+
skb_reset_mac_header(skb);
1154+
skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
1155+
skb->protocol = eth_type_trans(skb, vxlan->dev);
1156+
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1157+
1158+
/* Ignore packet loops (and multicast echo) */
1159+
if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
1160+
goto drop;
1161+
1162+
/* Re-examine inner Ethernet packet */
1163+
if (remote_ip->sa.sa_family == AF_INET) {
1164+
oip = ip_hdr(skb);
1165+
saddr.sin.sin_addr.s_addr = oip->saddr;
1166+
saddr.sa.sa_family = AF_INET;
1167+
#if IS_ENABLED(CONFIG_IPV6)
1168+
} else {
1169+
oip6 = ipv6_hdr(skb);
1170+
saddr.sin6.sin6_addr = oip6->saddr;
1171+
saddr.sa.sa_family = AF_INET6;
1172+
#endif
1173+
}
1174+
1175+
if (tun_dst) {
1176+
skb_dst_set(skb, (struct dst_entry *)tun_dst);
1177+
tun_dst = NULL;
1178+
}
1179+
1180+
if ((vxlan->flags & VXLAN_F_LEARN) &&
1181+
vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
1182+
goto drop;
1183+
1184+
skb_reset_network_header(skb);
1185+
/* In flow-based mode, GBP is carried in dst_metadata */
1186+
if (!(vs->flags & VXLAN_F_FLOW_BASED))
1187+
skb->mark = md->gbp;
1188+
1189+
if (oip6)
1190+
err = IP6_ECN_decapsulate(oip6, skb);
1191+
if (oip)
1192+
err = IP_ECN_decapsulate(oip, skb);
1193+
1194+
if (unlikely(err)) {
1195+
if (log_ecn_error) {
1196+
if (oip6)
1197+
net_info_ratelimited("non-ECT from %pI6\n",
1198+
&oip6->saddr);
1199+
if (oip)
1200+
net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
1201+
&oip->saddr, oip->tos);
1202+
}
1203+
if (err > 1) {
1204+
++vxlan->dev->stats.rx_frame_errors;
1205+
++vxlan->dev->stats.rx_errors;
1206+
goto drop;
1207+
}
1208+
}
1209+
1210+
stats = this_cpu_ptr(vxlan->dev->tstats);
1211+
u64_stats_update_begin(&stats->syncp);
1212+
stats->rx_packets++;
1213+
stats->rx_bytes += skb->len;
1214+
u64_stats_update_end(&stats->syncp);
1215+
1216+
netif_rx(skb);
1217+
1218+
return;
1219+
drop:
1220+
if (tun_dst)
1221+
dst_release((struct dst_entry *)tun_dst);
1222+
1223+
/* Consume bad packet */
1224+
kfree_skb(skb);
1225+
}
1226+
11291227
/* Callback from net/ipv4/udp.c to receive packets */
11301228
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
11311229
{
@@ -1192,7 +1290,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
11921290
info->key.tun_flags |= TUNNEL_CSUM;
11931291

11941292
md = ip_tunnel_info_opts(info, sizeof(*md));
1195-
md->tun_dst = tun_dst;
11961293
} else {
11971294
memset(md, 0, sizeof(*md));
11981295
}
@@ -1231,8 +1328,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
12311328
goto bad_flags;
12321329
}
12331330

1234-
md->vni = vxh->vx_vni;
1235-
vs->rcv(vs, skb, md);
1331+
vxlan_rcv(vs, skb, md, vni >> 8, tun_dst);
12361332
return 0;
12371333

12381334
drop:
@@ -1252,104 +1348,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
12521348
return 1;
12531349
}
12541350

1255-
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
1256-
struct vxlan_metadata *md)
1257-
{
1258-
struct iphdr *oip = NULL;
1259-
struct ipv6hdr *oip6 = NULL;
1260-
struct vxlan_dev *vxlan;
1261-
struct pcpu_sw_netstats *stats;
1262-
union vxlan_addr saddr;
1263-
__u32 vni;
1264-
int err = 0;
1265-
union vxlan_addr *remote_ip;
1266-
1267-
/* For flow based devices, map all packets to VNI 0 */
1268-
if (vs->flags & VXLAN_F_FLOW_BASED)
1269-
vni = 0;
1270-
else
1271-
vni = ntohl(md->vni) >> 8;
1272-
1273-
/* Is this VNI defined? */
1274-
vxlan = vxlan_vs_find_vni(vs, vni);
1275-
if (!vxlan)
1276-
goto drop;
1277-
1278-
remote_ip = &vxlan->default_dst.remote_ip;
1279-
skb_reset_mac_header(skb);
1280-
skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
1281-
skb->protocol = eth_type_trans(skb, vxlan->dev);
1282-
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1283-
1284-
/* Ignore packet loops (and multicast echo) */
1285-
if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
1286-
goto drop;
1287-
1288-
/* Re-examine inner Ethernet packet */
1289-
if (remote_ip->sa.sa_family == AF_INET) {
1290-
oip = ip_hdr(skb);
1291-
saddr.sin.sin_addr.s_addr = oip->saddr;
1292-
saddr.sa.sa_family = AF_INET;
1293-
#if IS_ENABLED(CONFIG_IPV6)
1294-
} else {
1295-
oip6 = ipv6_hdr(skb);
1296-
saddr.sin6.sin6_addr = oip6->saddr;
1297-
saddr.sa.sa_family = AF_INET6;
1298-
#endif
1299-
}
1300-
1301-
if (md->tun_dst) {
1302-
skb_dst_set(skb, (struct dst_entry *)md->tun_dst);
1303-
md->tun_dst = NULL;
1304-
}
1305-
1306-
if ((vxlan->flags & VXLAN_F_LEARN) &&
1307-
vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
1308-
goto drop;
1309-
1310-
skb_reset_network_header(skb);
1311-
/* In flow-based mode, GBP is carried in dst_metadata */
1312-
if (!(vs->flags & VXLAN_F_FLOW_BASED))
1313-
skb->mark = md->gbp;
1314-
1315-
if (oip6)
1316-
err = IP6_ECN_decapsulate(oip6, skb);
1317-
if (oip)
1318-
err = IP_ECN_decapsulate(oip, skb);
1319-
1320-
if (unlikely(err)) {
1321-
if (log_ecn_error) {
1322-
if (oip6)
1323-
net_info_ratelimited("non-ECT from %pI6\n",
1324-
&oip6->saddr);
1325-
if (oip)
1326-
net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
1327-
&oip->saddr, oip->tos);
1328-
}
1329-
if (err > 1) {
1330-
++vxlan->dev->stats.rx_frame_errors;
1331-
++vxlan->dev->stats.rx_errors;
1332-
goto drop;
1333-
}
1334-
}
1335-
1336-
stats = this_cpu_ptr(vxlan->dev->tstats);
1337-
u64_stats_update_begin(&stats->syncp);
1338-
stats->rx_packets++;
1339-
stats->rx_bytes += skb->len;
1340-
u64_stats_update_end(&stats->syncp);
1341-
1342-
netif_rx(skb);
1343-
1344-
return;
1345-
drop:
1346-
if (md->tun_dst)
1347-
dst_release((struct dst_entry *)md->tun_dst);
1348-
1349-
/* Consume bad packet */
1350-
kfree_skb(skb);
1351-
}
1352-
13531351
static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
13541352
{
13551353
struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -1688,7 +1686,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
16881686
struct sk_buff *skb,
16891687
struct net_device *dev, struct in6_addr *saddr,
16901688
struct in6_addr *daddr, __u8 prio, __u8 ttl,
1691-
__be16 src_port, __be16 dst_port,
1689+
__be16 src_port, __be16 dst_port, __u32 vni,
16921690
struct vxlan_metadata *md, bool xnet, u32 vxflags)
16931691
{
16941692
struct vxlanhdr *vxh;
@@ -1738,7 +1736,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
17381736

17391737
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
17401738
vxh->vx_flags = htonl(VXLAN_HF_VNI);
1741-
vxh->vx_vni = md->vni;
1739+
vxh->vx_vni = vni;
17421740

17431741
if (type & SKB_GSO_TUNNEL_REMCSUM) {
17441742
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1771,10 +1769,10 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
17711769
}
17721770
#endif
17731771

1774-
int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
1775-
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
1776-
__be16 src_port, __be16 dst_port,
1777-
struct vxlan_metadata *md, bool xnet, u32 vxflags)
1772+
static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
1773+
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
1774+
__be16 src_port, __be16 dst_port, __u32 vni,
1775+
struct vxlan_metadata *md, bool xnet, u32 vxflags)
17781776
{
17791777
struct vxlanhdr *vxh;
17801778
int min_headroom;
@@ -1817,7 +1815,7 @@ int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
18171815

18181816
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
18191817
vxh->vx_flags = htonl(VXLAN_HF_VNI);
1820-
vxh->vx_vni = md->vni;
1818+
vxh->vx_vni = vni;
18211819

18221820
if (type & SKB_GSO_TUNNEL_REMCSUM) {
18231821
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1844,7 +1842,6 @@ int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
18441842
ttl, df, src_port, dst_port, xnet,
18451843
!(vxflags & VXLAN_F_UDP_CSUM));
18461844
}
1847-
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
18481845

18491846
/* Bypass encapsulation if the destination is local */
18501847
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
@@ -2012,10 +2009,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
20122009

20132010
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
20142011
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
2015-
md->vni = htonl(vni << 8);
20162012
err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr,
20172013
dst->sin.sin_addr.s_addr, tos, ttl, df,
2018-
src_port, dst_port, md,
2014+
src_port, dst_port, htonl(vni << 8), md,
20192015
!net_eq(vxlan->net, dev_net(vxlan->dev)),
20202016
flags);
20212017
if (err < 0) {
@@ -2070,11 +2066,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
20702066
}
20712067

20722068
ttl = ttl ? : ip6_dst_hoplimit(ndst);
2073-
md->vni = htonl(vni << 8);
20742069
md->gbp = skb->mark;
20752070

20762071
err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr,
2077-
0, ttl, src_port, dst_port, md,
2072+
0, ttl, src_port, dst_port, htonl(vni << 8), md,
20782073
!net_eq(vxlan->net, dev_net(vxlan->dev)),
20792074
vxlan->flags);
20802075
#endif
@@ -2269,8 +2264,8 @@ static int vxlan_open(struct net_device *dev)
22692264
struct vxlan_sock *vs;
22702265
int ret = 0;
22712266

2272-
vs = vxlan_sock_add(vxlan->net, vxlan->cfg.dst_port, vxlan_rcv,
2273-
NULL, vxlan->cfg.no_share, vxlan->flags);
2267+
vs = vxlan_sock_add(vxlan->net, vxlan->cfg.dst_port,
2268+
vxlan->cfg.no_share, vxlan->flags);
22742269
if (IS_ERR(vs))
22752270
return PTR_ERR(vs);
22762271

@@ -2563,7 +2558,6 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
25632558

25642559
/* Create new listen socket if needed */
25652560
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
2566-
vxlan_rcv_t *rcv, void *data,
25672561
u32 flags)
25682562
{
25692563
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -2592,8 +2586,6 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
25922586

25932587
vs->sock = sock;
25942588
atomic_set(&vs->refcnt, 1);
2595-
vs->rcv = rcv;
2596-
vs->data = data;
25972589
vs->flags = (flags & VXLAN_F_RCV_FLAGS);
25982590

25992591
/* Initialize the vxlan udp offloads structure */
@@ -2617,9 +2609,8 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
26172609
return vs;
26182610
}
26192611

2620-
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
2621-
vxlan_rcv_t *rcv, void *data,
2622-
bool no_share, u32 flags)
2612+
static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
2613+
bool no_share, u32 flags)
26232614
{
26242615
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
26252616
struct vxlan_sock *vs;
@@ -2629,7 +2620,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
26292620
spin_lock(&vn->sock_lock);
26302621
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
26312622
flags);
2632-
if (vs && vs->rcv == rcv) {
2623+
if (vs) {
26332624
if (!atomic_add_unless(&vs->refcnt, 1, 0))
26342625
vs = ERR_PTR(-EBUSY);
26352626
spin_unlock(&vn->sock_lock);
@@ -2638,9 +2629,8 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
26382629
spin_unlock(&vn->sock_lock);
26392630
}
26402631

2641-
return vxlan_socket_create(net, port, rcv, data, flags);
2632+
return vxlan_socket_create(net, port, flags);
26422633
}
2643-
EXPORT_SYMBOL_GPL(vxlan_sock_add);
26442634

26452635
static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
26462636
struct vxlan_config *conf)

include/net/rtnetlink.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
141141
unsigned char name_assign_type,
142142
const struct rtnl_link_ops *ops,
143143
struct nlattr *tb[]);
144+
int rtnl_delete_link(struct net_device *dev);
144145
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm);
145146

146147
int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len);

0 commit comments

Comments
 (0)