Skip to content

Commit 1902750

Browse files
committed
Merge branch 'ovs-tunnel-mtu'
David Wragg says:

====================
Set a large MTU on ovs-created tunnel devices

Prior to 4.3, openvswitch tunnel vports (vxlan, gre and geneve) could
transmit vxlan packets of any size, constrained only by the ability to
send out the resulting packets. 4.3 introduced netdevs corresponding
to tunnel vports. These netdevs have an MTU, which limits the size of
a packet that can be successfully encapsulated. The default MTU
values are low (1500 or less), which is awkwardly small in the context
of physical networks supporting jumbo frames, and leads to a
conspicuous change in behaviour for userspace.

This patch series sets the MTU on openvswitch-created netdevs to be the
relevant maximum (i.e. the maximum IP packet size minus any relevant
overhead), effectively restoring the behaviour prior to 4.3.

Where relevant, the limits on MTU values that can be directly set on
the netdevs are also relaxed.

Changes in v2:
* Extend to all openvswitch tunnel types, i.e. gre and geneve as well
* Use IP_MAX_MTU

Changes in v3:
* Fix block comment style
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 461547f + 7e05915 commit 1902750

File tree

6 files changed

+87
-22
lines changed

6 files changed

+87
-22
lines changed

drivers/net/geneve.c

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,6 +1039,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
10391039
return geneve_xmit_skb(skb, dev, info);
10401040
}
10411041

1042+
static int geneve_change_mtu(struct net_device *dev, int new_mtu)
1043+
{
1044+
/* GENEVE overhead is not fixed, so we can't enforce a more
1045+
* precise max MTU.
1046+
*/
1047+
if (new_mtu < 68 || new_mtu > IP_MAX_MTU)
1048+
return -EINVAL;
1049+
dev->mtu = new_mtu;
1050+
return 0;
1051+
}
1052+
10421053
static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
10431054
{
10441055
struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -1083,7 +1094,7 @@ static const struct net_device_ops geneve_netdev_ops = {
10831094
.ndo_stop = geneve_stop,
10841095
.ndo_start_xmit = geneve_xmit,
10851096
.ndo_get_stats64 = ip_tunnel_get_stats64,
1086-
.ndo_change_mtu = eth_change_mtu,
1097+
.ndo_change_mtu = geneve_change_mtu,
10871098
.ndo_validate_addr = eth_validate_addr,
10881099
.ndo_set_mac_address = eth_mac_addr,
10891100
.ndo_fill_metadata_dst = geneve_fill_metadata_dst,
@@ -1442,11 +1453,21 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
14421453

14431454
err = geneve_configure(net, dev, &geneve_remote_unspec,
14441455
0, 0, 0, htons(dst_port), true, 0);
1445-
if (err) {
1446-
free_netdev(dev);
1447-
return ERR_PTR(err);
1448-
}
1456+
if (err)
1457+
goto err;
1458+
1459+
/* openvswitch users expect packet sizes to be unrestricted,
1460+
* so set the largest MTU we can.
1461+
*/
1462+
err = geneve_change_mtu(dev, IP_MAX_MTU);
1463+
if (err)
1464+
goto err;
1465+
14491466
return dev;
1467+
1468+
err:
1469+
free_netdev(dev);
1470+
return ERR_PTR(err);
14501471
}
14511472
EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
14521473

drivers/net/vxlan.c

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2367,29 +2367,43 @@ static void vxlan_set_multicast_list(struct net_device *dev)
23672367
{
23682368
}
23692369

2370-
static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
2370+
static int __vxlan_change_mtu(struct net_device *dev,
2371+
struct net_device *lowerdev,
2372+
struct vxlan_rdst *dst, int new_mtu, bool strict)
23712373
{
2372-
struct vxlan_dev *vxlan = netdev_priv(dev);
2373-
struct vxlan_rdst *dst = &vxlan->default_dst;
2374-
struct net_device *lowerdev;
2375-
int max_mtu;
2374+
int max_mtu = IP_MAX_MTU;
23762375

2377-
lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
2378-
if (lowerdev == NULL)
2379-
return eth_change_mtu(dev, new_mtu);
2376+
if (lowerdev)
2377+
max_mtu = lowerdev->mtu;
23802378

23812379
if (dst->remote_ip.sa.sa_family == AF_INET6)
2382-
max_mtu = lowerdev->mtu - VXLAN6_HEADROOM;
2380+
max_mtu -= VXLAN6_HEADROOM;
23832381
else
2384-
max_mtu = lowerdev->mtu - VXLAN_HEADROOM;
2382+
max_mtu -= VXLAN_HEADROOM;
23852383

2386-
if (new_mtu < 68 || new_mtu > max_mtu)
2384+
if (new_mtu < 68)
23872385
return -EINVAL;
23882386

2387+
if (new_mtu > max_mtu) {
2388+
if (strict)
2389+
return -EINVAL;
2390+
2391+
new_mtu = max_mtu;
2392+
}
2393+
23892394
dev->mtu = new_mtu;
23902395
return 0;
23912396
}
23922397

2398+
static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
2399+
{
2400+
struct vxlan_dev *vxlan = netdev_priv(dev);
2401+
struct vxlan_rdst *dst = &vxlan->default_dst;
2402+
struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
2403+
dst->remote_ifindex);
2404+
return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true);
2405+
}
2406+
23932407
static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
23942408
struct ip_tunnel_info *info,
23952409
__be16 sport, __be16 dport)
@@ -2765,6 +2779,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
27652779
int err;
27662780
bool use_ipv6 = false;
27672781
__be16 default_port = vxlan->cfg.dst_port;
2782+
struct net_device *lowerdev = NULL;
27682783

27692784
vxlan->net = src_net;
27702785

@@ -2785,9 +2800,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
27852800
}
27862801

27872802
if (conf->remote_ifindex) {
2788-
struct net_device *lowerdev
2789-
= __dev_get_by_index(src_net, conf->remote_ifindex);
2790-
2803+
lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
27912804
dst->remote_ifindex = conf->remote_ifindex;
27922805

27932806
if (!lowerdev) {
@@ -2811,6 +2824,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
28112824
needed_headroom = lowerdev->hard_header_len;
28122825
}
28132826

2827+
if (conf->mtu) {
2828+
err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
2829+
if (err)
2830+
return err;
2831+
}
2832+
28142833
if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
28152834
needed_headroom += VXLAN6_HEADROOM;
28162835
else

include/net/ip_tunnels.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
230230
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
231231
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
232232
u8 *protocol, struct flowi4 *fl4);
233+
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
233234
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
234235

235236
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,

net/ipv4/ip_gre.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,6 +1240,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
12401240
err = ipgre_newlink(net, dev, tb, NULL);
12411241
if (err < 0)
12421242
goto out;
1243+
1244+
/* openvswitch users expect packet sizes to be unrestricted,
1245+
* so set the largest MTU we can.
1246+
*/
1247+
err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1248+
if (err)
1249+
goto out;
1250+
12431251
return dev;
12441252
out:
12451253
free_netdev(dev);

net/ipv4/ip_tunnel.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -943,17 +943,31 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
943943
}
944944
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
945945

946-
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
946+
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
947947
{
948948
struct ip_tunnel *tunnel = netdev_priv(dev);
949949
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
950+
int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
950951

951-
if (new_mtu < 68 ||
952-
new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
952+
if (new_mtu < 68)
953953
return -EINVAL;
954+
955+
if (new_mtu > max_mtu) {
956+
if (strict)
957+
return -EINVAL;
958+
959+
new_mtu = max_mtu;
960+
}
961+
954962
dev->mtu = new_mtu;
955963
return 0;
956964
}
965+
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
966+
967+
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
968+
{
969+
return __ip_tunnel_change_mtu(dev, new_mtu, true);
970+
}
957971
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
958972

959973
static void ip_tunnel_dev_free(struct net_device *dev)

net/openvswitch/vport-vxlan.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
9191
struct vxlan_config conf = {
9292
.no_share = true,
9393
.flags = VXLAN_F_COLLECT_METADATA,
94+
/* Don't restrict the packets that can be sent by MTU */
95+
.mtu = IP_MAX_MTU,
9496
};
9597

9698
if (!options) {

0 commit comments

Comments
 (0)