Skip to content

Commit d52e5a7

Browse files
qsn authored and davem330 committed
ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu
Prior to the rework of PMTU information storage in commit 2c8cec5 ("ipv4: Cache learned PMTU information in inetpeer."), when a PMTU event advertising a PMTU smaller than net.ipv4.route.min_pmtu was received, we would disable setting the DF flag on packets by locking the MTU metric, and set the PMTU to net.ipv4.route.min_pmtu.

Since then, we don't disable DF, and set PMTU to net.ipv4.route.min_pmtu, so the intermediate router that has this link with a small MTU will have to drop the packets.

This patch reestablishes pre-2.6.39 behavior by splitting rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu. rt_mtu_locked indicates that we shouldn't set the DF bit on that path, and is checked in ip_dont_fragment().

One possible workaround is to set net.ipv4.route.min_pmtu to a value low enough to accommodate the lowest MTU encountered.

Fixes: 2c8cec5 ("ipv4: Cache learned PMTU information in inetpeer.")
Signed-off-by: Sabrina Dubroca <[email protected]>
Reviewed-by: Stefano Brivio <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 16c2e4d commit d52e5a7

File tree

5 files changed

+32
-10
lines changed

5 files changed

+32
-10
lines changed

include/net/ip.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,14 +328,21 @@ int ip_decrease_ttl(struct iphdr *iph)
328328
return --iph->ttl;
329329
}
330330

331+
static inline int ip_mtu_locked(const struct dst_entry *dst)
332+
{
333+
const struct rtable *rt = (const struct rtable *)dst;
334+
335+
return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
336+
}
337+
331338
static inline
332339
int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
333340
{
334341
u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc);
335342

336343
return pmtudisc == IP_PMTUDISC_DO ||
337344
(pmtudisc == IP_PMTUDISC_WANT &&
338-
!(dst_metric_locked(dst, RTAX_MTU)));
345+
!ip_mtu_locked(dst));
339346
}
340347

341348
static inline bool ip_sk_accept_pmtu(const struct sock *sk)
@@ -361,7 +368,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
361368
struct net *net = dev_net(dst->dev);
362369

363370
if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
364-
dst_metric_locked(dst, RTAX_MTU) ||
371+
ip_mtu_locked(dst) ||
365372
!forwarding)
366373
return dst_mtu(dst);
367374

include/net/ip_fib.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct fib_nh_exception {
5959
int fnhe_genid;
6060
__be32 fnhe_daddr;
6161
u32 fnhe_pmtu;
62+
bool fnhe_mtu_locked;
6263
__be32 fnhe_gw;
6364
unsigned long fnhe_expires;
6465
struct rtable __rcu *fnhe_rth_input;

include/net/route.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ struct rtable {
6363
__be32 rt_gateway;
6464

6565
/* Miscellaneous cached information */
66-
u32 rt_pmtu;
66+
u32 rt_mtu_locked:1,
67+
rt_pmtu:31;
6768

6869
u32 rt_table_id;
6970

net/ipv4/route.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
634634
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
635635
{
636636
rt->rt_pmtu = fnhe->fnhe_pmtu;
637+
rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
637638
rt->dst.expires = fnhe->fnhe_expires;
638639

639640
if (fnhe->fnhe_gw) {
@@ -644,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
644645
}
645646

646647
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
647-
u32 pmtu, unsigned long expires)
648+
u32 pmtu, bool lock, unsigned long expires)
648649
{
649650
struct fnhe_hash_bucket *hash;
650651
struct fib_nh_exception *fnhe;
@@ -681,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
681682
fnhe->fnhe_genid = genid;
682683
if (gw)
683684
fnhe->fnhe_gw = gw;
684-
if (pmtu)
685+
if (pmtu) {
685686
fnhe->fnhe_pmtu = pmtu;
687+
fnhe->fnhe_mtu_locked = lock;
688+
}
686689
fnhe->fnhe_expires = max(1UL, expires);
687690
/* Update all cached dsts too */
688691
rt = rcu_dereference(fnhe->fnhe_rth_input);
@@ -706,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
706709
fnhe->fnhe_daddr = daddr;
707710
fnhe->fnhe_gw = gw;
708711
fnhe->fnhe_pmtu = pmtu;
712+
fnhe->fnhe_mtu_locked = lock;
709713
fnhe->fnhe_expires = expires;
710714

711715
/* Exception created; mark the cached routes for the nexthop
@@ -787,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
787791
struct fib_nh *nh = &FIB_RES_NH(res);
788792

789793
update_or_create_fnhe(nh, fl4->daddr, new_gw,
790-
0, jiffies + ip_rt_gc_timeout);
794+
0, false,
795+
jiffies + ip_rt_gc_timeout);
791796
}
792797
if (kill_route)
793798
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1009,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
10091014
{
10101015
struct dst_entry *dst = &rt->dst;
10111016
struct fib_result res;
1017+
bool lock = false;
10121018

1013-
if (dst_metric_locked(dst, RTAX_MTU))
1019+
if (ip_mtu_locked(dst))
10141020
return;
10151021

10161022
if (ipv4_mtu(dst) < mtu)
10171023
return;
10181024

1019-
if (mtu < ip_rt_min_pmtu)
1025+
if (mtu < ip_rt_min_pmtu) {
1026+
lock = true;
10201027
mtu = ip_rt_min_pmtu;
1028+
}
10211029

10221030
if (rt->rt_pmtu == mtu &&
10231031
time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -1027,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
10271035
if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
10281036
struct fib_nh *nh = &FIB_RES_NH(res);
10291037

1030-
update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
1038+
update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
10311039
jiffies + ip_rt_mtu_expires);
10321040
}
10331041
rcu_read_unlock();
@@ -1280,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
12801288

12811289
mtu = READ_ONCE(dst->dev->mtu);
12821290

1283-
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
1291+
if (unlikely(ip_mtu_locked(dst))) {
12841292
if (rt->rt_uses_gateway && mtu > 576)
12851293
mtu = 576;
12861294
}
@@ -1521,6 +1529,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
15211529
rt->rt_is_input = 0;
15221530
rt->rt_iif = 0;
15231531
rt->rt_pmtu = 0;
1532+
rt->rt_mtu_locked = 0;
15241533
rt->rt_gateway = 0;
15251534
rt->rt_uses_gateway = 0;
15261535
rt->rt_table_id = 0;
@@ -2546,6 +2555,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
25462555
rt->rt_is_input = ort->rt_is_input;
25472556
rt->rt_iif = ort->rt_iif;
25482557
rt->rt_pmtu = ort->rt_pmtu;
2558+
rt->rt_mtu_locked = ort->rt_mtu_locked;
25492559

25502560
rt->rt_genid = rt_genid_ipv4(net);
25512561
rt->rt_flags = ort->rt_flags;
@@ -2648,6 +2658,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
26482658
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
26492659
if (rt->rt_pmtu && expires)
26502660
metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2661+
if (rt->rt_mtu_locked && expires)
2662+
metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
26512663
if (rtnetlink_put_metrics(skb, metrics) < 0)
26522664
goto nla_put_failure;
26532665

net/ipv4/xfrm4_policy.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
100100
xdst->u.rt.rt_gateway = rt->rt_gateway;
101101
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
102102
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
103+
xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
103104
xdst->u.rt.rt_table_id = rt->rt_table_id;
104105
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
105106
rt_add_uncached_list(&xdst->u.rt);

0 commit comments

Comments
 (0)