Skip to content

Commit 94720e3

Browse files
Julian Anastasov authored and davem330 committed
ipv4: fix fnhe usage by non-cached routes
Allow some non-cached routes to use non-expired fnhe:

1. ip_del_fnhe: moved above and now called by find_exception. The 4.5+ commit deed49d expires fnhe only when caching routes. Change that to:

1.1. use fnhe for non-cached local output routes, with the help from (2)

1.2. allow __mkroute_input to detect expired fnhe (outdated fnhe_gw, for example) when do_cache is false, e.g. when itag!=0 for unicast destinations.

2. __mkroute_output: keep fi to allow local routes with orig_oif != 0 to use fnhe info even when the new route will not be cached into fnhe. After commit 839da4d ("net: ipv4: set orig_oif based on fib result for local traffic") it means all local routes will be affected because they are not cached. This change is used to solve a PMTU problem with IPVS (and probably Netfilter DNAT) setups that redirect local clients from target local IP (local route to Virtual IP) to new remote IP target, e.g. IPVS TUN real server. Loopback has 64K MTU and we need to create fnhe on the local route that will keep the reduced PMTU for the Virtual IP. Without this change fnhe_pmtu is updated from ICMP but never exposed to non-cached local routes. This includes routes with flowi4_oif!=0 for 4.6+ and with flowi4_oif=any for 4.14+.

3. update_or_create_fnhe: make sure fnhe_expires is not 0 for new entries

Fixes: 839da4d ("net: ipv4: set orig_oif based on fib result for local traffic")
Fixes: d6d5e99 ("route: do not cache fib route info on local routes with oif")
Fixes: deed49d ("route: check and remove route cache when we get route")
Cc: David Ahern <[email protected]>
Cc: Xin Long <[email protected]>
Signed-off-by: Julian Anastasov <[email protected]>
Acked-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent e002434 commit 94720e3

File tree

1 file changed

+53
-65
lines changed

1 file changed

+53
-65
lines changed

net/ipv4/route.c

Lines changed: 53 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -709,7 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
709709
fnhe->fnhe_gw = gw;
710710
fnhe->fnhe_pmtu = pmtu;
711711
fnhe->fnhe_mtu_locked = lock;
712-
fnhe->fnhe_expires = expires;
712+
fnhe->fnhe_expires = max(1UL, expires);
713713

714714
/* Exception created; mark the cached routes for the nexthop
715715
* stale, so anyone caching it rechecks if this exception
@@ -1297,6 +1297,36 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
12971297
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
12981298
}
12991299

1300+
static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
1301+
{
1302+
struct fnhe_hash_bucket *hash;
1303+
struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1304+
u32 hval = fnhe_hashfun(daddr);
1305+
1306+
spin_lock_bh(&fnhe_lock);
1307+
1308+
hash = rcu_dereference_protected(nh->nh_exceptions,
1309+
lockdep_is_held(&fnhe_lock));
1310+
hash += hval;
1311+
1312+
fnhe_p = &hash->chain;
1313+
fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1314+
while (fnhe) {
1315+
if (fnhe->fnhe_daddr == daddr) {
1316+
rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1317+
fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
1318+
fnhe_flush_routes(fnhe);
1319+
kfree_rcu(fnhe, rcu);
1320+
break;
1321+
}
1322+
fnhe_p = &fnhe->fnhe_next;
1323+
fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1324+
lockdep_is_held(&fnhe_lock));
1325+
}
1326+
1327+
spin_unlock_bh(&fnhe_lock);
1328+
}
1329+
13001330
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
13011331
{
13021332
struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
@@ -1310,8 +1340,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
13101340

13111341
for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
13121342
fnhe = rcu_dereference(fnhe->fnhe_next)) {
1313-
if (fnhe->fnhe_daddr == daddr)
1343+
if (fnhe->fnhe_daddr == daddr) {
1344+
if (fnhe->fnhe_expires &&
1345+
time_after(jiffies, fnhe->fnhe_expires)) {
1346+
ip_del_fnhe(nh, daddr);
1347+
break;
1348+
}
13141349
return fnhe;
1350+
}
13151351
}
13161352
return NULL;
13171353
}
@@ -1636,36 +1672,6 @@ static void ip_handle_martian_source(struct net_device *dev,
16361672
#endif
16371673
}
16381674

1639-
static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
1640-
{
1641-
struct fnhe_hash_bucket *hash;
1642-
struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1643-
u32 hval = fnhe_hashfun(daddr);
1644-
1645-
spin_lock_bh(&fnhe_lock);
1646-
1647-
hash = rcu_dereference_protected(nh->nh_exceptions,
1648-
lockdep_is_held(&fnhe_lock));
1649-
hash += hval;
1650-
1651-
fnhe_p = &hash->chain;
1652-
fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1653-
while (fnhe) {
1654-
if (fnhe->fnhe_daddr == daddr) {
1655-
rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1656-
fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
1657-
fnhe_flush_routes(fnhe);
1658-
kfree_rcu(fnhe, rcu);
1659-
break;
1660-
}
1661-
fnhe_p = &fnhe->fnhe_next;
1662-
fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1663-
lockdep_is_held(&fnhe_lock));
1664-
}
1665-
1666-
spin_unlock_bh(&fnhe_lock);
1667-
}
1668-
16691675
/* called in rcu_read_lock() section */
16701676
static int __mkroute_input(struct sk_buff *skb,
16711677
const struct fib_result *res,
@@ -1719,20 +1725,10 @@ static int __mkroute_input(struct sk_buff *skb,
17191725

17201726
fnhe = find_exception(&FIB_RES_NH(*res), daddr);
17211727
if (do_cache) {
1722-
if (fnhe) {
1728+
if (fnhe)
17231729
rth = rcu_dereference(fnhe->fnhe_rth_input);
1724-
if (rth && rth->dst.expires &&
1725-
time_after(jiffies, rth->dst.expires)) {
1726-
ip_del_fnhe(&FIB_RES_NH(*res), daddr);
1727-
fnhe = NULL;
1728-
} else {
1729-
goto rt_cache;
1730-
}
1731-
}
1732-
1733-
rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1734-
1735-
rt_cache:
1730+
else
1731+
rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
17361732
if (rt_cache_valid(rth)) {
17371733
skb_dst_set_noref(skb, &rth->dst);
17381734
goto out;
@@ -2216,39 +2212,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
22162212
* the loopback interface and the IP_PKTINFO ipi_ifindex will
22172213
* be set to the loopback interface as well.
22182214
*/
2219-
fi = NULL;
2215+
do_cache = false;
22202216
}
22212217

22222218
fnhe = NULL;
22232219
do_cache &= fi != NULL;
2224-
if (do_cache) {
2220+
if (fi) {
22252221
struct rtable __rcu **prth;
22262222
struct fib_nh *nh = &FIB_RES_NH(*res);
22272223

22282224
fnhe = find_exception(nh, fl4->daddr);
2225+
if (!do_cache)
2226+
goto add;
22292227
if (fnhe) {
22302228
prth = &fnhe->fnhe_rth_output;
2231-
rth = rcu_dereference(*prth);
2232-
if (rth && rth->dst.expires &&
2233-
time_after(jiffies, rth->dst.expires)) {
2234-
ip_del_fnhe(nh, fl4->daddr);
2235-
fnhe = NULL;
2236-
} else {
2237-
goto rt_cache;
2229+
} else {
2230+
if (unlikely(fl4->flowi4_flags &
2231+
FLOWI_FLAG_KNOWN_NH &&
2232+
!(nh->nh_gw &&
2233+
nh->nh_scope == RT_SCOPE_LINK))) {
2234+
do_cache = false;
2235+
goto add;
22382236
}
2237+
prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
22392238
}
2240-
2241-
if (unlikely(fl4->flowi4_flags &
2242-
FLOWI_FLAG_KNOWN_NH &&
2243-
!(nh->nh_gw &&
2244-
nh->nh_scope == RT_SCOPE_LINK))) {
2245-
do_cache = false;
2246-
goto add;
2247-
}
2248-
prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
22492239
rth = rcu_dereference(*prth);
2250-
2251-
rt_cache:
22522240
if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
22532241
return rth;
22542242
}

0 commit comments

Comments
 (0)