Skip to content

Commit f87c10a

Browse files
strssndktn authored and davem330 committed
ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing
While forwarding we should not use the protocol path mtu to calculate the mtu for a forwarded packet but instead use the interface mtu. We mark forwarded skbs in ip_forward with IPSKB_FORWARDED, which was introduced for multicast forwarding. As it does not conflict with our usage in the unicast code path, it is perfect for reuse. I moved the functions ip_sk_accept_pmtu, ip_sk_use_pmtu and ip_skb_dst_mtu along with the new ip_dst_mtu_maybe_forward to net/ip.h to fix circular dependencies because of IPSKB_FORWARDED. Because someone might have written software which probes destinations manually and expects the kernel to honour those path mtus, I introduced a new per-namespace "ip_forward_use_pmtu" knob so someone can disable this new behaviour. We also still use mtus which are locked on a route for forwarding. The reason for this change is that path mtu information can be injected into the kernel via, e.g., the icmp_err protocol handler without verification of local sockets. As such, this could cause the IPv4 forwarding path to wrongfully emit fragmentation needed notifications or start to fragment packets along a path. Tunnel and ipsec output paths clear IPCB again, thus IPSKB_FORWARDED won't be set and further fragmentation logic will use the path mtu to determine the fragmentation size. They also recheck the packet size with the help of path mtu discovery and report appropriate errors. Cc: Eric Dumazet <[email protected]> Cc: David Miller <[email protected]> Cc: John Heffner <[email protected]> Cc: Steffen Klassert <[email protected]> Signed-off-by: Hannes Frederic Sowa <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 6c76a07 commit f87c10a

File tree

8 files changed

+67
-24
lines changed

8 files changed

+67
-24
lines changed

Documentation/networking/ip-sysctl.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,19 @@ ip_no_pmtu_disc - INTEGER
3232
min_pmtu - INTEGER
3333
default 552 - minimum discovered Path MTU
3434

35+
ip_forward_use_pmtu - BOOLEAN
36+
By default we don't trust protocol path MTUs while forwarding
37+
because they could be easily forged and can lead to unwanted
38+
fragmentation by the router.
39+
You only need to enable this if you have user-space software
40+
which tries to discover path mtus by itself and depends on the
41+
kernel honoring this information. This is normally not the
42+
case.
43+
Default: 0 (disabled)
44+
Possible values:
45+
0 - disabled
46+
1 - enabled
47+
3548
route/max_size - INTEGER
3649
Maximum number of routes allowed in the kernel. Increase
3750
this when using large numbers of interfaces and/or routes.

include/net/ip.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,39 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
263263
!(dst_metric_locked(dst, RTAX_MTU)));
264264
}
265265

266+
static inline bool ip_sk_accept_pmtu(const struct sock *sk)
267+
{
268+
return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE;
269+
}
270+
271+
static inline bool ip_sk_use_pmtu(const struct sock *sk)
272+
{
273+
return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
274+
}
275+
276+
static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
277+
bool forwarding)
278+
{
279+
struct net *net = dev_net(dst->dev);
280+
281+
if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
282+
dst_metric_locked(dst, RTAX_MTU) ||
283+
!forwarding)
284+
return dst_mtu(dst);
285+
286+
return min(dst->dev->mtu, IP_MAX_MTU);
287+
}
288+
289+
static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
290+
{
291+
if (!skb->sk || ip_sk_use_pmtu(skb->sk)) {
292+
bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
293+
return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
294+
} else {
295+
return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
296+
}
297+
}
298+
266299
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
267300

268301
static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)

include/net/netns/ipv4.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ struct netns_ipv4 {
7070

7171
int sysctl_tcp_ecn;
7272
int sysctl_ip_no_pmtu_disc;
73+
int sysctl_ip_fwd_use_pmtu;
7374

7475
kgid_t sysctl_ping_group_range[2];
7576

include/net/route.h

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@
3636
#include <linux/cache.h>
3737
#include <linux/security.h>
3838

39+
/* IPv4 datagram length is stored into 16bit field (tot_len) */
40+
#define IP_MAX_MTU 0xFFFFU
41+
3942
#define RTO_ONLINK 0x01
4043

4144
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
@@ -311,20 +314,4 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
311314
return hoplimit;
312315
}
313316

314-
static inline bool ip_sk_accept_pmtu(const struct sock *sk)
315-
{
316-
return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE;
317-
}
318-
319-
static inline bool ip_sk_use_pmtu(const struct sock *sk)
320-
{
321-
return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
322-
}
323-
324-
static inline int ip_skb_dst_mtu(const struct sk_buff *skb)
325-
{
326-
return (!skb->sk || ip_sk_use_pmtu(skb->sk)) ?
327-
dst_mtu(skb_dst(skb)) : skb_dst(skb)->dev->mtu;
328-
}
329-
330317
#endif /* _ROUTE_H */

net/ipv4/ip_forward.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ static int ip_forward_finish(struct sk_buff *skb)
5454

5555
int ip_forward(struct sk_buff *skb)
5656
{
57+
u32 mtu;
5758
struct iphdr *iph; /* Our header */
5859
struct rtable *rt; /* Route we use */
5960
struct ip_options *opt = &(IPCB(skb)->opt);
@@ -88,11 +89,13 @@ int ip_forward(struct sk_buff *skb)
8889
if (opt->is_strictroute && rt->rt_uses_gateway)
8990
goto sr_failed;
9091

91-
if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
92+
IPCB(skb)->flags |= IPSKB_FORWARDED;
93+
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
94+
if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
9295
(ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
9396
IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
9497
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
95-
htonl(dst_mtu(&rt->dst)));
98+
htonl(mtu));
9699
goto drop;
97100
}
98101

net/ipv4/ip_output.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
449449
__be16 not_last_frag;
450450
struct rtable *rt = skb_rtable(skb);
451451
int err = 0;
452+
bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
452453

453454
dev = rt->dst.dev;
454455

@@ -458,12 +459,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
458459

459460
iph = ip_hdr(skb);
460461

462+
mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding);
461463
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
462464
(IPCB(skb)->frag_max_size &&
463-
IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) {
465+
IPCB(skb)->frag_max_size > mtu))) {
464466
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
465467
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
466-
htonl(ip_skb_dst_mtu(skb)));
468+
htonl(mtu));
467469
kfree_skb(skb);
468470
return -EMSGSIZE;
469471
}
@@ -473,7 +475,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
473475
*/
474476

475477
hlen = iph->ihl * 4;
476-
mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */
478+
mtu = mtu - hlen; /* Size of data space */
477479
#ifdef CONFIG_BRIDGE_NETFILTER
478480
if (skb->nf_bridge)
479481
mtu -= nf_bridge_mtu_reduction(skb);

net/ipv4/route.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,6 @@
112112
#define RT_FL_TOS(oldflp4) \
113113
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
114114

115-
/* IPv4 datagram length is stored into 16bit field (tot_len) */
116-
#define IP_MAX_MTU 0xFFFF
117-
118115
#define RT_GC_TIMEOUT (300*HZ)
119116

120117
static int ip_rt_max_size;

net/ipv4/sysctl_net_ipv4.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,13 @@ static struct ctl_table ipv4_net_table[] = {
831831
.mode = 0644,
832832
.proc_handler = proc_dointvec
833833
},
834+
{
835+
.procname = "ip_forward_use_pmtu",
836+
.data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
837+
.maxlen = sizeof(int),
838+
.mode = 0644,
839+
.proc_handler = proc_dointvec,
840+
},
834841
{ }
835842
};
836843

0 commit comments

Comments
 (0)