Skip to content

Commit 35ebf65

Browse files
committed
ipv4: Create and use fib_compute_spec_dst() helper.
The specific destination is the host we direct unicast replies to. Usually this is the original packet source address, but if we are responding to a multicast or broadcast packet we have to use something different. Specifically, we must use the source address we would use if we were to send a packet to the unicast source of the original packet. The routing cache precomputes this value, but we want to remove that precomputation because it creates a hard dependency on the expensive rpfilter source address validation, which we'd like to make cheaper. There are only three places where this matters: 1) ICMP replies, 2) pktinfo CMSG, and 3) IP options. Now there will be no real users of rt->rt_spec_dst and we can simply remove it altogether. Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 70e7341 commit 35ebf65

File tree

5 files changed

+49
-16
lines changed

5 files changed

+49
-16
lines changed

include/net/ip_fib.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ extern struct fib_table *fib_get_table(struct net *net, u32 id);
230230
/* Exported by fib_frontend.c */
231231
extern const struct nla_policy rtm_ipv4_policy[];
232232
extern void ip_fib_init(void);
233+
extern __be32 fib_compute_spec_dst(struct sk_buff *skb);
233234
extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
234235
u8 tos, int oif, struct net_device *dev,
235236
__be32 *spec_dst, u32 *itag);

net/ipv4/fib_frontend.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,35 @@ unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
180180
}
181181
EXPORT_SYMBOL(inet_dev_addr_type);
182182

183+
__be32 fib_compute_spec_dst(struct sk_buff *skb)
184+
{
185+
struct net_device *dev = skb->dev;
186+
struct in_device *in_dev;
187+
struct fib_result res;
188+
struct flowi4 fl4;
189+
struct net *net;
190+
191+
if (skb->pkt_type != PACKET_BROADCAST &&
192+
skb->pkt_type != PACKET_MULTICAST)
193+
return ip_hdr(skb)->daddr;
194+
195+
in_dev = __in_dev_get_rcu(dev);
196+
BUG_ON(!in_dev);
197+
fl4.flowi4_oif = 0;
198+
fl4.flowi4_iif = 0;
199+
fl4.daddr = ip_hdr(skb)->saddr;
200+
fl4.saddr = ip_hdr(skb)->daddr;
201+
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
202+
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
203+
fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
204+
205+
net = dev_net(dev);
206+
if (!fib_lookup(net, &fl4, &res))
207+
return FIB_RES_PREFSRC(net, res);
208+
else
209+
return inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
210+
}
211+
183212
/* Given (packet source, input interface) and optional (dst, oif, tos):
184213
* - (main) check, that source is valid i.e. not broadcast or our local
185214
* address.

net/ipv4/icmp.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
#include <net/checksum.h>
9696
#include <net/xfrm.h>
9797
#include <net/inet_common.h>
98+
#include <net/ip_fib.h>
9899

99100
/*
100101
* Build xmit assembly blocks
@@ -333,7 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
333334
struct flowi4 fl4;
334335
struct sock *sk;
335336
struct inet_sock *inet;
336-
__be32 daddr;
337+
__be32 daddr, saddr;
337338

338339
if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
339340
return;
@@ -347,6 +348,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
347348

348349
inet->tos = ip_hdr(skb)->tos;
349350
daddr = ipc.addr = ip_hdr(skb)->saddr;
351+
saddr = fib_compute_spec_dst(skb);
350352
ipc.opt = NULL;
351353
ipc.tx_flags = 0;
352354
if (icmp_param->replyopts.opt.opt.optlen) {
@@ -356,7 +358,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
356358
}
357359
memset(&fl4, 0, sizeof(fl4));
358360
fl4.daddr = daddr;
359-
fl4.saddr = rt->rt_spec_dst;
361+
fl4.saddr = saddr;
360362
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
361363
fl4.flowi4_proto = IPPROTO_ICMP;
362364
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));

net/ipv4/ip_options.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <net/icmp.h>
2828
#include <net/route.h>
2929
#include <net/cipso_ipv4.h>
30+
#include <net/ip_fib.h>
3031

3132
/*
3233
* Write options to IP header, record destination address to
@@ -104,7 +105,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
104105
sptr = skb_network_header(skb);
105106
dptr = dopt->__data;
106107

107-
daddr = skb_rtable(skb)->rt_spec_dst;
108+
daddr = fib_compute_spec_dst(skb);
108109

109110
if (sopt->rr) {
110111
optlen = sptr[sopt->rr+1];
@@ -250,15 +251,14 @@ void ip_options_fragment(struct sk_buff *skb)
250251
int ip_options_compile(struct net *net,
251252
struct ip_options *opt, struct sk_buff *skb)
252253
{
253-
int l;
254-
unsigned char *iph;
255-
unsigned char *optptr;
256-
int optlen;
254+
__be32 spec_dst = (__force __be32) 0;
257255
unsigned char *pp_ptr = NULL;
258-
struct rtable *rt = NULL;
256+
unsigned char *optptr;
257+
unsigned char *iph;
258+
int optlen, l;
259259

260260
if (skb != NULL) {
261-
rt = skb_rtable(skb);
261+
spec_dst = fib_compute_spec_dst(skb);
262262
optptr = (unsigned char *)&(ip_hdr(skb)[1]);
263263
} else
264264
optptr = opt->__data;
@@ -330,8 +330,8 @@ int ip_options_compile(struct net *net,
330330
pp_ptr = optptr + 2;
331331
goto error;
332332
}
333-
if (rt) {
334-
memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
333+
if (skb) {
334+
memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
335335
opt->is_changed = 1;
336336
}
337337
optptr[2] += 4;
@@ -372,8 +372,8 @@ int ip_options_compile(struct net *net,
372372
goto error;
373373
}
374374
opt->ts = optptr - iph;
375-
if (rt) {
376-
memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
375+
if (skb) {
376+
memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
377377
timeptr = &optptr[optptr[2]+3];
378378
}
379379
opt->ts_needaddr = 1;

net/ipv4/ip_sockglue.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#if IS_ENABLED(CONFIG_IPV6)
4141
#include <net/transp_v6.h>
4242
#endif
43+
#include <net/ip_fib.h>
4344

4445
#include <linux/errqueue.h>
4546
#include <asm/uaccess.h>
@@ -1019,8 +1020,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
10191020
* @sk: socket
10201021
* @skb: buffer
10211022
*
1022-
* To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst
1023-
* in skb->cb[] before dst drop.
1023+
* To support IP_CMSG_PKTINFO option, we store rt_iif and specific
1024+
* destination in skb->cb[] before dst drop.
10241025
* This way, the receiver doesn't take cache line misses to read rtable.
10251026
*/
10261027
void ipv4_pktinfo_prepare(struct sk_buff *skb)
@@ -1030,7 +1031,7 @@ void ipv4_pktinfo_prepare(struct sk_buff *skb)
10301031

10311032
if (rt) {
10321033
pktinfo->ipi_ifindex = rt->rt_iif;
1033-
pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst;
1034+
pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
10341035
} else {
10351036
pktinfo->ipi_ifindex = 0;
10361037
pktinfo->ipi_spec_dst.s_addr = 0;

0 commit comments

Comments
 (0)