Skip to content

Commit 02b2494

Browse files
Paolo Abeni authored and davem330 committed
ipv4: use dst hint for ipv4 list receive
This is similar to the previous change, with some additional IPv4-specific quirks. Even when using the route hint we still have to perform additional per-packet checks on source address validity: a new helper is added to wrap them. Hints are explicitly disabled if the destination is a local broadcast; that keeps the code simple, and local broadcasts are a slower path anyway. UDP flood performance vs recvmmsg() receiver: vanilla 1683 Kpps, patched 1871 Kpps, delta +11%. In the worst-case scenario - each packet has a different destination address - the performance delta is within noise range. v3 -> v4: - re-enable hints for forward v2 -> v3: - really fix build (sic) and hint usage check - use fib4_has_custom_rules() helpers (David A.) - add ip_extract_route_hint() helper (Edward C.) - use prev skb as hint instead of copying data (Willem) v1 -> v2: - fix build issue with !CONFIG_IP_MULTIPLE_TABLES Signed-off-by: Paolo Abeni <[email protected]> Reviewed-by: David Ahern <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent c43c3d7 commit 02b2494

File tree

3 files changed

+77
-4
lines changed

3 files changed

+77
-4
lines changed

include/net/route.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,10 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
185185
u8 tos, struct net_device *devin,
186186
struct fib_result *res);
187187

188+
int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
189+
u8 tos, struct net_device *devin,
190+
const struct sk_buff *hint);
191+
188192
static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
189193
u8 tos, struct net_device *devin)
190194
{

net/ipv4/ip_input.c

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -302,16 +302,31 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
302302
return true;
303303
}
304304

305+
static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
306+
const struct sk_buff *hint)
307+
{
308+
return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
309+
ip_hdr(hint)->tos == iph->tos;
310+
}
311+
305312
INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
306313
INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
307314
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
308-
struct sk_buff *skb, struct net_device *dev)
315+
struct sk_buff *skb, struct net_device *dev,
316+
const struct sk_buff *hint)
309317
{
310318
const struct iphdr *iph = ip_hdr(skb);
311319
int (*edemux)(struct sk_buff *skb);
312320
struct rtable *rt;
313321
int err;
314322

323+
if (ip_can_use_hint(skb, iph, hint)) {
324+
err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
325+
dev, hint);
326+
if (unlikely(err))
327+
goto drop_error;
328+
}
329+
315330
if (net->ipv4.sysctl_ip_early_demux &&
316331
!skb_dst(skb) &&
317332
!skb->sk &&
@@ -408,7 +423,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
408423
if (!skb)
409424
return NET_RX_SUCCESS;
410425

411-
ret = ip_rcv_finish_core(net, sk, skb, dev);
426+
ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
412427
if (ret != NET_RX_DROP)
413428
ret = dst_input(skb);
414429
return ret;
@@ -535,11 +550,20 @@ static void ip_sublist_rcv_finish(struct list_head *head)
535550
}
536551
}
537552

553+
static struct sk_buff *ip_extract_route_hint(const struct net *net,
554+
struct sk_buff *skb, int rt_type)
555+
{
556+
if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
557+
return NULL;
558+
559+
return skb;
560+
}
561+
538562
static void ip_list_rcv_finish(struct net *net, struct sock *sk,
539563
struct list_head *head)
540564
{
565+
struct sk_buff *skb, *next, *hint = NULL;
541566
struct dst_entry *curr_dst = NULL;
542-
struct sk_buff *skb, *next;
543567
struct list_head sublist;
544568

545569
INIT_LIST_HEAD(&sublist);
@@ -554,11 +578,14 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
554578
skb = l3mdev_ip_rcv(skb);
555579
if (!skb)
556580
continue;
557-
if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
581+
if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
558582
continue;
559583

560584
dst = skb_dst(skb);
561585
if (curr_dst != dst) {
586+
hint = ip_extract_route_hint(net, skb,
587+
((struct rtable *)dst)->rt_type);
588+
562589
/* dispatch old sublist */
563590
if (!list_empty(&sublist))
564591
ip_sublist_rcv_finish(&sublist);

net/ipv4/route.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2019,10 +2019,52 @@ static int ip_mkroute_input(struct sk_buff *skb,
20192019
return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
20202020
}
20212021

2022+
/* Implements all the saddr-related checks as ip_route_input_slow(),
2023+
* assuming daddr is valid and the destination is not a local broadcast one.
2024+
* Uses the provided hint instead of performing a route lookup.
2025+
*/
2026+
int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2027+
u8 tos, struct net_device *dev,
2028+
const struct sk_buff *hint)
2029+
{
2030+
struct in_device *in_dev = __in_dev_get_rcu(dev);
2031+
struct rtable *rt = (struct rtable *)hint;
2032+
struct net *net = dev_net(dev);
2033+
int err = -EINVAL;
2034+
u32 tag = 0;
2035+
2036+
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
2037+
goto martian_source;
2038+
2039+
if (ipv4_is_zeronet(saddr))
2040+
goto martian_source;
2041+
2042+
if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
2043+
goto martian_source;
2044+
2045+
if (rt->rt_type != RTN_LOCAL)
2046+
goto skip_validate_source;
2047+
2048+
tos &= IPTOS_RT_MASK;
2049+
err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
2050+
if (err < 0)
2051+
goto martian_source;
2052+
2053+
skip_validate_source:
2054+
skb_dst_copy(skb, hint);
2055+
return 0;
2056+
2057+
martian_source:
2058+
ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2059+
return err;
2060+
}
2061+
20222062
/*
20232063
* NOTE. We drop all the packets that has local source
20242064
* addresses, because every properly looped back packet
20252065
* must have correct destination already attached by output routine.
2066+
* Changes in the enforced policies must be applied also to
2067+
* ip_route_use_hint().
20262068
*
20272069
* Such approach solves two big problems:
20282070
* 1. Not simplex devices are handled properly.

0 commit comments

Comments
 (0)