Skip to content

Commit 7d9e5f4

Browse files
tracywwnj authored and
davem330 committed
ipv6: convert major tx path to use RT6_LOOKUP_F_DST_NOREF
For the tx path, in most cases, we still have to take a refcnt on the dst because the caller is caching the dst somewhere. But it is still beneficial to make use of the RT6_LOOKUP_F_DST_NOREF flag while doing the route lookup. This is because the flag prevents manipulating the refcnt on net->ipv6.ip6_null_entry when doing fib6_rule_lookup() to traverse each routing table. The null_entry is a shared object and constant updates on it cause false sharing. We converted the current major lookup function ip6_route_output_flags() to make use of RT6_LOOKUP_F_DST_NOREF. Together with the change in the rx path, we see a noticeable performance boost: I ran synflood tests between 2 hosts under the same switch. Both hosts have a 20G mlx NIC and 8 tx/rx queues. The sender sends a pure SYN flood with random src IPs and ports using trafgen. The receiver has a simple TCP listener on the target port. Both hosts have multiple custom rules: - For incoming packets, only the local table is traversed. - For outgoing packets, 3 tables are traversed to find the route. The packet processing rate on the receiver is as follows: - Before the fix: 3.78Mpps - After the fix: 5.50Mpps Signed-off-by: Wei Wang <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 67f415d commit 7d9e5f4

File tree

4 files changed

+37
-8
lines changed

4 files changed

+37
-8
lines changed

drivers/net/vrf.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,12 +1072,14 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
10721072
#if IS_ENABLED(CONFIG_IPV6)
10731073
/* send to link-local or multicast address via interface enslaved to
10741074
* VRF device. Force lookup to VRF table without changing flow struct
1075+
* Note: Caller to this function must hold rcu_read_lock() and no refcnt
1076+
* is taken on the dst by this function.
10751077
*/
10761078
static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
10771079
struct flowi6 *fl6)
10781080
{
10791081
struct net *net = dev_net(dev);
1080-
int flags = RT6_LOOKUP_F_IFACE;
1082+
int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF;
10811083
struct dst_entry *dst = NULL;
10821084
struct rt6_info *rt;
10831085

@@ -1087,7 +1089,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
10871089
*/
10881090
if (fl6->flowi6_oif == dev->ifindex) {
10891091
dst = &net->ipv6.ip6_null_entry->dst;
1090-
dst_hold(dst);
10911092
return dst;
10921093
}
10931094

include/net/ip6_route.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
8484
struct flowi6 *fl6,
8585
const struct sk_buff *skb, int flags);
8686

87+
struct dst_entry *ip6_route_output_flags_noref(struct net *net,
88+
const struct sock *sk,
89+
struct flowi6 *fl6, int flags);
90+
8791
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
8892
struct flowi6 *fl6, int flags);
8993

net/ipv6/route.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2415,22 +2415,25 @@ static struct rt6_info *ip6_pol_route_output(struct net *net,
24152415
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
24162416
}
24172417

2418-
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2419-
struct flowi6 *fl6, int flags)
2418+
struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2419+
const struct sock *sk,
2420+
struct flowi6 *fl6, int flags)
24202421
{
24212422
bool any_src;
24222423

24232424
if (ipv6_addr_type(&fl6->daddr) &
24242425
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
24252426
struct dst_entry *dst;
24262427

2428+
/* This function does not take refcnt on the dst */
24272429
dst = l3mdev_link_scope_lookup(net, fl6);
24282430
if (dst)
24292431
return dst;
24302432
}
24312433

24322434
fl6->flowi6_iif = LOOPBACK_IFINDEX;
24332435

2436+
flags |= RT6_LOOKUP_F_DST_NOREF;
24342437
any_src = ipv6_addr_any(&fl6->saddr);
24352438
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
24362439
(fl6->flowi6_oif && any_src))
@@ -2443,6 +2446,28 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
24432446

24442447
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
24452448
}
2449+
EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
2450+
2451+
struct dst_entry *ip6_route_output_flags(struct net *net,
2452+
const struct sock *sk,
2453+
struct flowi6 *fl6,
2454+
int flags)
2455+
{
2456+
struct dst_entry *dst;
2457+
struct rt6_info *rt6;
2458+
2459+
rcu_read_lock();
2460+
dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2461+
rt6 = (struct rt6_info *)dst;
2462+
/* For dst cached in uncached_list, refcnt is already taken. */
2463+
if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
2464+
dst = &net->ipv6.ip6_null_entry->dst;
2465+
dst_hold(dst);
2466+
}
2467+
rcu_read_unlock();
2468+
2469+
return dst;
2470+
}
24462471
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
24472472

24482473
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)

net/l3mdev/l3mdev.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
118118
* local and multicast addresses
119119
* @net: network namespace for device index lookup
120120
* @fl6: IPv6 flow struct for lookup
121+
* This function does not hold refcnt on the returned dst.
122+
* Caller must hold rcu_read_lock().
121123
*/
122124

123125
struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
@@ -126,18 +128,15 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
126128
struct dst_entry *dst = NULL;
127129
struct net_device *dev;
128130

131+
WARN_ON_ONCE(!rcu_read_lock_held());
129132
if (fl6->flowi6_oif) {
130-
rcu_read_lock();
131-
132133
dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
133134
if (dev && netif_is_l3_slave(dev))
134135
dev = netdev_master_upper_dev_get_rcu(dev);
135136

136137
if (dev && netif_is_l3_master(dev) &&
137138
dev->l3mdev_ops->l3mdev_link_scope_lookup)
138139
dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
139-
140-
rcu_read_unlock();
141140
}
142141

143142
return dst;

0 commit comments

Comments
 (0)