Skip to content

Commit 2d912da

Browse files
committed
Merge branch 'net-use-indirect_call-in-some-dst_ops'
Brian Vazquez says: ==================== net: use INDIRECT_CALL in some dst_ops This patch series uses the INDIRECT_CALL wrappers in some dst_ops functions to mitigate retpoline costs. Benefits depend on the platform as described below. Background: The kernel rewrites the retpoline code at __x86_indirect_thunk_r11 depending on the CPU's requirements. The INDIRECT_CALL wrappers provide hints on possible targets and save the retpoline overhead using a direct call in case the target matches one of the hints. The retpoline overhead for the following three cases has been measured by Luigi Rizzo in microbenchmarks, using CPU performance counters, and these cases cover reasonably well the range of possible retpoline overheads compared to a plain indirect call (in equal conditions, specifically with predicted branch, hot cache): - just "jmp *(%r11)" on modern platforms like Intel Cascadelake. In this case the overhead is just 2 clock cycles. - "lfence; jmp *(%r11)" on e.g. some recent AMD CPUs. In this case the lfence is blocked until pending reads complete, so the actual overhead depends on previous instructions. In the best case we have measured 15 clock cycles of overhead. - worst case, e.g. skylake, the full retpoline is used __x86_indirect_thunk_r11: call set_up_target capture_speculation: pause lfence jmp capture_speculation .align 16 set_up_target: mov %r11, (%rsp) ret In this case the overhead has been measured at 35-40 clock cycles. The actual time saved hence depends on the platform and current clock speed (which varies heavily, especially when C-states are active). Also note that actual benefit might be lower than expected if the longer retpoline overlaps with some pending memory read. MEASUREMENTS: The INDIRECT_CALL wrappers in this patchset involve the processing of incoming SYN and generation of syncookies. Hence, the test has been run by configuring a receiving host with a single NIC rx queue, disabling RPS and RFS so that all processing occurs on the same core. 
An external source generates SYN fast enough to saturate the receiving CPU. We ran two sets of experiments, with and without the dst_output patch, comparing the number of syncookies generated over a 20s period in multiple runs. Assuming the CPU is saturated, the time per packet is t = total_time/number_of_packets and if the two datasets have a statistically meaningful difference, the difference in times between the two cases gives an estimate of the benefits from one INDIRECT_CALL. Here are the experimental results: Skylake Syncookies over 20s (5 tests) --------------------------------------------------- indirect 9166325 9182023 9170093 9134014 9171082 retpoline 9099308 9126350 9154841 9056377 9122376 Computing the stats on the ns_pkt = 20e6/total_packets gives the following: $ ministat -c 95 -w 70 /tmp/sk-indirect /tmp/sk-retp x /tmp/sk-indirect + /tmp/sk-retp +----------------------------------------------------------------------+ |x xx x + x + + + +| ||______M__A_______|_|____________M_____A___________________| | +----------------------------------------------------------------------+ N Min Max Median Avg Stddev x 5 2.17817e-06 2.18962e-06 2.181e-06 2.182292e-06 4.3252133e-09 + 5 2.18464e-06 2.20839e-06 2.19241e-06 2.194974e-06 8.8695958e-09 Difference at 95.0% confidence 1.2682e-08 +/- 1.01766e-08 0.581132% +/- 0.466326% (Student's t, pooled s = 6.97772e-09) This suggests a difference of 13ns +/- 10ns. Our expectation from microbenchmarks was 35-40 cycles per call, but part of the gains may be eaten by stalls from pending memory reads. 
For Cascadelake: Cascadelake Syncookies over 20s (5 tests) --------------------------------------------------------- indirect 10339797 10297547 1036682 10378891 10384854 retpoline 10332674 10366805 10320374 10334272 10374087 Computing the stats on the ns_pkt = 20e6/total_packets gives no meaningful difference even at just 80% (this was expected): $ ministat -c 80 -w 70 /tmp/cl-indirect /tmp/cl-retp x /tmp/cl-indirect + /tmp/cl-retp +----------------------------------------------------------------------+ | x x + * x + + + x| ||______________|_M_________A_____A_______M________|___| | +----------------------------------------------------------------------+ N Min Max Median Avg Stddev x 5 1.92588e-06 1.94221e-06 1.92923e-06 1.931716e-06 6.6936746e-09 + 5 1.92788e-06 1.93791e-06 1.93531e-06 1.933188e-06 4.3734106e-09 No difference proven at 80.0% confidence ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 4f4e543 + bbd807d commit 2d912da

File tree

9 files changed

+60
-16
lines changed

9 files changed

+60
-16
lines changed

include/net/dst.h

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <linux/refcount.h>
1919
#include <net/neighbour.h>
2020
#include <asm/processor.h>
21+
#include <linux/indirect_call_wrapper.h>
2122

2223
struct sk_buff;
2324

@@ -193,9 +194,11 @@ dst_feature(const struct dst_entry *dst, u32 feature)
193194
return dst_metric(dst, RTAX_FEATURES) & feature;
194195
}
195196

197+
INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *));
198+
INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *));
196199
static inline u32 dst_mtu(const struct dst_entry *dst)
197200
{
198-
return dst->ops->mtu(dst);
201+
return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst);
199202
}
200203

201204
/* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
@@ -435,22 +438,36 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
435438
dst->expires = expires;
436439
}
437440

441+
INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *,
442+
struct sk_buff *));
443+
INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *,
444+
struct sk_buff *));
438445
/* Output packet to network from transport. */
439446
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
440447
{
441-
return skb_dst(skb)->output(net, sk, skb);
448+
return INDIRECT_CALL_INET(skb_dst(skb)->output,
449+
ip6_output, ip_output,
450+
net, sk, skb);
442451
}
443452

453+
INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *));
454+
INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *));
444455
/* Input packet from network to transport. */
445456
static inline int dst_input(struct sk_buff *skb)
446457
{
447-
return skb_dst(skb)->input(skb);
458+
return INDIRECT_CALL_INET(skb_dst(skb)->input,
459+
ip6_input, ip_local_deliver, skb);
448460
}
449461

462+
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
463+
u32));
464+
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
465+
u32));
450466
static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
451467
{
452468
if (dst->obsolete)
453-
dst = dst->ops->check(dst, cookie);
469+
dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check,
470+
ipv4_dst_check, dst, cookie);
454471
return dst;
455472
}
456473

net/core/sock.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,11 +526,17 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
526526
}
527527
EXPORT_SYMBOL(__sk_receive_skb);
528528

529+
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
530+
u32));
531+
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
532+
u32));
529533
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
530534
{
531535
struct dst_entry *dst = __sk_dst_get(sk);
532536

533-
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
537+
if (dst && dst->obsolete &&
538+
INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
539+
dst, cookie) == NULL) {
534540
sk_tx_queue_clear(sk);
535541
sk->sk_dst_pending_confirm = 0;
536542
RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
@@ -546,7 +552,9 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
546552
{
547553
struct dst_entry *dst = sk_dst_get(sk);
548554

549-
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
555+
if (dst && dst->obsolete &&
556+
INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
557+
dst, cookie) == NULL) {
550558
sk_dst_reset(sk);
551559
dst_release(dst);
552560
return NULL;

net/ipv4/ip_input.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ int ip_local_deliver(struct sk_buff *skb)
253253
net, NULL, skb, skb->dev, NULL,
254254
ip_local_deliver_finish);
255255
}
256+
EXPORT_SYMBOL(ip_local_deliver);
256257

257258
static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
258259
{

net/ipv4/ip_output.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
434434
ip_finish_output,
435435
!(IPCB(skb)->flags & IPSKB_REROUTED));
436436
}
437+
EXPORT_SYMBOL(ip_output);
437438

438439
/*
439440
* copy saddr and daddr, possibly using 64bit load/stores

net/ipv4/route.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,11 @@ static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
133133
* Interface to generic destination cache.
134134
*/
135135

136-
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
136+
INDIRECT_CALLABLE_SCOPE
137+
struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
137138
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
138-
static unsigned int ipv4_mtu(const struct dst_entry *dst);
139+
INDIRECT_CALLABLE_SCOPE
140+
unsigned int ipv4_mtu(const struct dst_entry *dst);
139141
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
140142
static void ipv4_link_failure(struct sk_buff *skb);
141143
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
@@ -1187,7 +1189,8 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
11871189
}
11881190
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
11891191

1190-
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1192+
INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
1193+
u32 cookie)
11911194
{
11921195
struct rtable *rt = (struct rtable *) dst;
11931196

@@ -1203,6 +1206,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
12031206
return NULL;
12041207
return dst;
12051208
}
1209+
EXPORT_SYMBOL(ipv4_dst_check);
12061210

12071211
static void ipv4_send_dest_unreach(struct sk_buff *skb)
12081212
{
@@ -1311,7 +1315,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
13111315
return min(advmss, IPV4_MAX_PMTU - header_size);
13121316
}
13131317

1314-
static unsigned int ipv4_mtu(const struct dst_entry *dst)
1318+
INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
13151319
{
13161320
const struct rtable *rt = (const struct rtable *)dst;
13171321
unsigned int mtu = rt->rt_pmtu;
@@ -1333,6 +1337,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
13331337

13341338
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
13351339
}
1340+
EXPORT_SYMBOL(ipv4_mtu);
13361341

13371342
static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
13381343
{

net/ipv4/tcp_ipv4.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1649,6 +1649,8 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
16491649
return mss;
16501650
}
16511651

1652+
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1653+
u32));
16521654
/* The socket must have it's spinlock held when we get
16531655
* here, unless it is a TCP_LISTEN socket.
16541656
*
@@ -1668,7 +1670,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
16681670
sk_mark_napi_id(sk, skb);
16691671
if (dst) {
16701672
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1671-
!dst->ops->check(dst, 0)) {
1673+
!INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check,
1674+
dst, 0)) {
16721675
dst_release(dst);
16731676
sk->sk_rx_dst = NULL;
16741677
}

net/ipv6/ip6_output.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
217217
ip6_finish_output,
218218
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
219219
}
220+
EXPORT_SYMBOL(ip6_output);
220221

221222
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
222223
{

net/ipv6/route.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,11 @@ enum rt6_nud_state {
8181
RT6_NUD_SUCCEED = 1
8282
};
8383

84-
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
84+
INDIRECT_CALLABLE_SCOPE
85+
struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
8586
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
86-
static unsigned int ip6_mtu(const struct dst_entry *dst);
87+
INDIRECT_CALLABLE_SCOPE
88+
unsigned int ip6_mtu(const struct dst_entry *dst);
8789
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
8890
static void ip6_dst_destroy(struct dst_entry *);
8991
static void ip6_dst_ifdown(struct dst_entry *,
@@ -2611,7 +2613,8 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
26112613
return NULL;
26122614
}
26132615

2614-
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2616+
INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
2617+
u32 cookie)
26152618
{
26162619
struct dst_entry *dst_ret;
26172620
struct fib6_info *from;
@@ -2641,6 +2644,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
26412644

26422645
return dst_ret;
26432646
}
2647+
EXPORT_SYMBOL(ip6_dst_check);
26442648

26452649
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
26462650
{
@@ -3089,7 +3093,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
30893093
return mtu;
30903094
}
30913095

3092-
static unsigned int ip6_mtu(const struct dst_entry *dst)
3096+
INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
30933097
{
30943098
struct inet6_dev *idev;
30953099
unsigned int mtu;
@@ -3111,6 +3115,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
31113115

31123116
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
31133117
}
3118+
EXPORT_SYMBOL(ip6_mtu);
31143119

31153120
/* MTU selection:
31163121
* 1. mtu on route is locked - use it

net/ipv6/tcp_ipv6.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1420,6 +1420,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
14201420
return NULL;
14211421
}
14221422

1423+
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1424+
u32));
14231425
/* The socket must have it's spinlock held when we get
14241426
* here, unless it is a TCP_LISTEN socket.
14251427
*
@@ -1473,7 +1475,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
14731475
sk_mark_napi_id(sk, skb);
14741476
if (dst) {
14751477
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1476-
dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1478+
INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1479+
dst, np->rx_dst_cookie) == NULL) {
14771480
dst_release(dst);
14781481
sk->sk_rx_dst = NULL;
14791482
}

0 commit comments

Comments
 (0)