Skip to content

Commit 78c91ae

Browse files
Philo Lu authored and davem330 committed
ipv4/udp: Add 4-tuple hash for connected socket
Currently, the udp_table has two hash tables, the port hash and the portaddr hash. Usually for UDP servers, all sockets have the same local port and addr, so they are all on the same hash slot within a reuseport group. In some applications, UDP servers use connect() to manage clients. In particular, on first receiving a packet from an unseen 4-tuple, a new socket is created and connect()ed to the remote addr:port, and then the fd is used exclusively by the client. Once there are connected sks in a reuseport group, udp has to score all sks in the same hash2 slot to find the best match. This could be inefficient with a large number of connections, resulting in high softirq overhead. To solve the problem, this patch implements a 4-tuple hash for connected udp sockets. During connect(), the hash4 slot is updated, as well as a corresponding counter, hash4_cnt, in hslot2. In __udp4_lib_lookup(), hslot4 is searched first if the counter is non-zero. Otherwise, hslot2 is used as before. Note that only connected sockets enter this hash4 path, while un-connected ones are not affected. hlist_nulls is used for hash4, because a lookup that runs concurrently with a rehash may be carried onto another hslot by mistake. We therefore check the nulls value at the end of the list to see whether the lookup must be restarted. Because udp does not use SLAB_TYPESAFE_BY_RCU, we don't need to touch sk_refcnt during lookup. Stress test results (with 1 cpu fully used) are shown below, in pps: (1) _un-connected_ socket as server [a] w/o hash4: 1,825,176 [b] w/ hash4: 1,831,750 (+0.36%) (2) 500 _connected_ sockets as server [c] w/o hash4: 290,860 (only 16% of [a]) [d] w/ hash4: 1,889,658 (+3.1% compared with [b]) With hash4, compute_score is skipped during lookup, so [d] is slightly better than [b].
Co-developed-by: Cambda Zhu <[email protected]> Signed-off-by: Cambda Zhu <[email protected]> Co-developed-by: Fred Chen <[email protected]> Signed-off-by: Fred Chen <[email protected]> Co-developed-by: Yubing Qiu <[email protected]> Signed-off-by: Yubing Qiu <[email protected]> Signed-off-by: Philo Lu <[email protected]> Acked-by: Willem de Bruijn <[email protected]> Acked-by: Paolo Abeni <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent dab78a1 commit 78c91ae

File tree

3 files changed

+210
-5
lines changed

3 files changed

+210
-5
lines changed

include/net/udp.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,13 +302,27 @@ static inline int udp_lib_hash(struct sock *sk)
302302
}
303303

304304
void udp_lib_unhash(struct sock *sk);
305-
void udp_lib_rehash(struct sock *sk, u16 new_hash);
305+
void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4);
306306

307307
/* Close handler: hands the socket to sk_common_release(). The timeout
 * argument is accepted but not used in this body.
 */
static inline void udp_lib_close(struct sock *sk, long timeout)
308308
{
309309
sk_common_release(sk);
310310
}
311311

312+
/* hash4 routines shared between UDPv4/6 */
313+
#if IS_ENABLED(CONFIG_BASE_SMALL)
314+
/* CONFIG_BASE_SMALL variant: empty stub, so callers need no #ifdef.
 * With this variant nothing is ever inserted into a 4-tuple hash.
 */
static inline void udp_lib_hash4(struct sock *sk, u16 hash)
315+
{
316+
}
317+
318+
/* CONFIG_BASE_SMALL variant: empty stub; the socket is left untouched. */
static inline void udp4_hash4(struct sock *sk)
319+
{
320+
}
321+
#else /* !CONFIG_BASE_SMALL */
322+
void udp_lib_hash4(struct sock *sk, u16 hash);
323+
void udp4_hash4(struct sock *sk);
324+
#endif /* CONFIG_BASE_SMALL */
325+
312326
int udp_lib_get_port(struct sock *sk, unsigned short snum,
313327
unsigned int hash2_nulladdr);
314328

net/ipv4/udp.c

Lines changed: 194 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,159 @@ static struct sock *udp4_lib_lookup2(const struct net *net,
478478
return result;
479479
}
480480

481+
#if IS_ENABLED(CONFIG_BASE_SMALL)
482+
/* CONFIG_BASE_SMALL variant of the 4-tuple lookup: always reports "no
 * match" by returning NULL, ignoring all arguments.
 */
static struct sock *udp4_lib_lookup4(const struct net *net,
483+
__be32 saddr, __be16 sport,
484+
__be32 daddr, unsigned int hnum,
485+
int dif, int sdif,
486+
struct udp_table *udptable)
487+
{
488+
return NULL;
489+
}
490+
491+
/* CONFIG_BASE_SMALL variant: empty stub, nothing to rehash. */
static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
492+
u16 newhash4)
493+
{
494+
}
495+
496+
/* CONFIG_BASE_SMALL variant: empty stub, nothing to unhash. */
static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
497+
{
498+
}
499+
#else /* !CONFIG_BASE_SMALL */
500+
/* Look up a socket in the 4-tuple hash (udptable->hash4).
 *
 * The slot is chosen by hashing (net, daddr, hnum, saddr, sport) with
 * udp_ehashfn() and masking the result with udptable->mask. The slot's
 * nulls list is then walked with udp_lrpa_for_each_entry_rcu().
 *
 * Returns the first socket accepted by inet_match() for the given
 * addresses, ports, dif and sdif, or NULL when the walk completes on the
 * expected slot without a match. No reference is taken on the returned
 * socket in this function.
 */
static struct sock *udp4_lib_lookup4(const struct net *net,
501+
__be32 saddr, __be16 sport,
502+
__be32 daddr, unsigned int hnum,
503+
int dif, int sdif,
504+
struct udp_table *udptable)
505+
{
506+
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
507+
const struct hlist_nulls_node *node;
508+
struct udp_hslot *hslot4;
509+
unsigned int hash4, slot;
510+
struct udp_sock *up;
511+
struct sock *sk;
512+
513+
/* Same argument order as udp4_hash4(): the packet's daddr/hnum sit where
 * the socket's sk_rcv_saddr/sk_num go, and saddr/sport where
 * sk_daddr/sk_dport go.
 */
hash4 = udp_ehashfn(net, daddr, hnum, saddr, sport);
514+
slot = hash4 & udptable->mask;
515+
hslot4 = &udptable->hash4[slot];
516+
INET_ADDR_COOKIE(acookie, saddr, daddr);
517+
518+
begin:
519+
/* SLAB_TYPESAFE_BY_RCU not used, so we don't need to touch sk_refcnt */
520+
udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
521+
sk = (struct sock *)up;
522+
if (inet_match(net, sk, acookie, ports, dif, sdif))
523+
return sk;
524+
}
525+
526+
/* if the nulls value we got at the end of this lookup is not the
527+
 * expected one, we must restart lookup. We probably met an item that
528+
 * was moved to another chain due to rehash.
529+
 */
530+
if (get_nulls_value(node) != slot)
531+
goto begin;
532+
533+
return NULL;
534+
}
535+
536+
/* In hash4, rehash can happen in connect(), where hash4_cnt stays unchanged.
 *
 * Moves sk from the hash4 slot selected by its current udp_lrpa_hash to
 * the slot selected by newhash4, and stores newhash4 in udp_lrpa_hash.
 * If both hashes select the same slot, only the stored hash is updated
 * and no list is touched. This function does not touch any hslot2.
 */
537+
static void udp_rehash4(struct udp_table *udptable, struct sock *sk,
538+
u16 newhash4)
539+
{
540+
struct udp_hslot *hslot4, *nhslot4;
541+
542+
hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
543+
nhslot4 = udp_hashslot4(udptable, newhash4);
544+
udp_sk(sk)->udp_lrpa_hash = newhash4;
545+
546+
if (hslot4 != nhslot4) {
547+
/* The two slot locks are taken one after the other, never nested.
 * Between the unlock below and the insertion that follows, the
 * socket is linked on neither hash4 list.
 */
spin_lock_bh(&hslot4->lock);
548+
hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
549+
hslot4->count--;
550+
spin_unlock_bh(&hslot4->lock);
551+
552+
spin_lock_bh(&nhslot4->lock);
553+
hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
554+
&nhslot4->nulls_head);
555+
nhslot4->count++;
556+
spin_unlock_bh(&nhslot4->lock);
557+
}
558+
}
559+
560+
/* Remove sk from the 4-tuple hash, if udp_hashed4(sk) reports it is there.
 *
 * Unlinks the socket from its hash4 slot and decrements that slot's
 * count, then calls udp_hash4_dec() on the hash2 slot selected by
 * udp_portaddr_hash. Does nothing when the socket is not hashed4.
 *
 * NOTE(review): uses plain spin_lock(), not the _bh variant. The only
 * caller visible here, udp_lib_unhash(), calls this just before
 * spin_unlock_bh(&hslot->lock), so bottom halves are presumably already
 * disabled - confirm before adding other callers.
 */
static void udp_unhash4(struct udp_table *udptable, struct sock *sk)
561+
{
562+
struct udp_hslot *hslot2, *hslot4;
563+
564+
if (udp_hashed4(sk)) {
565+
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
566+
hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
567+
568+
spin_lock(&hslot4->lock);
569+
hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
570+
hslot4->count--;
571+
spin_unlock(&hslot4->lock);
572+
573+
/* Mirror of the udp_hash4_inc() done in udp_lib_hash4(). */
spin_lock(&hslot2->lock);
574+
udp_hash4_dec(hslot2);
575+
spin_unlock(&hslot2->lock);
576+
}
577+
}
578+
579+
/* Insert sk into the 4-tuple hash under the given hash value.
 *
 * If the socket is already hashed4, this is treated as a re-connect and
 * handed to udp_rehash4(). Otherwise the socket is added to the hash4
 * slot selected by hash, and udp_hash4_inc() is called on the hash2 slot
 * selected by udp_portaddr_hash. The hash value is also stored in
 * udp_lrpa_hash.
 *
 * The table used is always net->ipv4.udp_table.
 */
void udp_lib_hash4(struct sock *sk, u16 hash)
580+
{
581+
struct udp_hslot *hslot, *hslot2, *hslot4;
582+
struct net *net = sock_net(sk);
583+
struct udp_table *udptable;
584+
585+
/* Connected udp socket can re-connect to another remote address,
586+
 * so rehash4 is needed.
587+
 */
588+
udptable = net->ipv4.udp_table;
589+
if (udp_hashed4(sk)) {
590+
udp_rehash4(udptable, sk, hash);
591+
return;
592+
}
593+
594+
hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash);
595+
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
596+
hslot4 = udp_hashslot4(udptable, hash);
597+
udp_sk(sk)->udp_lrpa_hash = hash;
598+
599+
/* hslot->lock is held, with bottom halves disabled, across the whole
 * update; the hslot4 and hslot2 locks are taken one at a time inside it.
 */
spin_lock_bh(&hslot->lock);
600+
/* A socket that belongs to a reuseport group is detached from it
 * before being added to hash4.
 */
if (rcu_access_pointer(sk->sk_reuseport_cb))
601+
reuseport_detach_sock(sk);
602+
603+
spin_lock(&hslot4->lock);
604+
hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_lrpa_node,
605+
&hslot4->nulls_head);
606+
hslot4->count++;
607+
spin_unlock(&hslot4->lock);
608+
609+
spin_lock(&hslot2->lock);
610+
udp_hash4_inc(hslot2);
611+
spin_unlock(&hslot2->lock);
612+
613+
spin_unlock_bh(&hslot->lock);
614+
}
615+
EXPORT_SYMBOL(udp_lib_hash4);
616+
617+
/* call with sock lock
 *
 * Computes the IPv4 4-tuple hash of sk from (sk_rcv_saddr, sk_num,
 * sk_daddr, sk_dport) and inserts the socket via udp_lib_hash4().
 * Returns without doing anything when the socket is unhashed or its
 * receive address is INADDR_ANY.
 */
618+
void udp4_hash4(struct sock *sk)
619+
{
620+
struct net *net = sock_net(sk);
621+
unsigned int hash;
622+
623+
if (sk_unhashed(sk) || sk->sk_rcv_saddr == htonl(INADDR_ANY))
624+
return;
625+
626+
hash = udp_ehashfn(net, sk->sk_rcv_saddr, sk->sk_num,
627+
sk->sk_daddr, sk->sk_dport);
628+
629+
/* udp_lib_hash4() takes a u16, so the unsigned int hash is narrowed to
 * its low 16 bits at this call.
 */
udp_lib_hash4(sk, hash);
630+
}
631+
EXPORT_SYMBOL(udp4_hash4);
632+
#endif /* CONFIG_BASE_SMALL */
633+
481634
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
482635
* harder than this. -DaveM
483636
*/
@@ -493,6 +646,13 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
493646
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
494647
hslot2 = udp_hashslot2(udptable, hash2);
495648

649+
if (udp_has_hash4(hslot2)) {
650+
result = udp4_lib_lookup4(net, saddr, sport, daddr, hnum,
651+
dif, sdif, udptable);
652+
if (result) /* udp4_lib_lookup4 return sk or NULL */
653+
return result;
654+
}
655+
496656
/* Lookup connected or non-wildcard socket */
497657
result = udp4_lib_lookup2(net, saddr, sport,
498658
daddr, hnum, dif, sdif,
@@ -1933,6 +2093,18 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
19332093
}
19342094
EXPORT_SYMBOL(udp_pre_connect);
19352095

2096+
/* connect() handler installed as udp_prot.connect.
 *
 * Takes the socket lock, runs __ip4_datagram_connect() and, only when it
 * returns 0, calls udp4_hash4() to put the socket into the 4-tuple hash.
 * The socket lock is released before returning.
 *
 * Returns the result of __ip4_datagram_connect() unchanged.
 */
static int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
2097+
{
2098+
int res;
2099+
2100+
lock_sock(sk);
2101+
res = __ip4_datagram_connect(sk, uaddr, addr_len);
2102+
if (!res)
2103+
udp4_hash4(sk);
2104+
release_sock(sk);
2105+
return res;
2106+
}
2107+
19362108
int __udp_disconnect(struct sock *sk, int flags)
19372109
{
19382110
struct inet_sock *inet = inet_sk(sk);
@@ -1992,6 +2164,8 @@ void udp_lib_unhash(struct sock *sk)
19922164
hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
19932165
hslot2->count--;
19942166
spin_unlock(&hslot2->lock);
2167+
2168+
udp_unhash4(udptable, sk);
19952169
}
19962170
spin_unlock_bh(&hslot->lock);
19972171
}
@@ -2001,7 +2175,7 @@ EXPORT_SYMBOL(udp_lib_unhash);
20012175
/*
20022176
* inet_rcv_saddr was changed, we must rehash secondary hash
20032177
*/
2004-
void udp_lib_rehash(struct sock *sk, u16 newhash)
2178+
void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
20052179
{
20062180
if (sk_hashed(sk)) {
20072181
struct udp_table *udptable = udp_get_table_prot(sk);
@@ -2033,6 +2207,19 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
20332207
spin_unlock(&nhslot2->lock);
20342208
}
20352209

2210+
if (udp_hashed4(sk)) {
2211+
udp_rehash4(udptable, sk, newhash4);
2212+
2213+
if (hslot2 != nhslot2) {
2214+
spin_lock(&hslot2->lock);
2215+
udp_hash4_dec(hslot2);
2216+
spin_unlock(&hslot2->lock);
2217+
2218+
spin_lock(&nhslot2->lock);
2219+
udp_hash4_inc(nhslot2);
2220+
spin_unlock(&nhslot2->lock);
2221+
}
2222+
}
20362223
spin_unlock_bh(&hslot->lock);
20372224
}
20382225
}
@@ -2044,7 +2231,11 @@ void udp_v4_rehash(struct sock *sk)
20442231
u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
20452232
inet_sk(sk)->inet_rcv_saddr,
20462233
inet_sk(sk)->inet_num);
2047-
udp_lib_rehash(sk, new_hash);
2234+
u16 new_hash4 = udp_ehashfn(sock_net(sk),
2235+
sk->sk_rcv_saddr, sk->sk_num,
2236+
sk->sk_daddr, sk->sk_dport);
2237+
2238+
udp_lib_rehash(sk, new_hash, new_hash4);
20482239
}
20492240

20502241
static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -2937,7 +3128,7 @@ struct proto udp_prot = {
29373128
.owner = THIS_MODULE,
29383129
.close = udp_lib_close,
29393130
.pre_connect = udp_pre_connect,
2940-
.connect = ip4_datagram_connect,
3131+
.connect = udp_connect,
29413132
.disconnect = udp_disconnect,
29423133
.ioctl = udp_ioctl,
29433134
.init = udp_init_sock,

net/ipv6/udp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ void udp_v6_rehash(struct sock *sk)
111111
&sk->sk_v6_rcv_saddr,
112112
inet_sk(sk)->inet_num);
113113

114-
udp_lib_rehash(sk, new_hash);
114+
udp_lib_rehash(sk, new_hash, 0); /* 4-tuple hash not implemented */
115115
}
116116

117117
static int compute_score(struct sock *sk, const struct net *net,

0 commit comments

Comments
 (0)