Skip to content

Commit 3b24d85

Browse files
Eric Dumazet authored and davem330 committed
tcp/dccp: do not touch listener sk_refcnt under synflood
When a SYNFLOOD targets a non SO_REUSEPORT listener, multiple cpus contend on sk->sk_refcnt and sk->sk_wmem_alloc changes.

By letting listeners use SOCK_RCU_FREE infrastructure, we can relax TCP_LISTEN lookup rules and avoid touching sk_refcnt.

Note that we still use SLAB_DESTROY_BY_RCU rules for other sockets, only listeners are impacted by this change.

Peak performance under SYNFLOOD is increased by ~33%: On my test machine, I could process 3.2 Mpps instead of 2.4 Mpps.

Most consuming functions are now skb_set_owner_w() and sock_wfree() contending on sk->sk_wmem_alloc when cooking SYNACK and freeing them.

Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 3a5d1c0 commit 3b24d85

File tree

10 files changed

+134
-163
lines changed

10 files changed

+134
-163
lines changed

include/net/inet6_hashtables.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net,
6666
const __be16 sport,
6767
const struct in6_addr *daddr,
6868
const u16 hnum,
69-
const int dif)
69+
const int dif,
70+
bool *refcounted)
7071
{
7172
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
7273
sport, daddr, hnum, dif);
74+
*refcounted = true;
7375
if (sk)
7476
return sk;
75-
77+
*refcounted = false;
7678
return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
7779
daddr, hnum, dif);
7880
}
@@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
8183
struct sk_buff *skb, int doff,
8284
const __be16 sport,
8385
const __be16 dport,
84-
int iif)
86+
int iif,
87+
bool *refcounted)
8588
{
8689
struct sock *sk = skb_steal_sock(skb);
8790

91+
*refcounted = true;
8892
if (sk)
8993
return sk;
9094

9195
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
9296
doff, &ipv6_hdr(skb)->saddr, sport,
9397
&ipv6_hdr(skb)->daddr, ntohs(dport),
94-
iif);
98+
iif, refcounted);
9599
}
96100

97101
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,

include/net/inet_hashtables.h

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -100,14 +100,10 @@ struct inet_bind_hashbucket {
100100

101101
/*
102102
* Sockets can be hashed in established or listening table
103-
* We must use different 'nulls' end-of-chain value for listening
104-
* hash table, or we might find a socket that was closed and
105-
* reallocated/inserted into established hash table
106103
*/
107-
#define LISTENING_NULLS_BASE (1U << 29)
108104
struct inet_listen_hashbucket {
109105
spinlock_t lock;
110-
struct hlist_nulls_head head;
106+
struct hlist_head head;
111107
};
112108

113109
/* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -304,14 +300,20 @@ static inline struct sock *__inet_lookup(struct net *net,
304300
struct sk_buff *skb, int doff,
305301
const __be32 saddr, const __be16 sport,
306302
const __be32 daddr, const __be16 dport,
307-
const int dif)
303+
const int dif,
304+
bool *refcounted)
308305
{
309306
u16 hnum = ntohs(dport);
310-
struct sock *sk = __inet_lookup_established(net, hashinfo,
311-
saddr, sport, daddr, hnum, dif);
307+
struct sock *sk;
312308

313-
return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
314-
sport, daddr, hnum, dif);
309+
sk = __inet_lookup_established(net, hashinfo, saddr, sport,
310+
daddr, hnum, dif);
311+
*refcounted = true;
312+
if (sk)
313+
return sk;
314+
*refcounted = false;
315+
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
316+
sport, daddr, hnum, dif);
315317
}
316318

317319
static inline struct sock *inet_lookup(struct net *net,
@@ -322,28 +324,34 @@ static inline struct sock *inet_lookup(struct net *net,
322324
const int dif)
323325
{
324326
struct sock *sk;
327+
bool refcounted;
325328

326329
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
327-
dport, dif);
330+
dport, dif, &refcounted);
328331

332+
if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
333+
sk = NULL;
329334
return sk;
330335
}
331336

332337
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
333338
struct sk_buff *skb,
334339
int doff,
335340
const __be16 sport,
336-
const __be16 dport)
341+
const __be16 dport,
342+
bool *refcounted)
337343
{
338344
struct sock *sk = skb_steal_sock(skb);
339345
const struct iphdr *iph = ip_hdr(skb);
340346

347+
*refcounted = true;
341348
if (sk)
342349
return sk;
343-
else
344-
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
345-
doff, iph->saddr, sport,
346-
iph->daddr, dport, inet_iif(skb));
350+
351+
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
352+
doff, iph->saddr, sport,
353+
iph->daddr, dport, inet_iif(skb),
354+
refcounted);
347355
}
348356

349357
u32 sk_ehashfn(const struct sock *sk);

net/dccp/ipv4.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
764764
{
765765
const struct dccp_hdr *dh;
766766
const struct iphdr *iph;
767+
bool refcounted;
767768
struct sock *sk;
768769
int min_cov;
769770

@@ -801,7 +802,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
801802

802803
lookup:
803804
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
804-
dh->dccph_sport, dh->dccph_dport);
805+
dh->dccph_sport, dh->dccph_dport, &refcounted);
805806
if (!sk) {
806807
dccp_pr_debug("failed to look up flow ID in table and "
807808
"get corresponding socket\n");
@@ -830,6 +831,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
830831
goto lookup;
831832
}
832833
sock_hold(sk);
834+
refcounted = true;
833835
nsk = dccp_check_req(sk, skb, req);
834836
if (!nsk) {
835837
reqsk_put(req);
@@ -886,7 +888,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
886888
return 0;
887889

888890
discard_and_relse:
889-
sock_put(sk);
891+
if (refcounted)
892+
sock_put(sk);
890893
goto discard_it;
891894
}
892895

net/dccp/ipv6.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
642642
static int dccp_v6_rcv(struct sk_buff *skb)
643643
{
644644
const struct dccp_hdr *dh;
645+
bool refcounted;
645646
struct sock *sk;
646647
int min_cov;
647648

@@ -670,7 +671,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
670671
lookup:
671672
sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
672673
dh->dccph_sport, dh->dccph_dport,
673-
inet6_iif(skb));
674+
inet6_iif(skb), &refcounted);
674675
if (!sk) {
675676
dccp_pr_debug("failed to look up flow ID in table and "
676677
"get corresponding socket\n");
@@ -699,6 +700,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
699700
goto lookup;
700701
}
701702
sock_hold(sk);
703+
refcounted = true;
702704
nsk = dccp_check_req(sk, skb, req);
703705
if (!nsk) {
704706
reqsk_put(req);
@@ -752,7 +754,8 @@ static int dccp_v6_rcv(struct sk_buff *skb)
752754
return 0;
753755

754756
discard_and_relse:
755-
sock_put(sk);
757+
if (refcounted)
758+
sock_put(sk);
756759
goto discard_it;
757760
}
758761

net/ipv4/inet_diag.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -775,13 +775,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
775775

776776
for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
777777
struct inet_listen_hashbucket *ilb;
778-
struct hlist_nulls_node *node;
779778
struct sock *sk;
780779

781780
num = 0;
782781
ilb = &hashinfo->listening_hash[i];
783782
spin_lock_bh(&ilb->lock);
784-
sk_nulls_for_each(sk, node, &ilb->head) {
783+
sk_for_each(sk, &ilb->head) {
785784
struct inet_sock *inet = inet_sk(sk);
786785

787786
if (!net_eq(sock_net(sk), net))

net/ipv4/inet_hashtables.c

Lines changed: 25 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -198,76 +198,48 @@ static inline int compute_score(struct sock *sk, struct net *net,
198198
}
199199

200200
/*
201-
* Don't inline this cruft. Here are some nice properties to exploit here. The
202-
* BSD API does not allow a listening sock to specify the remote port nor the
201+
* Here are some nice properties to exploit here. The BSD API
202+
* does not allow a listening sock to specify the remote port nor the
203203
* remote address for the connection. So always assume those are both
204204
* wildcarded during the search since they can never be otherwise.
205205
*/
206206

207-
207+
/* called with rcu_read_lock() : No refcount taken on the socket */
208208
struct sock *__inet_lookup_listener(struct net *net,
209209
struct inet_hashinfo *hashinfo,
210210
struct sk_buff *skb, int doff,
211211
const __be32 saddr, __be16 sport,
212212
const __be32 daddr, const unsigned short hnum,
213213
const int dif)
214214
{
215-
struct sock *sk, *result;
216-
struct hlist_nulls_node *node;
217215
unsigned int hash = inet_lhashfn(net, hnum);
218216
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
219-
int score, hiscore, matches = 0, reuseport = 0;
220-
bool select_ok = true;
217+
int score, hiscore = 0, matches = 0, reuseport = 0;
218+
struct sock *sk, *result = NULL;
221219
u32 phash = 0;
222220

223-
begin:
224-
result = NULL;
225-
hiscore = 0;
226-
sk_nulls_for_each_rcu(sk, node, &ilb->head) {
221+
sk_for_each_rcu(sk, &ilb->head) {
227222
score = compute_score(sk, net, hnum, daddr, dif);
228223
if (score > hiscore) {
229-
result = sk;
230-
hiscore = score;
231224
reuseport = sk->sk_reuseport;
232225
if (reuseport) {
233226
phash = inet_ehashfn(net, daddr, hnum,
234227
saddr, sport);
235-
if (select_ok) {
236-
struct sock *sk2;
237-
sk2 = reuseport_select_sock(sk, phash,
238-
skb, doff);
239-
if (sk2) {
240-
result = sk2;
241-
goto found;
242-
}
243-
}
228+
result = reuseport_select_sock(sk, phash,
229+
skb, doff);
230+
if (result)
231+
return result;
244232
matches = 1;
245233
}
234+
result = sk;
235+
hiscore = score;
246236
} else if (score == hiscore && reuseport) {
247237
matches++;
248238
if (reciprocal_scale(phash, matches) == 0)
249239
result = sk;
250240
phash = next_pseudo_random32(phash);
251241
}
252242
}
253-
/*
254-
* if the nulls value we got at the end of this lookup is
255-
* not the expected one, we must restart lookup.
256-
* We probably met an item that was moved to another chain.
257-
*/
258-
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
259-
goto begin;
260-
if (result) {
261-
found:
262-
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
263-
result = NULL;
264-
else if (unlikely(compute_score(result, net, hnum, daddr,
265-
dif) < hiscore)) {
266-
sock_put(result);
267-
select_ok = false;
268-
goto begin;
269-
}
270-
}
271243
return result;
272244
}
273245
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
@@ -508,7 +480,8 @@ int __inet_hash(struct sock *sk, struct sock *osk,
508480
if (err)
509481
goto unlock;
510482
}
511-
__sk_nulls_add_node_rcu(sk, &ilb->head);
483+
hlist_add_head_rcu(&sk->sk_node, &ilb->head);
484+
sock_set_flag(sk, SOCK_RCU_FREE);
512485
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
513486
unlock:
514487
spin_unlock(&ilb->lock);
@@ -535,20 +508,25 @@ void inet_unhash(struct sock *sk)
535508
{
536509
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
537510
spinlock_t *lock;
511+
bool listener = false;
538512
int done;
539513

540514
if (sk_unhashed(sk))
541515
return;
542516

543-
if (sk->sk_state == TCP_LISTEN)
517+
if (sk->sk_state == TCP_LISTEN) {
544518
lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
545-
else
519+
listener = true;
520+
} else {
546521
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
547-
522+
}
548523
spin_lock_bh(lock);
549524
if (rcu_access_pointer(sk->sk_reuseport_cb))
550525
reuseport_detach_sock(sk);
551-
done = __sk_nulls_del_node_init_rcu(sk);
526+
if (listener)
527+
done = __sk_del_node_init(sk);
528+
else
529+
done = __sk_nulls_del_node_init_rcu(sk);
552530
if (done)
553531
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
554532
spin_unlock_bh(lock);
@@ -684,9 +662,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
684662

685663
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
686664
spin_lock_init(&h->listening_hash[i].lock);
687-
INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
688-
i + LISTENING_NULLS_BASE);
689-
}
665+
INIT_HLIST_HEAD(&h->listening_hash[i].head);
666+
}
690667
}
691668
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
692669

0 commit comments

Comments
 (0)