Skip to content

Commit 15f41e2

Browse files
committed
Merge branch 'tcp-udp-misc'
Eric Dumazet says: ==================== net: various udp/tcp changes First round of patches for linux-4.7 Add a generic facility for sockets to be freed after an RCU grace period, if they need to. Then UDP stack is changed to no longer use SLAB_DESTROY_BY_RCU, in order to speed up rx processing for traffic encapsulated in UDP. It gives a 17% speedup for normal UDP reception in stress conditions. Then TCP listeners are changed to use SOCK_RCU_FREE as well to avoid touching sk_refcnt in synflood case : I got up to 30% performance increase for a mono listener. Then three patches add SK_MEMINFO_DROPS to sock_diag and add per socket rx drops accounting to TCP. Last patch adds rate limiting on ACK sent on behalf of SYN_RECV to better resist SYNFLOOD targeting one or few flows. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 43e2dfb + 4ce7e93 commit 15f41e2

23 files changed

+401
-578
lines changed

include/linux/udp.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
9898
return udp_sk(sk)->no_check6_rx;
9999
}
100100

101-
#define udp_portaddr_for_each_entry(__sk, node, list) \
102-
hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
101+
#define udp_portaddr_for_each_entry(__sk, list) \
102+
hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
103103

104-
#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
105-
hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
104+
#define udp_portaddr_for_each_entry_rcu(__sk, list) \
105+
hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
106106

107107
#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)
108108

include/net/inet6_hashtables.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net,
6666
const __be16 sport,
6767
const struct in6_addr *daddr,
6868
const u16 hnum,
69-
const int dif)
69+
const int dif,
70+
bool *refcounted)
7071
{
7172
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
7273
sport, daddr, hnum, dif);
74+
*refcounted = true;
7375
if (sk)
7476
return sk;
75-
77+
*refcounted = false;
7678
return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
7779
daddr, hnum, dif);
7880
}
@@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
8183
struct sk_buff *skb, int doff,
8284
const __be16 sport,
8385
const __be16 dport,
84-
int iif)
86+
int iif,
87+
bool *refcounted)
8588
{
8689
struct sock *sk = skb_steal_sock(skb);
8790

91+
*refcounted = true;
8892
if (sk)
8993
return sk;
9094

9195
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
9296
doff, &ipv6_hdr(skb)->saddr, sport,
9397
&ipv6_hdr(skb)->daddr, ntohs(dport),
94-
iif);
98+
iif, refcounted);
9599
}
96100

97101
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,

include/net/inet_hashtables.h

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -100,14 +100,10 @@ struct inet_bind_hashbucket {
100100

101101
/*
102102
* Sockets can be hashed in established or listening table
103-
* We must use different 'nulls' end-of-chain value for listening
104-
* hash table, or we might find a socket that was closed and
105-
* reallocated/inserted into established hash table
106103
*/
107-
#define LISTENING_NULLS_BASE (1U << 29)
108104
struct inet_listen_hashbucket {
109105
spinlock_t lock;
110-
struct hlist_nulls_head head;
106+
struct hlist_head head;
111107
};
112108

113109
/* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -280,11 +276,8 @@ static inline struct sock *inet_lookup_listener(struct net *net,
280276
net_eq(sock_net(__sk), (__net)))
281277
#endif /* 64-bit arch */
282278

283-
/*
284-
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
279+
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
285280
* not check it for lookups anymore, thanks Alexey. -DaveM
286-
*
287-
* Local BH must be disabled here.
288281
*/
289282
struct sock *__inet_lookup_established(struct net *net,
290283
struct inet_hashinfo *hashinfo,
@@ -307,14 +300,20 @@ static inline struct sock *__inet_lookup(struct net *net,
307300
struct sk_buff *skb, int doff,
308301
const __be32 saddr, const __be16 sport,
309302
const __be32 daddr, const __be16 dport,
310-
const int dif)
303+
const int dif,
304+
bool *refcounted)
311305
{
312306
u16 hnum = ntohs(dport);
313-
struct sock *sk = __inet_lookup_established(net, hashinfo,
314-
saddr, sport, daddr, hnum, dif);
307+
struct sock *sk;
315308

316-
return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
317-
sport, daddr, hnum, dif);
309+
sk = __inet_lookup_established(net, hashinfo, saddr, sport,
310+
daddr, hnum, dif);
311+
*refcounted = true;
312+
if (sk)
313+
return sk;
314+
*refcounted = false;
315+
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
316+
sport, daddr, hnum, dif);
318317
}
319318

320319
static inline struct sock *inet_lookup(struct net *net,
@@ -325,30 +324,34 @@ static inline struct sock *inet_lookup(struct net *net,
325324
const int dif)
326325
{
327326
struct sock *sk;
327+
bool refcounted;
328328

329-
local_bh_disable();
330329
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
331-
dport, dif);
332-
local_bh_enable();
330+
dport, dif, &refcounted);
333331

332+
if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
333+
sk = NULL;
334334
return sk;
335335
}
336336

337337
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
338338
struct sk_buff *skb,
339339
int doff,
340340
const __be16 sport,
341-
const __be16 dport)
341+
const __be16 dport,
342+
bool *refcounted)
342343
{
343344
struct sock *sk = skb_steal_sock(skb);
344345
const struct iphdr *iph = ip_hdr(skb);
345346

347+
*refcounted = true;
346348
if (sk)
347349
return sk;
348-
else
349-
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
350-
doff, iph->saddr, sport,
351-
iph->daddr, dport, inet_iif(skb));
350+
351+
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
352+
doff, iph->saddr, sport,
353+
iph->daddr, dport, inet_iif(skb),
354+
refcounted);
352355
}
353356

354357
u32 sk_ehashfn(const struct sock *sk);

include/net/request_sock.h

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -85,24 +85,23 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
8585
struct request_sock *req;
8686

8787
req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
88-
89-
if (req) {
90-
req->rsk_ops = ops;
91-
if (attach_listener) {
92-
sock_hold(sk_listener);
93-
req->rsk_listener = sk_listener;
94-
} else {
95-
req->rsk_listener = NULL;
88+
if (!req)
89+
return NULL;
90+
req->rsk_listener = NULL;
91+
if (attach_listener) {
92+
if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) {
93+
kmem_cache_free(ops->slab, req);
94+
return NULL;
9695
}
97-
req_to_sk(req)->sk_prot = sk_listener->sk_prot;
98-
sk_node_init(&req_to_sk(req)->sk_node);
99-
sk_tx_queue_clear(req_to_sk(req));
100-
req->saved_syn = NULL;
101-
/* Following is temporary. It is coupled with debugging
102-
* helpers in reqsk_put() & reqsk_free()
103-
*/
104-
atomic_set(&req->rsk_refcnt, 0);
96+
req->rsk_listener = sk_listener;
10597
}
98+
req->rsk_ops = ops;
99+
req_to_sk(req)->sk_prot = sk_listener->sk_prot;
100+
sk_node_init(&req_to_sk(req)->sk_node);
101+
sk_tx_queue_clear(req_to_sk(req));
102+
req->saved_syn = NULL;
103+
atomic_set(&req->rsk_refcnt, 0);
104+
106105
return req;
107106
}
108107

include/net/sock.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ struct sock_common {
178178
int skc_bound_dev_if;
179179
union {
180180
struct hlist_node skc_bind_node;
181-
struct hlist_nulls_node skc_portaddr_node;
181+
struct hlist_node skc_portaddr_node;
182182
};
183183
struct proto *skc_prot;
184184
possible_net_t skc_net;
@@ -438,6 +438,7 @@ struct sock {
438438
struct sk_buff *skb);
439439
void (*sk_destruct)(struct sock *sk);
440440
struct sock_reuseport __rcu *sk_reuseport_cb;
441+
struct rcu_head sk_rcu;
441442
};
442443

443444
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
@@ -669,18 +670,18 @@ static inline void sk_add_bind_node(struct sock *sk,
669670
hlist_for_each_entry(__sk, list, sk_bind_node)
670671

671672
/**
672-
* sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset
673+
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
673674
* @tpos: the type * to use as a loop cursor.
674675
* @pos: the &struct hlist_node to use as a loop cursor.
675676
* @head: the head for your list.
676677
* @offset: offset of hlist_node within the struct.
677678
*
678679
*/
679-
#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \
680-
for (pos = (head)->first; \
681-
(!is_a_nulls(pos)) && \
680+
#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \
681+
for (pos = rcu_dereference((head)->first); \
682+
pos != NULL && \
682683
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
683-
pos = pos->next)
684+
pos = rcu_dereference(pos->next))
684685

685686
static inline struct user_namespace *sk_user_ns(struct sock *sk)
686687
{
@@ -720,6 +721,7 @@ enum sock_flags {
720721
*/
721722
SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
722723
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
724+
SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
723725
};
724726

725727
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -2010,6 +2012,13 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
20102012
SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
20112013
}
20122014

2015+
static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
2016+
{
2017+
int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
2018+
2019+
atomic_add(segs, &sk->sk_drops);
2020+
}
2021+
20132022
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
20142023
struct sk_buff *skb);
20152024
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,

include/net/tcp.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,4 +1836,17 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
18361836
tp->data_segs_in += segs_in;
18371837
}
18381838

1839+
/*
1840+
* TCP listen path runs lockless.
1841+
* We forced "struct sock" to be const qualified to make sure
1842+
* we don't modify one of its field by mistake.
1843+
* Here, we increment sk_drops which is an atomic_t, so we can safely
1844+
* make sock writable again.
1845+
*/
1846+
static inline void tcp_listendrop(const struct sock *sk)
1847+
{
1848+
atomic_inc(&((struct sock *)sk)->sk_drops);
1849+
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1850+
}
1851+
18391852
#endif /* _TCP_H */

include/net/udp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ struct udp_skb_cb {
5959
* @lock: spinlock protecting changes to head/count
6060
*/
6161
struct udp_hslot {
62-
struct hlist_nulls_head head;
62+
struct hlist_head head;
6363
int count;
6464
spinlock_t lock;
6565
} __attribute__((aligned(2 * sizeof(long))));

include/uapi/linux/sock_diag.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ enum {
2020
SK_MEMINFO_WMEM_QUEUED,
2121
SK_MEMINFO_OPTMEM,
2222
SK_MEMINFO_BACKLOG,
23+
SK_MEMINFO_DROPS,
2324

2425
SK_MEMINFO_VARS,
2526
};

net/core/sock.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1419,8 +1419,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
14191419
}
14201420
EXPORT_SYMBOL(sk_alloc);
14211421

1422-
void sk_destruct(struct sock *sk)
1422+
/* Sockets having SOCK_RCU_FREE will call this function after one RCU
1423+
* grace period. This is the case for UDP sockets and TCP listeners.
1424+
*/
1425+
static void __sk_destruct(struct rcu_head *head)
14231426
{
1427+
struct sock *sk = container_of(head, struct sock, sk_rcu);
14241428
struct sk_filter *filter;
14251429

14261430
if (sk->sk_destruct)
@@ -1449,6 +1453,14 @@ void sk_destruct(struct sock *sk)
14491453
sk_prot_free(sk->sk_prot_creator, sk);
14501454
}
14511455

1456+
void sk_destruct(struct sock *sk)
1457+
{
1458+
if (sock_flag(sk, SOCK_RCU_FREE))
1459+
call_rcu(&sk->sk_rcu, __sk_destruct);
1460+
else
1461+
__sk_destruct(&sk->sk_rcu);
1462+
}
1463+
14521464
static void __sk_free(struct sock *sk)
14531465
{
14541466
if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
@@ -1513,6 +1525,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
15131525
newsk->sk_dst_cache = NULL;
15141526
newsk->sk_wmem_queued = 0;
15151527
newsk->sk_forward_alloc = 0;
1528+
atomic_set(&newsk->sk_drops, 0);
15161529
newsk->sk_send_head = NULL;
15171530
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
15181531

net/core/sock_diag.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
6767
mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
6868
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
6969
mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
70+
mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
7071

7172
return nla_put(skb, attrtype, sizeof(mem), &mem);
7273
}

net/dccp/ipv4.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
764764
{
765765
const struct dccp_hdr *dh;
766766
const struct iphdr *iph;
767+
bool refcounted;
767768
struct sock *sk;
768769
int min_cov;
769770

@@ -801,7 +802,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
801802

802803
lookup:
803804
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
804-
dh->dccph_sport, dh->dccph_dport);
805+
dh->dccph_sport, dh->dccph_dport, &refcounted);
805806
if (!sk) {
806807
dccp_pr_debug("failed to look up flow ID in table and "
807808
"get corresponding socket\n");
@@ -830,6 +831,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
830831
goto lookup;
831832
}
832833
sock_hold(sk);
834+
refcounted = true;
833835
nsk = dccp_check_req(sk, skb, req);
834836
if (!nsk) {
835837
reqsk_put(req);
@@ -886,7 +888,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
886888
return 0;
887889

888890
discard_and_relse:
889-
sock_put(sk);
891+
if (refcounted)
892+
sock_put(sk);
890893
goto discard_it;
891894
}
892895

0 commit comments

Comments (0)