Skip to content

Commit a4298e4

Browse files
Eric Dumazet authored and davem330 committed
net: add SOCK_RCU_FREE socket flag
We want a generic way to insert an RCU grace period before socket freeing for cases where SLAB_DESTROY_BY_RCU is adding too much overhead. SLAB_DESTROY_BY_RCU strict rules force us to take a reference on the socket sk_refcnt, and it is a performance problem for UDP encapsulation, or TCP synflood behavior, as many CPUs might attempt the atomic operations on a shared sk_refcnt. UDP sockets and TCP listeners can set SOCK_RCU_FREE so that their lookup can use traditional RCU rules, without refcount changes. They can set the flag only once hashed and visible to other CPUs. Signed-off-by: Eric Dumazet <[email protected]> Cc: Tom Herbert <[email protected]> Tested-by: Tom Herbert <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 43e2dfb commit a4298e4

File tree

2 files changed

+15
-1
lines changed

2 files changed

+15
-1
lines changed

include/net/sock.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,7 @@ struct sock {
438438
struct sk_buff *skb);
439439
void (*sk_destruct)(struct sock *sk);
440440
struct sock_reuseport __rcu *sk_reuseport_cb;
441+
struct rcu_head sk_rcu;
441442
};
442443

443444
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
@@ -720,6 +721,7 @@ enum sock_flags {
720721
*/
721722
SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
722723
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
724+
SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
723725
};
724726

725727
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))

net/core/sock.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1419,8 +1419,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
14191419
}
14201420
EXPORT_SYMBOL(sk_alloc);
14211421

1422-
void sk_destruct(struct sock *sk)
1422+
/* Sockets having SOCK_RCU_FREE will call this function after one RCU
1423+
* grace period. This is the case for UDP sockets and TCP listeners.
1424+
*/
1425+
static void __sk_destruct(struct rcu_head *head)
14231426
{
1427+
struct sock *sk = container_of(head, struct sock, sk_rcu);
14241428
struct sk_filter *filter;
14251429

14261430
if (sk->sk_destruct)
@@ -1449,6 +1453,14 @@ void sk_destruct(struct sock *sk)
14491453
sk_prot_free(sk->sk_prot_creator, sk);
14501454
}
14511455

1456+
void sk_destruct(struct sock *sk)
1457+
{
1458+
if (sock_flag(sk, SOCK_RCU_FREE))
1459+
call_rcu(&sk->sk_rcu, __sk_destruct);
1460+
else
1461+
__sk_destruct(&sk->sk_rcu);
1462+
}
1463+
14521464
static void __sk_free(struct sock *sk)
14531465
{
14541466
if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))

0 commit comments

Comments
 (0)